19
19
from dask_sql import Context
20
20
21
21
22
def cast_datetime_to_string(df):
    """Cast every ``datetime64[ns]`` column of *df* to the pandas ``string`` dtype.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose datetime columns should be stringified. Note the frame
        is modified in place as well as returned.

    Returns
    -------
    pandas.DataFrame
        The same frame, with all ``datetime64[ns]`` columns converted to
        the ``string`` dtype.
    """
    cols = df.select_dtypes(include=["datetime64[ns]"]).columns
    # Cast to object first: converting datetime64 directly to string
    # loses second precision.
    df[cols] = df[cols].astype("object").astype("string")
    return df
28
+
29
+
22
30
def eq_sqlite (sql , ** dfs ):
23
31
c = Context ()
24
32
engine = sqlite3 .connect (":memory:" )
@@ -30,6 +38,10 @@ def eq_sqlite(sql, **dfs):
30
38
dask_result = c .sql (sql ).compute ().reset_index (drop = True )
31
39
sqlite_result = pd .read_sql (sql , engine ).reset_index (drop = True )
32
40
41
+ # casting to string to ensure equality with SQLite,
42
+ # which returns object dtype for datetime inputs
43
+ dask_result = cast_datetime_to_string (dask_result )
44
+
33
45
# Make sure SQL and Dask use the same "NULL" value
34
46
dask_result = dask_result .fillna (np .NaN )
35
47
sqlite_result = sqlite_result .fillna (np .NaN )
@@ -349,6 +361,7 @@ def test_agg_min_max_no_group_by():
349
361
d = (str , 40 ),
350
362
e = (float , 40 ),
351
363
f = (pd .StringDtype , 40 ),
364
+ g = (datetime , 40 ),
352
365
)
353
366
eq_sqlite (
354
367
"""
@@ -365,6 +378,8 @@ def test_agg_min_max_no_group_by():
365
378
MAX(e) AS max_e,
366
379
MIN(f) as min_f,
367
380
MAX(f) as max_f,
381
+ MIN(g) as min_g,
382
+ MAX(g) as max_g,
368
383
MIN(a+e) AS mix_1,
369
384
MIN(a)+MIN(e) AS mix_2
370
385
FROM a
@@ -382,6 +397,7 @@ def test_agg_min_max():
382
397
d = (str , 40 ),
383
398
e = (float , 40 ),
384
399
f = (pd .StringDtype , 40 ),
400
+ g = (datetime , 40 ),
385
401
)
386
402
eq_sqlite (
387
403
"""
@@ -395,6 +411,8 @@ def test_agg_min_max():
395
411
MAX(e) AS max_e,
396
412
MIN(f) AS min_f,
397
413
MAX(f) AS max_f,
414
+ MIN(g) AS min_g,
415
+ MAX(g) AS max_g,
398
416
MIN(a+e) AS mix_1,
399
417
MIN(a)+MIN(e) AS mix_2
400
418
FROM a GROUP BY a, b
0 commit comments