@@ -36,13 +36,13 @@ print("grouping...\n"); flush(stdout);
36
36
37
37
question = " sum v1 by id1" ; # q1
38
38
GC. gc ();
39
- t = @elapsed (ANS = combine (groupby (x, :id1 ), skipmissing ( :v1 ) => sum => :v1 ); println (size (ANS)); flush (stdout ));
39
+ t = @elapsed (ANS = combine (groupby (x, :id1 ), :v1 => sum∘ skipmissing => :v1 ); println (size (ANS)); flush (stdout ));
40
40
m = memory_usage ();
41
41
chkt = @elapsed chk = sum (ANS. v1);
42
42
write_log (1 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
43
43
ANS = 0 ;
44
44
GC. gc ();
45
- t = @elapsed (ANS = combine (groupby (x, :id1 ), skipmissing ( :v1 ) => sum => :v1 ); println (size (ANS)); flush (stdout ));
45
+ t = @elapsed (ANS = combine (groupby (x, :id1 ), :v1 => sum∘ skipmissing => :v1 ); println (size (ANS)); flush (stdout ));
46
46
m = memory_usage ();
47
47
chkt = @elapsed chk = sum (ANS. v1);
48
48
write_log (2 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
@@ -52,13 +52,13 @@ ANS = 0;
52
52
53
53
question = " sum v1 by id1:id2" ; # q2
54
54
GC. gc ();
55
- t = @elapsed (ANS = combine (groupby (x, [:id1 , :id2 ]), :v1 => sum => :v1 ); println (size (ANS)); flush (stdout ));
55
+ t = @elapsed (ANS = combine (groupby (x, [:id1 , :id2 ]), :v1 => sum∘ skipmissing => :v1 ); println (size (ANS)); flush (stdout ));
56
56
m = memory_usage ();
57
57
chkt = @elapsed chk = sum (ANS. v1);
58
58
write_log (1 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
59
59
ANS = 0 ;
60
60
GC. gc ();
61
- t = @elapsed (ANS = combine (groupby (x, [:id1 , :id2 ]), :v1 => sum => :v1 ); println (size (ANS)); flush (stdout ));
61
+ t = @elapsed (ANS = combine (groupby (x, [:id1 , :id2 ]), :v1 => sum∘ skipmissing => :v1 ); println (size (ANS)); flush (stdout ));
62
62
m = memory_usage ();
63
63
chkt = @elapsed chk = sum (ANS. v1);
64
64
write_log (2 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
@@ -68,13 +68,13 @@ ANS = 0;
68
68
69
69
question = " sum v1 mean v3 by id3" ; # q3
70
70
GC. gc ();
71
- t = @elapsed (ANS = combine (groupby (x, :id3 ), :v1 => sum => :v1 , :v3 => mean => :v3 ); println (size (ANS)); flush (stdout ));
71
+ t = @elapsed (ANS = combine (groupby (x, :id3 ), :v1 => sum∘ skipmissing => :v1 , :v3 => mean∘ skipmissing => :v3 ); println (size (ANS)); flush (stdout ));
72
72
m = memory_usage ();
73
73
chkt = @elapsed chk = [sum (ANS. v1), sum (ANS. v3)];
74
74
write_log (1 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
75
75
ANS = 0 ;
76
76
GC. gc ();
77
- t = @elapsed (ANS = combine (groupby (x, :id3 ), :v1 => sum => :v1 , :v3 => mean => :v3 ); println (size (ANS)); flush (stdout ));
77
+ t = @elapsed (ANS = combine (groupby (x, :id3 ), :v1 => sum∘ skipmissing => :v1 , :v3 => mean∘ skipmissing => :v3 ); println (size (ANS)); flush (stdout ));
78
78
m = memory_usage ();
79
79
chkt = @elapsed chk = [sum (ANS. v1), sum (ANS. v3)];
80
80
write_log (2 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
@@ -84,14 +84,14 @@ ANS = 0;
84
84
85
85
question = " mean v1:v3 by id4" ; # q4
86
86
GC. gc ();
87
- t = @elapsed (ANS = combine (groupby (x, :id4 ), :v1 => mean => :v1 , :v2 => mean => :v2 , :v3 => mean => :v3 ); println (size (ANS)); flush (stdout ));
87
+ t = @elapsed (ANS = combine (groupby (x, :id4 ), :v1 => mean∘ skipmissing => :v1 , :v2 => mean∘ skipmissing => :v2 , :v3 => mean∘ skipmissing => :v3 ); println (size (ANS)); flush (stdout ));
88
88
m = memory_usage ();
89
89
t_start = time_ns ();
90
90
chkt = @elapsed chk = [sum (ANS. v1), sum (ANS. v2), sum (ANS. v3)];
91
91
write_log (1 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
92
92
ANS = 0 ;
93
93
GC. gc ();
94
- t = @elapsed (ANS = combine (groupby (x, :id4 ), :v1 => mean => :v1 , :v2 => mean => :v2 , :v3 => mean => :v3 ); println (size (ANS)); flush (stdout ));
94
+ t = @elapsed (ANS = combine (groupby (x, :id4 ), :v1 => mean∘ skipmissing => :v1 , :v2 => mean∘ skipmissing => :v2 , :v3 => mean∘ skipmissing => :v3 ); println (size (ANS)); flush (stdout ));
95
95
m = memory_usage ();
96
96
chkt = @elapsed chk = [sum (ANS. v1), sum (ANS. v2), sum (ANS. v3)];
97
97
write_log (2 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
@@ -101,13 +101,13 @@ ANS = 0;
101
101
102
102
question = " sum v1:v3 by id6" ; # q5
103
103
GC. gc ();
104
- t = @elapsed (ANS = combine (groupby (x, :id6 ), :v1 => sum => :v1 , :v2 => sum => :v2 , :v3 => sum => :v3 ); println (size (ANS)); flush (stdout ));
104
+ t = @elapsed (ANS = combine (groupby (x, :id6 ), :v1 => sum∘ skipmissing => :v1 , :v2 => sum∘ skipmissing => :v2 , :v3 => sum∘ skipmissing => :v3 ); println (size (ANS)); flush (stdout ));
105
105
m = memory_usage ();
106
106
chkt = @elapsed chk = [sum (ANS. v1), sum (ANS. v2), sum (ANS. v3)];
107
107
write_log (1 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
108
108
ANS = 0 ;
109
109
GC. gc ();
110
- t = @elapsed (ANS = combine (groupby (x, :id6 ), :v1 => sum => :v1 , :v2 => sum => :v2 , :v3 => sum => :v3 ); println (size (ANS)); flush (stdout ));
110
+ t = @elapsed (ANS = combine (groupby (x, :id6 ), :v1 => sum∘ skipmissing => :v1 , :v2 => sum∘ skipmissing => :v2 , :v3 => sum∘ skipmissing => :v3 ); println (size (ANS)); flush (stdout ));
111
111
m = memory_usage ();
112
112
chkt = @elapsed chk = [sum (ANS. v1), sum (ANS. v2), sum (ANS. v3)];
113
113
write_log (2 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
@@ -117,13 +117,13 @@ ANS = 0;
117
117
118
118
question = " median v3 sd v3 by id4 id5" ; # q6
119
119
GC. gc ();
120
- t = @elapsed (ANS = combine (groupby (x, [:id4 , :id5 ]), :v3 => median => :median_v3 , :v3 => std => :sd_v3 ); println (size (ANS)); flush (stdout ));
120
+ t = @elapsed (ANS = combine (groupby (x, [:id4 , :id5 ]), :v3 => median∘ skipmissing => :median_v3 , :v3 => std∘ skipmissing => :sd_v3 ); println (size (ANS)); flush (stdout ));
121
121
m = memory_usage ();
122
122
chkt = @elapsed chk = [sum (ANS. median_v3), sum (ANS. sd_v3)];
123
123
write_log (1 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
124
124
ANS = 0 ;
125
125
GC. gc ();
126
- t = @elapsed (ANS = combine (groupby (x, [:id4 , :id5 ]), :v3 => median => :median_v3 , :v3 => std => :sd_v3 ); println (size (ANS)); flush (stdout ));
126
+ t = @elapsed (ANS = combine (groupby (x, [:id4 , :id5 ]), :v3 => median∘ skipmissing => :median_v3 , :v3 => std∘ skipmissing => :sd_v3 ); println (size (ANS)); flush (stdout ));
127
127
m = memory_usage ();
128
128
chkt = @elapsed chk = [sum (ANS. median_v3), sum (ANS. sd_v3)];;
129
129
write_log (2 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
@@ -133,13 +133,13 @@ ANS = 0;
133
133
134
134
question = " max v1 - min v2 by id3" ; # q7
135
135
GC. gc ();
136
- t = @elapsed (ANS = combine (groupby (x, :id3 ), [:v1 , :v2 ] => ((v1, v2) -> maximum (v1)- minimum (v2 )) => :range_v1_v2 ); println (size (ANS)); flush (stdout ));
136
+ t = @elapsed (ANS = combine (groupby (x, :id3 ), [:v1 , :v2 ] => ((v1, v2) -> maximum (skipmissing ( v1)) - minimum (skipmissing (v2) )) => :range_v1_v2 ); println (size (ANS)); flush (stdout ));
137
137
m = memory_usage ();
138
138
chkt = @elapsed chk = sum (ANS. range_v1_v2);
139
139
write_log (1 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
140
140
ANS = 0 ;
141
141
GC. gc ();
142
- t = @elapsed (ANS = combine (groupby (x, :id3 ), [:v1 , :v2 ] => ((v1, v2) -> maximum (v1)- minimum (v2 )) => :range_v1_v2 ); println (size (ANS)); flush (stdout ));
142
+ t = @elapsed (ANS = combine (groupby (x, :id3 ), [:v1 , :v2 ] => ((v1, v2) -> maximum (skipmissing ( v1)) - minimum (skipmissing (v2) )) => :range_v1_v2 ); println (size (ANS)); flush (stdout ));
143
143
m = memory_usage ();
144
144
chkt = @elapsed chk = sum (ANS. range_v1_v2);
145
145
write_log (2 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
@@ -149,6 +149,7 @@ ANS = 0;
149
149
150
150
question = " largest two v3 by id6" ; # q8
151
151
GC. gc ();
152
+ # # TODO
152
153
t = @elapsed (ANS = combine (groupby (x, :id6 ), :v3 => (x -> partialsort (x, 1 : min (2 , length (x)), rev= true )) => :largest2_v3 ); println (size (ANS)); flush (stdout ));
153
154
m = memory_usage ();
154
155
chkt = @elapsed chk = sum (ANS. largest2_v3);
@@ -165,6 +166,7 @@ ANS = 0;
165
166
166
167
question = " regression v1 v2 by id2 id4" ; # q9
167
168
GC. gc ();
169
+ # # TODO
168
170
t = @elapsed (ANS = combine (groupby (x, [:id2 , :id4 ]), [:v1 , :v2 ] => ((v1,v2) -> cor (v1, v2)^ 2 ) => :r2 ); println (size (ANS)); flush (stdout ));
169
171
m = memory_usage ();
170
172
chkt = @elapsed chk = sum (ANS. r2);
@@ -181,13 +183,13 @@ ANS = 0;
181
183
182
184
question = " sum v3 count by id1:id6" ; # q10
183
185
GC. gc ();
184
- t = @elapsed (ANS = combine (groupby (x, [:id1 , :id2 , :id3 , :id4 , :id5 , :id6 ]), :v3 => sum => :v3 , :v3 => length => :count ); println (size (ANS)); flush (stdout ));
186
+ t = @elapsed (ANS = combine (groupby (x, [:id1 , :id2 , :id3 , :id4 , :id5 , :id6 ]), :v3 => sum∘ skipmissing => :v3 , :v3 => length => :count ); println (size (ANS)); flush (stdout ));
185
187
m = memory_usage ();
186
188
chkt = @elapsed chk = [sum (ANS. v3), sum (ANS. count)];
187
189
write_log (1 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
188
190
ANS = 0 ;
189
191
GC. gc ();
190
- t = @elapsed (ANS = combine (groupby (x, [:id1 , :id2 , :id3 , :id4 , :id5 , :id6 ]), :v3 => sum => :v3 , :v3 => length => :count ); println (size (ANS)); flush (stdout ));
192
+ t = @elapsed (ANS = combine (groupby (x, [:id1 , :id2 , :id3 , :id4 , :id5 , :id6 ]), :v3 => sum∘ skipmissing => :v3 , :v3 => length => :count ); println (size (ANS)); flush (stdout ));
191
193
m = memory_usage ();
192
194
chkt = @elapsed chk = [sum (ANS. v3), sum (ANS. count)];
193
195
write_log (2 , task, data_name, in_rows, question, size (ANS, 1 ), size (ANS, 2 ), solution, ver, git, fun, t, m, cache, make_chk (chk), chkt, on_disk);
0 commit comments