Skip to content

Commit 294f4c3

Browse files
committed
test: fix JSON tests failing locally when allow_large_results is disabled
1 parent e9fe815 commit 294f4c3

File tree

4 files changed

+117
-58
lines changed

4 files changed

+117
-58
lines changed

tests/system/small/bigquery/test_json.py

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,6 @@
2222
import bigframes.pandas as bpd
2323

2424

25-
@pytest.fixture(scope="module", autouse=True)
26-
def use_large_query_path():
27-
# b/401630655
28-
with bpd.option_context("bigquery.allow_large_results", True):
29-
yield
30-
31-
3225
@pytest.mark.parametrize(
3326
("json_path", "expected_json"),
3427
[
@@ -39,12 +32,14 @@ def use_large_query_path():
3932
def test_json_set_at_json_path(json_path, expected_json):
4033
original_json = ['{"a": {"b": {"c": "tester", "d": []}}}']
4134
s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
42-
actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
4335

36+
actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
4437
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
38+
39+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
4540
pd.testing.assert_series_equal(
46-
actual.to_pandas(),
47-
expected.to_pandas(),
41+
actual.to_pandas(allow_large_results=True),
42+
expected.to_pandas(allow_large_results=True),
4843
)
4944

5045

@@ -63,11 +58,12 @@ def test_json_set_at_json_value_type(json_value, expected_json):
6358
original_json = ['{"a": {"b": "dev"}}', '{"a": {"b": [1, 2]}}']
6459
s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE)
6560
actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])
66-
6761
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
62+
63+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
6864
pd.testing.assert_series_equal(
69-
actual.to_pandas(),
70-
expected.to_pandas(),
65+
actual.to_pandas(allow_large_results=True),
66+
expected.to_pandas(allow_large_results=True),
7167
)
7268

7369

@@ -80,18 +76,14 @@ def test_json_set_w_more_pairs():
8076

8177
expected_json = ['{"a": 3, "b": 2}', '{"a": 4, "b": 2}', '{"a": 5, "b": 2, "c": 1}']
8278
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
79+
80+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
8381
pd.testing.assert_series_equal(
84-
actual.to_pandas(),
85-
expected.to_pandas(),
82+
actual.to_pandas(allow_large_results=True),
83+
expected.to_pandas(allow_large_results=True),
8684
)
8785

8886

89-
def test_json_set_w_invalid_json_path_value_pairs():
90-
s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE)
91-
with pytest.raises(ValueError):
92-
bbq.json_set(s, json_path_value_pairs=[("$.a", 1, 100)]) # type: ignore
93-
94-
9587
def test_json_set_w_invalid_value_type():
9688
s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE)
9789
with pytest.raises(TypeError):
@@ -119,11 +111,13 @@ def test_json_extract_from_json():
119111
['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],
120112
dtype=dtypes.JSON_DTYPE,
121113
)
122-
actual = bbq.json_extract(s, "$.a.b").to_pandas()
123-
expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE).to_pandas()
114+
actual = bbq.json_extract(s, "$.a.b")
115+
expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE)
116+
117+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
124118
pd.testing.assert_series_equal(
125-
actual,
126-
expected,
119+
actual.to_pandas(allow_large_results=True),
120+
expected.to_pandas(allow_large_results=True),
127121
)
128122

129123

@@ -134,9 +128,11 @@ def test_json_extract_from_string():
134128
)
135129
actual = bbq.json_extract(s, "$.a.b")
136130
expected = bpd.Series(["[1,2]", None, "0"], dtype=pd.StringDtype(storage="pyarrow"))
131+
132+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
137133
pd.testing.assert_series_equal(
138-
actual.to_pandas(),
139-
expected.to_pandas(),
134+
actual.to_pandas(allow_large_results=True),
135+
expected.to_pandas(allow_large_results=True),
140136
)
141137

142138

@@ -169,9 +165,10 @@ def test_json_extract_array_from_json():
169165
expected.index.name = None
170166
expected.name = None
171167

168+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
172169
pd.testing.assert_series_equal(
173-
actual.to_pandas(),
174-
expected.to_pandas(),
170+
actual.to_pandas(allow_large_results=True),
171+
expected.to_pandas(allow_large_results=True),
175172
)
176173

177174

@@ -185,9 +182,11 @@ def test_json_extract_array_from_json_strings():
185182
[['"ab"', '"2"', '"3 xy"'], [], ['"4"', '"5"'], None],
186183
dtype=pd.ArrowDtype(pa.list_(pa.string())),
187184
)
185+
186+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
188187
pd.testing.assert_series_equal(
189-
actual.to_pandas(),
190-
expected.to_pandas(),
188+
actual.to_pandas(allow_large_results=True),
189+
expected.to_pandas(allow_large_results=True),
191190
)
192191

193192

@@ -201,9 +200,11 @@ def test_json_extract_array_from_json_array_strings():
201200
[["1", "2", "3"], [], ["4", "5"]],
202201
dtype=pd.ArrowDtype(pa.list_(pa.string())),
203202
)
203+
204+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
204205
pd.testing.assert_series_equal(
205-
actual.to_pandas(),
206-
expected.to_pandas(),
206+
actual.to_pandas(allow_large_results=True),
207+
expected.to_pandas(allow_large_results=True),
207208
)
208209

209210

@@ -217,37 +218,45 @@ def test_json_extract_string_array_from_json_strings():
217218
s = bpd.Series(['{"a": ["ab", "2", "3 xy"]}', '{"a": []}', '{"a": ["4","5"]}'])
218219
actual = bbq.json_extract_string_array(s, "$.a")
219220
expected = bpd.Series([["ab", "2", "3 xy"], [], ["4", "5"]])
221+
222+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
220223
pd.testing.assert_series_equal(
221-
actual.to_pandas(),
222-
expected.to_pandas(),
224+
actual.to_pandas(allow_large_results=True),
225+
expected.to_pandas(allow_large_results=True),
223226
)
224227

225228

226229
def test_json_extract_string_array_from_array_strings():
227230
s = bpd.Series(["[1, 2, 3]", "[]", "[4,5]"])
228231
actual = bbq.json_extract_string_array(s)
229232
expected = bpd.Series([["1", "2", "3"], [], ["4", "5"]])
233+
234+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
230235
pd.testing.assert_series_equal(
231-
actual.to_pandas(),
232-
expected.to_pandas(),
236+
actual.to_pandas(allow_large_results=True),
237+
expected.to_pandas(allow_large_results=True),
233238
)
234239

235240

236241
def test_json_extract_string_array_as_float_array_from_array_strings():
237242
s = bpd.Series(["[1, 2.5, 3]", "[]", "[4,5]"])
238243
actual = bbq.json_extract_string_array(s, value_dtype=dtypes.FLOAT_DTYPE)
239244
expected = bpd.Series([[1, 2.5, 3], [], [4, 5]])
245+
246+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
240247
pd.testing.assert_series_equal(
241-
actual.to_pandas(),
242-
expected.to_pandas(),
248+
actual.to_pandas(allow_large_results=True),
249+
expected.to_pandas(allow_large_results=True),
243250
)
244251

245252

246253
def test_json_extract_string_array_w_invalid_series_type():
254+
s = bpd.Series([1, 2])
247255
with pytest.raises(TypeError):
248-
bbq.json_extract_string_array(bpd.Series([1, 2]))
256+
bbq.json_extract_string_array(s)
249257

250258

251259
def test_parse_json_w_invalid_series_type():
260+
s = bpd.Series([1, 2])
252261
with pytest.raises(TypeError):
253-
bbq.parse_json(bpd.Series([1, 2]))
262+
bbq.parse_json(s)

tests/system/small/test_dataframe_io.py

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ def test_to_arrow_override_global_option(scalars_df_index):
281281
assert scalars_df_index._query_job.destination.table_id == table_id
282282

283283

284-
def test_load_json_w_json_string_items(session):
284+
def test_read_gbq_w_json(session):
285285
sql = """
286286
SELECT 0 AS id, JSON_OBJECT('boolean', True) AS json_col,
287287
UNION ALL
@@ -307,7 +307,8 @@ def test_load_json_w_json_string_items(session):
307307
)
308308
),
309309
"""
310-
df = session.read_gbq(sql, index_col="id")
310+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
311+
df = session.read_gbq(sql, index_col="id").to_pandas(allow_large_results=True)
311312

312313
assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
313314

@@ -323,10 +324,12 @@ def test_load_json_w_json_string_items(session):
323324
assert df["json_col"][7] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}'
324325

325326

326-
def test_load_json_to_pandas_has_correct_result(session):
327+
def test_read_gbq_w_json_and_compare_w_pandas_json(session):
327328
df = session.read_gbq("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_col")
328329
assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
329-
result = df.to_pandas()
330+
331+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
332+
result = df.to_pandas(allow_large_results=True)
330333

331334
# These JSON strings are compatible with BigQuery's JSON storage,
332335
pd_df = pd.DataFrame(
@@ -338,7 +341,7 @@ def test_load_json_to_pandas_has_correct_result(session):
338341
pd.testing.assert_series_equal(result["json_col"], pd_df["json_col"])
339342

340343

341-
def test_load_json_in_struct(session):
344+
def test_read_gbq_w_json_in_struct(session):
342345
"""Avoid regressions for internal issue 381148539."""
343346
sql = """
344347
SELECT 0 AS id, STRUCT(JSON_OBJECT('boolean', True) AS data, 1 AS number) AS struct_col
@@ -371,6 +374,9 @@ def test_load_json_in_struct(session):
371374
data = df["struct_col"].struct.field("data")
372375
assert data.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
373376

377+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
378+
data = data.to_pandas(allow_large_results=True)
379+
374380
assert data[0] == '{"boolean":true}'
375381
assert data[1] == '{"int":100}'
376382
assert data[2] == '{"float":0.98}'
@@ -380,7 +386,7 @@ def test_load_json_in_struct(session):
380386
assert data[6] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}'
381387

382388

383-
def test_load_json_in_array(session):
389+
def test_read_gbq_w_json_in_array(session):
384390
sql = """
385391
SELECT
386392
0 AS id,
@@ -405,17 +411,22 @@ def test_load_json_in_array(session):
405411
assert isinstance(df.dtypes["array_col"], pd.ArrowDtype)
406412
assert isinstance(df.dtypes["array_col"].pyarrow_dtype, pa.ListType)
407413

408-
data = df["array_col"].list
409-
assert data.len()[0] == 7
410-
assert data[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
411-
412-
assert data[0][0] == '{"boolean":true}'
413-
assert data[1][0] == '{"int":100}'
414-
assert data[2][0] == '{"float":0.98}'
415-
assert data[3][0] == '{"string":"hello world"}'
416-
assert data[4][0] == '{"array":[8,9,10]}'
417-
assert data[5][0] == '{"null":null}'
418-
assert data[6][0] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
414+
data = df["array_col"]
415+
assert data.list.len()[0] == 7
416+
assert data.list[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
417+
418+
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
419+
pd_data = data.to_pandas(allow_large_results=True)
420+
421+
assert pd_data.list[0][0] == '{"boolean":true}'
422+
assert pd_data.list[1][0] == '{"int":100}'
423+
assert pd_data.list[2][0] == '{"float":0.98}'
424+
assert pd_data.list[3][0] == '{"string":"hello world"}'
425+
assert pd_data.list[4][0] == '{"array":[8,9,10]}'
426+
assert pd_data.list[5][0] == '{"null":null}'
427+
assert (
428+
pd_data.list[6][0] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}'
429+
)
419430

420431

421432
def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):

tests/unit/bigquery/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

tests/unit/bigquery/test_json.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest.mock as mock
16+
17+
import pytest
18+
19+
import bigframes.bigquery as bbq
20+
import bigframes.pandas as bpd
21+
22+
23+
def test_json_set_w_invalid_json_path_value_pairs():
24+
mock_series = mock.create_autospec(bpd.pandas.Series, instance=True)
25+
with pytest.raises(ValueError, match="Incorrect format"):
26+
bbq.json_set(mock_series, json_path_value_pairs=[("$.a", 1, 100)]) # type: ignore

0 commit comments

Comments
 (0)