Skip to content

Commit e055046

Browse files
google-labs-jules[bot] authored and chelsea-lin committed
feat: add bbq.json_query_array and warn bbq.json_extract_array deprecated
1 parent e403528 commit e055046

File tree

3 files changed

+116
-0
lines changed

3 files changed

+116
-0
lines changed

bigframes/bigquery/_operations/json.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ def json_extract_array(
133133
`STRING` or `JSON` values. This function uses single quotes and brackets to
134134
escape invalid JSONPath characters in JSON keys.
135135
136+
.. deprecated:: 2.5.0
137+
The ``json_extract_array`` is deprecated and will be removed in a future version.
138+
Use ``json_query_array`` instead.
139+
136140
**Examples:**
137141
138142
>>> import bigframes.pandas as bpd
@@ -172,6 +176,11 @@ def json_extract_array(
172176
Returns:
173177
bigframes.series.Series: A new Series with the parsed arrays from the input.
174178
"""
179+
msg = (
180+
"The `json_extract_array` is deprecated and will be removed in a future version. "
181+
"Use `json_query_array` instead."
182+
)
183+
warnings.warn(bfe.format_message(msg), category=UserWarning)
175184
return input._apply_unary_op(ops.JSONExtractArray(json_path=json_path))
176185

177186

@@ -273,6 +282,56 @@ def json_query(
273282
return input._apply_unary_op(ops.JSONQuery(json_path=json_path))
274283

275284

285+
def json_query_array(
    input: series.Series,
    json_path: str = "$",
) -> series.Series:
    """Pull a JSON array out of each value and return it as a SQL array whose
    elements are JSON-formatted `STRING` or `JSON` values. Invalid JSONPath
    characters in JSON keys are escaped with double quotes, for example:
    `"a.b"`.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series(['[1, 2, 3]', '[4, 5]'])
        >>> bbq.json_query_array(s)
        0 ['1' '2' '3']
        1 ['4' '5']
        dtype: list<item: string>[pyarrow]

        >>> s = bpd.Series([
        ... '{"fruits": [{"name": "apple"}, {"name": "cherry"}]}',
        ... '{"fruits": [{"name": "guava"}, {"name": "grapes"}]}'
        ... ])
        >>> bbq.json_query_array(s, "$.fruits")
        0 ['{"name":"apple"}' '{"name":"cherry"}']
        1 ['{"name":"guava"}' '{"name":"grapes"}']
        dtype: list<item: string>[pyarrow]

        >>> s = bpd.Series([
        ... '{"fruits": {"color": "red", "names": ["apple","cherry"]}}',
        ... '{"fruits": {"color": "green", "names": ["guava", "grapes"]}}'
        ... ])
        >>> bbq.json_query_array(s, "$.fruits.names")
        0 ['"apple"' '"cherry"']
        1 ['"guava"' '"grapes"']
        dtype: list<item: string>[pyarrow]

    Args:
        input (bigframes.series.Series):
            Series holding the JSON data, either as native JSON objects or
            as JSON-formatted strings.
        json_path (str):
            JSON path that selects the array to extract from each input
            value. Defaults to ``"$"``.

    Returns:
        bigframes.series.Series: A new Series with the parsed arrays from the input.
    """
    # Build the op first, then hand it to the Series to apply element-wise.
    query_array_op = ops.JSONQueryArray(json_path=json_path)
    return input._apply_unary_op(query_array_op)
333+
334+
276335
def json_value(
277336
input: series.Series,
278337
json_path: str,

bigframes/operations/json_ops.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,23 @@ def output_type(self, *input_types):
3737
return input_type
3838

3939

40+
@dataclasses.dataclass(frozen=True)
class JSONQueryArray(base_ops.UnaryOp):
    """Unary op registered under the name ``json_query_array``.

    Carries the JSON path to query and computes the resulting list dtype
    from the dtype of its single input.
    """

    name: typing.ClassVar[str] = "json_query_array"
    # JSON path selecting the array to extract from the input value.
    json_path: str

    def output_type(self, *input_types):
        """Return the Arrow list dtype produced for the given input dtype.

        Only the first entry of ``input_types`` is consulted. The result is
        a list whose element type is the Arrow equivalent of that input
        dtype.

        Raises:
            TypeError: If the input dtype is not JSON-like according to
                ``dtypes.is_json_like``.
        """
        operand_type = input_types[0]
        if dtypes.is_json_like(operand_type):
            element_type = dtypes.bigframes_dtype_to_arrow_dtype(operand_type)
            return pd.ArrowDtype(pa.list_(element_type))
        raise TypeError(
            "Input type must be a valid JSON object or JSON-formatted string type."
            f" Received type: {operand_type}"
        )
55+
56+
4057
@dataclasses.dataclass(frozen=True)
4158
class JSONExtractArray(base_ops.UnaryOp):
4259
name: typing.ClassVar[str] = "json_extract_array"

tests/unit/bigquery/test_json.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,49 @@
1818

1919
import bigframes.bigquery as bbq
2020
import bigframes.pandas as bpd
21+
import bigframes.operations as ops
22+
import bigframes.dtypes as dtypes
2123

2224

2325
def test_json_set_w_invalid_json_path_value_pairs():
    """``json_set`` rejects an entry that is not a (path, value) pair."""
    series_double = mock.create_autospec(bpd.pandas.Series, instance=True)
    # A 3-tuple has one element too many for a (json_path, value) pair.
    malformed_pairs = [("$.a", 1, 100)]
    with pytest.raises(ValueError, match="Incorrect format"):
        bbq.json_set(series_double, json_path_value_pairs=malformed_pairs)  # type: ignore
29+
30+
31+
def test_json_query_array_specific_path():
    """An explicit ``json_path`` is forwarded inside the ``JSONQueryArray`` op."""
    series_double = mock.create_autospec(bpd.Series, instance=True)
    # Give the mock a string dtype so it resembles a JSON-formatted string Series.
    series_double.dtype = dtypes.STRING_DTYPE
    expected_op = ops.JSONQueryArray(json_path="$.items")

    bbq.json_query_array(series_double, json_path="$.items")

    series_double._apply_unary_op.assert_called_once_with(expected_op)
41+
42+
def test_json_query_array_default_path():
    """Omitting ``json_path`` results in an op built with the root path ``"$"``."""
    series_double = mock.create_autospec(bpd.Series, instance=True)
    # Give the mock the JSON dtype so it resembles a native JSON Series.
    series_double.dtype = dtypes.JSON_DTYPE
    expected_op = ops.JSONQueryArray(json_path="$")

    # No json_path argument: the function's default is exercised.
    bbq.json_query_array(series_double)

    series_double._apply_unary_op.assert_called_once_with(expected_op)
52+
53+
def test_json_query_array_input_type_validation_passes_with_json_like():
    """The call is forwarded exactly once for string- and JSON-typed mocks.

    ``_apply_unary_op`` is a mock here, so the op's ``output_type`` is not
    exercised; this only verifies that ``json_query_array`` delegates to the
    Series for each dtype. Real dtype validation belongs in op-level tests.
    """
    for json_like_dtype in (dtypes.STRING_DTYPE, dtypes.JSON_DTYPE):
        # A fresh mock per dtype keeps the call counts independent.
        series_double = mock.create_autospec(bpd.Series, instance=True)
        series_double.dtype = json_like_dtype
        bbq.json_query_array(series_double)
        series_double._apply_unary_op.assert_called_once()

0 commit comments

Comments
 (0)