1
1
# pylint: disable-msg=E1101,W0613,W0603
2
+ from itertools import islice
3
+ from pandas import concat
2
4
import os
3
5
import numpy as np
4
6
@@ -174,7 +176,7 @@ def write(self):
174
176
def read_json (path_or_buf = None , orient = None , typ = 'frame' , dtype = True ,
175
177
convert_axes = True , convert_dates = True , keep_default_dates = True ,
176
178
numpy = False , precise_float = False , date_unit = None , encoding = None ,
177
- lines = False ):
179
+ lines = False , chunksize = None ):
178
180
"""
179
181
Convert a JSON string to pandas object
180
182
@@ -263,6 +265,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
263
265
264
266
.. versionadded:: 0.19.0
265
267
268
+ chunksize: integer, default None
269
+ if lines is True, how many lines to read into memory at a time.
270
+ This is helpful if the file is large, but also slower. Larger chunks
271
+ are faster, but more likely to hit memory bounds. Also note this is
272
+ different from the `chunksize` parameter in `read_csv`, which returns
273
+ a TextFileReader.
274
+
266
275
Returns
267
276
-------
268
277
result : Series or DataFrame, depending on the value of `typ`.
@@ -334,8 +343,27 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
334
343
if exists :
335
344
fh , handles = _get_handle (filepath_or_buffer , 'r' ,
336
345
encoding = encoding )
337
- json = fh .read ()
338
- fh .close ()
346
+ if lines :
347
+ if not chunksize :
348
+ chunksize = 10000
349
+ return_val = None
350
+ while True :
351
+ lines = list (islice (fh , chunksize ))
352
+ if lines :
353
+ lines_json = '[' + ',' .join (lines ) + ']'
354
+ obj = _get_obj (typ , lines_json , orient , dtype , convert_axes , convert_dates , keep_default_dates , numpy , precise_float , date_unit )
355
+ if not return_val :
356
+ obj = return_val
357
+ else :
358
+ return_val = concat ([return_val , obj ])
359
+
360
+ else :
361
+ break
362
+ fh .close ()
363
+ return return_val
364
+ else :
365
+ json = fh .read ()
366
+ fh .close ()
339
367
else :
340
368
json = filepath_or_buffer
341
369
elif hasattr (filepath_or_buffer , 'read' ):
@@ -349,6 +377,11 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
349
377
lines = list (StringIO (json .strip ()))
350
378
json = '[' + ',' .join (lines ) + ']'
351
379
380
+ return _get_obj (typ , json , orient , dtype , convert_axes , convert_dates , keep_default_dates , numpy , precise_float , date_unit )
381
+
382
+ def _get_obj (typ , json , orient , dtype , convert_axes , convert_dates ,
383
+ keep_default_dates , numpy , precise_float ,
384
+ date_unit ):
352
385
obj = None
353
386
if typ == 'frame' :
354
387
obj = FrameParser (json , orient , dtype , convert_axes , convert_dates ,
0 commit comments