DOC: io/v0.13/release notes

jreback · jreback · commit e796c275fd39 · 2013-10-03T16:59:00.000-04:00
CLN: py3 updates
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -1230,6 +1230,37 @@ nanoseconds
    import os
    os.remove('test.json')
 
+.. _io.json_normalize:
+
+Normalization
+~~~~~~~~~~~~~
+
+.. versionadded:: 0.13.0
+
+Pandas provides a utility function to take a dict or list of dicts and *normalize* this semi-structured data
+into a flat table.
+
+.. ipython:: python
+
+   from pandas.io.json import json_normalize
+   data = [{'state': 'Florida',
+             'shortname': 'FL',
+             'info': {
+                  'governor': 'Rick Scott'
+             },
+             'counties': [{'name': 'Dade', 'population': 12345},
+                         {'name': 'Broward', 'population': 40000},
+                         {'name': 'Palm Beach', 'population': 60000}]},
+            {'state': 'Ohio',
+             'shortname': 'OH',
+             'info': {
+                  'governor': 'John Kasich'
+             },
+             'counties': [{'name': 'Summit', 'population': 1234},
+                          {'name': 'Cuyahoga', 'population': 1337}]}]
+
+   json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']])
+
 HTML
 ----
 
@@ -1244,7 +1275,7 @@ Reading HTML Content
 
 .. _io.read_html:
 
-.. versionadded:: 0.12
+.. versionadded:: 0.12.0
 
 The top-level :func:`~pandas.io.html.read_html` function can accept an HTML
 string/file/url and will parse HTML tables into list of pandas DataFrames.
@@ -1620,7 +1651,7 @@ advanced strategies
 
 .. note::
 
-   The prior method of accessing Excel is now deprecated as of 0.12,
+   The prior method of accessing Excel is now deprecated as of 0.12.0;
    this will work but will be removed in a future version.
 
       .. code-block:: python
@@ -2291,7 +2322,7 @@ The default is 50,000 rows returned in a chunk.
 
 .. note::
 
-   .. versionadded:: 0.12
+   .. versionadded:: 0.12.0
 
    You can also use the iterator with ``read_hdf`` which will open, then
    automatically close the store when finished iterating.
@@ -2580,7 +2611,7 @@ Pass ``min_itemsize`` on the first table creation to a-priori specifiy the minim
 ``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to
 allow all *indexables* or *data_columns* to have this min_itemsize.
 
-Starting in 0.11, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically.
+Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically.
 
 .. note::
 
@@ -2860,7 +2891,7 @@ Reading from STATA format
 
 .. _io.stata_reader:
 
-.. versionadded:: 0.12
+.. versionadded:: 0.12.0
 
 The top-level function ``read_stata`` will read a dta format file
 and return a DataFrame:
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -169,6 +169,8 @@ Improvements to existing features
     high-dimensional arrays).
   - :func:`~pandas.read_html` now supports the ``parse_dates``,
     ``tupleize_cols`` and ``thousands`` parameters (:issue:`4770`).
+  - :func:`~pandas.io.json.json_normalize` is a new function to allow you to create a flat table
+    from semi-structured JSON data. :ref:`See the docs<io.json_normalize>` (:issue:`1067`)
 
 API Changes
 ~~~~~~~~~~~
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
@@ -490,6 +490,8 @@ Enhancements
   - ``tz_localize`` can infer a fall daylight savings transition based on the structure
     of the unlocalized data (:issue:`4230`), see :ref:`here<timeseries.timezone>`
   - DatetimeIndex is now in the API documentation, see :ref:`here<api.datetimeindex>`
+  - :func:`~pandas.io.json.json_normalize` is a new function to allow you to create a flat table
+    from semi-structured JSON data. :ref:`See the docs<io.json_normalize>` (:issue:`1067`)
 
 .. _whatsnew_0130.experimental:
 
diff --git a/pandas/io/json.py b/pandas/io/json.py
@@ -1,6 +1,7 @@
 # pylint: disable-msg=E1101,W0613,W0603
 
 import os
+import copy
 from collections import defaultdict
 import numpy as np
 
@@ -570,8 +571,11 @@ def nested_to_record(ds,prefix="",level=0):
         ds = [ds]
         singleton = True
 
+    new_ds = []
     for d in ds:
-        for k,v in d.items(): # modifying keys inside loop, not lazy
+
+        new_d = copy.deepcopy(d)
+        for k,v in d.items():
             # each key gets renamed with prefix
             if level == 0:
                 newkey = str(k)
@@ -582,16 +586,17 @@ def nested_to_record(ds,prefix="",level=0):
             # only at level>1 do we rename the rest of the keys
             if not isinstance(v,dict):
                 if level!=0: # so we skip copying for top level, common case
-                    v = d.pop(k)
-                    d[newkey]= v
+                    v = new_d.pop(k)
+                    new_d[newkey]= v
                 continue
             else:
-                v = d.pop(k)
-                d.update(nested_to_record(v,newkey,level+1))
+                v = new_d.pop(k)
+                new_d.update(nested_to_record(v,newkey,level+1))
+        new_ds.append(new_d)
 
     if singleton:
-        return ds[0]
-    return ds
+        return new_ds[0]
+    return new_ds
 
 
 def json_normalize(data, record_path=None, meta=None,
@@ -658,7 +663,7 @@ def _pull_field(js, spec):
         data = [data]
 
     if record_path is None:
-        if any([isinstance(x,dict) for x in data[0].itervalues()]):
+        if any([isinstance(x,dict) for x in compat.itervalues(data[0])]):
             # naive normalization, this is idempotent for flat records
             # and potentially will inflate the data considerably for
             # deeply nested structures:
@@ -719,7 +724,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
         result.rename(columns=lambda x: record_prefix + x, inplace=True)
 
     # Data types, a problem
-    for k, v in meta_vals.iteritems():
+    for k, v in compat.iteritems(meta_vals):
         if meta_prefix is not None:
             k = meta_prefix + k