diff --git a/doc/source/release.rst b/doc/source/release.rst index ccc34a4051508..07efacc0bf641 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -219,6 +219,7 @@ Improvements to existing features option it is no longer possible to round trip Excel files with merged MultiIndex and Hierarchical Rows. Set the ``merge_cells`` to ``False`` to restore the previous behaviour. (:issue:`5254`) + - The FRED DataReader now accepts multiple series (:issue:`3413`) API Changes ~~~~~~~~~~~ diff --git a/doc/source/remote_data.rst b/doc/source/remote_data.rst index b950876738852..1c9893ec7bd02 100644 --- a/doc/source/remote_data.rst +++ b/doc/source/remote_data.rst @@ -80,7 +80,9 @@ FRED gdp=web.DataReader("GDP", "fred", start, end) gdp.ix['2013-01-01'] - + # Multiple series: + inflation = web.DataReader(["CPIAUCSL", "CPILFESL"], "fred", start, end) + inflation.head() .. _remote_data.ff: Fama/French diff --git a/pandas/io/data.py b/pandas/io/data.py index cb9f096a1d07a..cf49515cac576 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -17,7 +17,7 @@ ) import pandas.compat as compat from pandas import Panel, DataFrame, Series, read_csv, concat -from pandas.core.common import PandasError +from pandas.core.common import is_list_like, PandasError from pandas.io.parsers import TextParser from pandas.io.common import urlopen, ZipFile, urlencode from pandas.util.testing import _network_error_classes @@ -41,8 +41,9 @@ def DataReader(name, data_source=None, start=None, end=None, Parameters ---------- - name : str - the name of the dataset + name : str or list of strs + the name of the dataset. Some data sources (yahoo, google, fred) will + accept a list of names. data_source: str the data source ("yahoo", "google", "fred", or "ff") start : {datetime, None} @@ -436,24 +437,37 @@ def get_data_fred(name, start=dt.datetime(2010, 1, 1), Date format is datetime Returns a DataFrame. 
+ + If multiple names are passed for "series" then the index of the + DataFrame is the outer join of the indices of each series. """ start, end = _sanitize_dates(start, end) fred_URL = "http://research.stlouisfed.org/fred2/series/" - url = fred_URL + '%s' % name + '/downloaddata/%s' % name + '.csv' - with urlopen(url) as resp: - data = read_csv(resp, index_col=0, parse_dates=True, - header=None, skiprows=1, names=["DATE", name], - na_values='.') - try: - return data.truncate(start, end) - except KeyError: - if data.ix[3].name[7:12] == 'Error': - raise IOError("Failed to get the data. Check that {0!r} is " - "a valid FRED series.".format(name)) - raise + if not is_list_like(name): + names = [name] + else: + names = name + urls = [fred_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for + n in names] + + def fetch_data(url, name): + with urlopen(url) as resp: + data = read_csv(resp, index_col=0, parse_dates=True, + header=None, skiprows=1, names=["DATE", name], + na_values='.') + try: + return data.truncate(start, end) + except KeyError: + if data.ix[3].name[7:12] == 'Error': + raise IOError("Failed to get the data. 
Check that {0!r} is " + "a valid FRED series.".format(name)) + raise + df = concat([fetch_data(url, n) for url, n in zip(urls, names)], + axis=1, join='outer') + return df def get_data_famafrench(name): # path of zip files diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index 4e2331f05001d..8ba770aa31939 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -16,6 +16,10 @@ import pandas.util.testing as tm from numpy.testing import assert_array_equal +if compat.PY3: + from urllib.error import HTTPError +else: + from urllib2 import HTTPError def _skip_if_no_lxml(): try: @@ -422,6 +426,24 @@ def test_invalid_series(self): name = "NOT A REAL SERIES" self.assertRaises(Exception, web.get_data_fred, name) + @network + def test_fred_multi(self): + names = ['CPIAUCSL', 'CPALTT01USQ661S', 'CPILFESL'] + start = datetime(2010, 1, 1) + end = datetime(2013, 1, 27) + + received = web.DataReader(names, "fred", start, end).head(1) + expected = DataFrame([[217.478, 0.99701529, 220.544]], columns=names, + index=[pd.tslib.Timestamp('2010-01-01 00:00:00')]) + expected.index.rename('DATE', inplace=True) + assert_frame_equal(received, expected, check_less_precise=True) + + @network + def test_fred_multi_bad_series(self): + + names = ['NOTAREALSERIES', 'CPIAUCSL', "ALSO FAKE"] + with tm.assertRaises(HTTPError): + DataReader(names, data_source="fred") if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],