pydata
diff --git a/‎docs/source/remote_data.rst‎
Lines changed: 20 additions & 0 deletions b/‎docs/source/remote_data.rst‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎docs/source/whatsnew.rst‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/whatsnew.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/whatsnew/v0.2.0.txt‎
Lines changed: 2 additions & 2 deletions b/‎docs/source/whatsnew/v0.2.0.txt‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/source/whatsnew/v0.2.1.txt‎
Lines changed: 32 additions & 0 deletions b/‎docs/source/whatsnew/v0.2.1.txt‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎pandas_datareader/data.py‎
Lines changed: 5 additions & 0 deletions b/‎pandas_datareader/data.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎pandas_datareader/eurostat.py‎
Lines changed: 44 additions & 0 deletions b/‎pandas_datareader/eurostat.py‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎pandas_datareader/io/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎pandas_datareader/io/__init__.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎pandas_datareader/io/sdmx.py‎
Lines changed: 203 additions & 0 deletions b/‎pandas_datareader/io/sdmx.py‎
Lines changed: 203 additions & 0 deletions
@@ -29,6 +29,7 @@ Currently the following sources are supported:
     - :ref:`Kenneth French's data library<remote_data.ff>`
     - :ref:`World Bank<remote_data.wb>`
     - :ref:`OECD<remote_data.oecd>`
+    - :ref:`Eurostat<remote_data.eurostat>`
 
 It should be noted, that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ.
 
@@ -365,3 +366,22 @@ example is to download "Trade Union Density" data which set code is "UN_DEN".
 
     df[['Japan', 'United States']]
 
+.. _remote_data.eurostat:
+
+Eurostat
+========
+
+`Eurostat <http://ec.europa.eu/eurostat/>`__ data are available via ``DataReader``.
+
+Get `Rail accidents by type of accident (ERA data) <http://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=tran_sf_railac&lang=en>`_ data. The result will be a ``DataFrame`` which has a ``DatetimeIndex`` as its index and a ``MultiIndex`` of attributes or countries as its columns. The target URL is:
+
+* http://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=tran_sf_railac&lang=en
+
+You can specify the dataset ID ``tran_sf_railac`` to get the corresponding data via ``DataReader``.
+
+.. ipython:: python
+
+    import pandas_datareader.data as web
+
+    df = web.DataReader("tran_sf_railac", 'eurostat')
+    df
@@ -18,5 +18,6 @@ What's New
 
 These are new features and improvements of note in each release.
 
+.. include:: whatsnew/v0.2.1.txt
 .. include:: whatsnew/v0.2.0.txt
 
@@ -22,12 +22,12 @@ New features
 - Added get_available_datasets to famafrench (:issue:`56`).
 - ``DataReader`` now supports OECD data sources, see :ref:`here<remote_data.oecd>` (:issue:`101`).
 
-.. _whatsnew_0170.api:
+.. _whatsnew_020.api_breaking:
 
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. _whatsnew_020.api_breaking:
+
 - Fama French indexes are not Pandas.PeriodIndex for annual and montly data, and
   pandas.DatetimeIndex otherwise (:issue:`56`).
 
 
@@ -0,0 +1,32 @@
+.. _whatsnew_021:
+
+v0.2.1 (XXX)
+----------------------------
+
+This is a minor release from 0.2.0 and includes new features and a number of bug fixes.
+
+
+Highlights include:
+
+
+.. contents:: What's new in v0.2.1
+    :local:
+    :backlinks: none
+
+.. _whatsnew_021.enhancements:
+
+New features
+~~~~~~~~~~~~
+
+- ``DataReader`` now supports Eurostat data sources, see :ref:`here<remote_data.eurostat>` (:issue:`101`).
+
+.. _whatsnew_021.api_breaking:
+
+Backwards incompatible API changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _whatsnew_021.bug_fixes:
+
+Bug Fixes
+~~~~~~~~~
+
@@ -13,6 +13,7 @@
 from pandas_datareader.yahoo.components import _get_data as get_components_yahoo
 from pandas_datareader.yahoo.options import Options as YahooOptions
 
+from pandas_datareader.eurostat import EurostatReader
 from pandas_datareader.fred import FredReader
 from pandas_datareader.famafrench import FamaFrenchReader
 from pandas_datareader.oecd import OECDReader
@@ -117,6 +118,10 @@ def DataReader(name, data_source=None, start=None, end=None,
         return OECDReader(symbols=name, start=start, end=end,
                           retry_count=retry_count, pause=pause,
                           session=session).read()
+    elif data_source == "eurostat":
+        return EurostatReader(symbols=name, start=start, end=end,
+                              retry_count=retry_count, pause=pause,
+                              session=session).read()
     else:
         raise NotImplementedError(
                 "data_source=%r is not implemented" % data_source)
 
@@ -0,0 +1,44 @@
+from __future__ import unicode_literals
+
+import pandas as pd
+import pandas.compat as compat
+
+from pandas_datareader.io.sdmx import read_sdmx, _read_sdmx_dsd
+from pandas_datareader.base import _BaseReader
+
+
+class EurostatReader(_BaseReader):
+
+    """Get data for the given name from Eurostat."""
+
+    _URL = 'http://www.ec.europa.eu/eurostat/SDMX/diss-web/rest'
+
+    @property
+    def url(self):
+        if not isinstance(self.symbols, compat.string_types):
+            raise ValueError('data name must be string')
+
+        return '{0}/data/{1}/?'.format(self._URL, self.symbols)
+
+    @property
+    def dsd_url(self):
+        if not isinstance(self.symbols, compat.string_types):
+            raise ValueError('data name must be string')
+
+        return '{0}/datastructure/ESTAT/DSD_{1}'.format(self._URL, self.symbols)
+
+    def _read_one_data(self, url, params):
+        resp_dsd = self._get_response(self.dsd_url)
+        dsd = _read_sdmx_dsd(resp_dsd.content)
+
+        resp = self._get_response(url)
+        data = read_sdmx(resp.content, dsd=dsd)
+
+        try:
+            data.index = pd.to_datetime(data.index)
+            data = data.sort_index()
+            data = data.truncate(self.start, self.end)
+        except ValueError:
+            pass
+        return data
+
@@ -1 +1,2 @@
-from pandas_datareader.io.jsdmx import read_jsdmx
+from pandas_datareader.io.jsdmx import read_jsdmx
+from pandas_datareader.io.sdmx import read_sdmx
@@ -0,0 +1,203 @@
+from __future__ import unicode_literals
+
+import collections
+import os
+
+import numpy as np
+import pandas as pd
+import pandas.compat as compat
+
+from pandas_datareader.io.util import _read_content
+
+
+_STRUCTURE = '{http://www.sdmx.org/resources/sdmxml/schemas/v2_1/structure}'
+_MESSAGE = '{http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message}'
+_GENERIC = '{http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic}'
+_COMMON = '{http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common}'
+_XML = '{http://www.w3.org/XML/1998/namespace}'
+
+_DATASET = _MESSAGE + 'DataSet'
+_SERIES = _GENERIC + 'Series'
+_SERIES_KEY = _GENERIC + 'SeriesKey'
+_OBSERVATION = _GENERIC + 'Obs'
+_VALUE = _GENERIC + 'Value'
+_OBSDIMENSION = _GENERIC + 'ObsDimension'
+_OBSVALUE = _GENERIC + 'ObsValue'
+_CODE = _STRUCTURE + 'Code'
+_TIMEDIMENSION = _STRUCTURE + 'TimeDimension'
+
+
+def read_sdmx(path_or_buf, dtype='float64', dsd=None):
+    """
+    Convert a SDMX-XML string to pandas object
+
+    Parameters
+    ----------
+    path_or_buf : a valid SDMX-XML string or file-like
+        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
+    dtype : str
+        dtype to coerce values
+    dsd : SDMXCode, optional
+        parsed DSD from ``_read_sdmx_dsd`` matching the SDMX-XML data
+
+    Returns
+    -------
+    results : DataFrame
+    """
+
+    xdata = _read_content(path_or_buf)
+
+    import xml.etree.ElementTree as ET
+    root = ET.fromstring(xdata)
+
+    structure = _get_child(root, _MESSAGE + 'Structure')
+    idx_name = structure.get('dimensionAtObservation')
+    dataset = _get_child(root, _DATASET)
+
+    keys = []
+    obss = []
+
+    for series in dataset.iter(_SERIES):
+        key = _parse_series_key(series)
+        obs = _parse_observations(series.iter(_OBSERVATION))
+        keys.append(key)
+        obss.append(obs)
+
+    mcols = _construct_index(keys, dsd=dsd)
+    mseries = _construct_series(obss, name=idx_name, dsd=dsd)
+
+    df = pd.DataFrame(mseries, dtype=dtype)
+    df = df.T
+    df.columns = mcols
+
+    return df
+
+
+def _construct_series(values, name, dsd=None):
+
+    # ts defines attributes to be handled as times
+    times = dsd.ts if dsd is not None else []
+
+    if len(values) < 1:
+        raise ValueError("Data contains no 'Series'")
+    results = []
+    for value in values:
+
+        if name in times:
+            idx = pd.DatetimeIndex([v[0] for v in value], name=name)
+        else:
+            idx = pd.Index([v[0] for v in value], name=name)
+
+        results.append(pd.Series([v[1] for v in value], index=idx))
+    return results
+
+
+def _construct_index(keys, dsd=None):
+
+    # code defines a mapping to key's internal code to its representation
+    codes = dsd.codes if dsd is not None else {}
+
+    if len(keys) < 1:
+        raise ValueError("Data contains no 'Series'")
+    names = [t[0] for t in keys[0]]
+    values = {}
+    # initialize
+    for key in keys:
+        for name, value in key:
+            # apply DSD
+            try:
+                value = codes[name][value]
+            except KeyError:
+                pass
+
+            try:
+                values[name].append(value)
+            except KeyError:
+                values[name] = [value]
+
+    midx = pd.MultiIndex.from_arrays([values[name] for name in names], names=names)
+    return midx
+
+
+def _parse_observations(observations):
+    results = []
+    for observation in observations:
+        obsdimension = _get_child(observation, _OBSDIMENSION)
+        obsvalue = _get_child(observation, _OBSVALUE)
+        results.append((obsdimension.get('value'), obsvalue.get('value')))
+    # return list of key/value tuple, eg: [(key, value), ...]
+    return results
+
+
+def _parse_series_key(series):
+    serieskey = _get_child(series, _SERIES_KEY)
+    key_values = serieskey.iter(_VALUE)
+    keys = [(key.get('id'), key.get('value')) for key in key_values]
+    # return list of key/value tuple, eg: [(key, value), ...]
+    return keys
+
+
+def _get_child(element, key):
+    elements = list(element.iter(key))
+    if len(elements) == 1:
+        return elements[0]
+    elif len(elements) == 0:
+        raise ValueError("Element {0} contains no {1}".format(element.tag, key))
+    else:
+        raise ValueError("Element {0} contains multiple {1}".format(element.tag, key))
+
+
+_NAME_EN = ".//{0}Name[@{1}lang='en']".format(_COMMON, _XML)
+
+
+def _get_english_name(element):
+    name = element.find(_NAME_EN).text
+    return name
+
+
+
+SDMXCode = collections.namedtuple('SDMXCode', ['codes', 'ts'])
+
+
+def _read_sdmx_dsd(path_or_buf):
+    """
+    Convert a SDMX-XML DSD string to mapping dictionary
+
+    Parameters
+    ----------
+    path_or_buf : a valid SDMX-XML DSD string or file-like
+        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
+
+    Returns
+    -------
+    results : namedtuple (SDMXCode)
+    """
+
+    xdata = _read_content(path_or_buf)
+
+    import xml.etree.cElementTree as ET
+    root = ET.fromstring(xdata)
+
+    structure = _get_child(root, _MESSAGE + 'Structures')
+    codes = _get_child(structure, _STRUCTURE + 'Codelists')
+    # concepts = _get_child(structure, _STRUCTURE + 'Concepts')
+    datastructures = _get_child(structure, _STRUCTURE + 'DataStructures')
+
+    code_results = {}
+    for codelist in codes:
+        # codelist_id = codelist.get('id')
+        codelist_name = _get_english_name(codelist)
+        mapper = {}
+        for code in codelist.iter(_CODE):
+            code_id = code.get('id')
+            name = _get_english_name(code)
+            mapper[code_id] = name
+        # codeobj = SDMXCode(id=codelist_id, name=codelist_name, mapper=mapper)
+        # code_results[codelist_id] = codeobj
+        code_results[codelist_name] = mapper
+
+    times = list(datastructures.iter(_TIMEDIMENSION))
+    times = [t.get('id') for t in times]
+
+    result = SDMXCode(codes=code_results, ts=times)
+    return result
Original file line number	Diff line number	Diff line change
`@@ -18,5 +18,6 @@ What's New`
`18`	`18`
`19`	`19`	`These are new features and improvements of note in each release.`
`20`	`20`
	`21`	`+.. include:: whatsnew/v0.2.1.txt`
`21`	`22`	`.. include:: whatsnew/v0.2.0.txt`
`22`	`23`
Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`		`-from pandas_datareader.io.jsdmx import read_jsdmx`
	`1`	`+from pandas_datareader.io.jsdmx import read_jsdmx`
	`2`	`+from pandas_datareader.io.sdmx import read_sdmx`