Download data from EUROSTAT

Download data from EUROSTAT

See the Eurostat query builder for the dataset codes, for example:

  • for HICP “prc_hicp_midx”

  • for real GDP “namq_10_gdp”

  • for Employment “namq_10_a10_e”

  • for Unemployment “une_rt_m”

  • etc.

import requests
import numpy as np
import pandas as pd
# Base address of the Eurostat JSON REST service; the dataset code and its
# query string are appended to it below.
# NOTE(review): confirm this v2.1 endpoint is still served by Eurostat.
COMMON_URL = "http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/"
# select "hicp" or "gdp"
variable_to_fetch = 'gdp'
geo = "EA19"

# Pick the dataset query and the matching pandas frequency alias, which is
# applied to the resulting time index further down.
if variable_to_fetch == 'gdp':
    ind_url = f"namq_10_gdp?na_item=B1GQ&sinceTimePeriod=1995Q1&precision=1&geo={geo}&unit=CLV10_MEUR&s_adj=SCA"
    # "QS" (quarter start) is the canonical spelling; pandas aliases are
    # case-sensitive and "Qs" is not a documented alias.
    freq = 'QS'
elif variable_to_fetch == 'hicp':
    ind_url = f"prc_hicp_midx?sinceTimePeriod=1996M01&precision=3&geo={geo}&unit=I96&coicop=CP00"
    freq = 'MS'
else:
    # Fail here with a clear message rather than with a NameError on
    # `ind_url` a few lines later.
    raise ValueError(
        f"variable_to_fetch must be 'gdp' or 'hicp', got {variable_to_fetch!r}"
    )
series_url = f"{COMMON_URL}{ind_url}"

# Fetch the selected series. The timeout keeps the call from hanging
# indefinitely when the server never answers.
response = requests.get(series_url, timeout=30)
# Raise requests.HTTPError on a 4xx/5xx reply instead of relying on `assert`,
# which is skipped when Python runs with -O.
response.raise_for_status()
response_dictionary = response.json()
# Show the top-level keys of the decoded JSON payload.
response_dictionary.keys()
dict_keys(['version', 'label', 'href', 'source', 'updated', 'extension', 'class', 'value', 'dimension', 'id', 'size'])
# Show the unit metadata (label and category) returned with the series.
response_dictionary["dimension"]["unit"]
{'label': 'unit',
 'category': {'index': {'CLV10_MEUR': 0},
  'label': {'CLV10_MEUR': 'Chain linked volumes (2010), million euro'}}}
# check geo: show the first area code listed in the response
geo_labels = response_dictionary["dimension"]["geo"]["category"]["label"]
list(geo_labels)[0]
'EA19'
# Period labels (e.g. "1995Q1" or "1996M01") in the order the response
# lists them.
time = np.array(list(response_dictionary["dimension"]["time"]["category"]["index"]))
# Keep the observations numeric. Stacking them with the string labels in one
# array would coerce them to text and leave the "value" column non-numeric.
values = np.array(list(response_dictionary["value"].values()), dtype=float)
if values.size == 0:
    raise ValueError("Eurostat response contains no observations")
if len(time) > len(values):
    # Fewer observations than periods: align the values with the most recent
    # periods.
    # NOTE(review): assumes the missing observations are the earliest
    # periods — confirm against the keys of response_dictionary["value"].
    time = time[-len(values):].copy()

# Monthly labels use "M" as separator ("1996M01" -> "1996-01"); quarterly
# labels contain no "M" and are passed to pd.to_datetime unchanged.
period_index = pd.to_datetime(pd.Index(time).str.replace("M", "-", regex=False))
df = pd.DataFrame({"value": values}, index=period_index.rename("time"))
df
value
time
1995-01-01 1837419.9
1995-04-01 1847658.7
1995-07-01 1851717.6
1995-10-01 1858277.7
1996-01-01 1861887.8
... ...
2020-10-01 2569399.0
2021-01-01 2564113.0
2021-04-01 2620528.3
2021-07-01 2679880.7
2021-10-01 2687644.7

108 rows × 1 columns

# Attach the frequency selected for the chosen variable to the time index,
# then show the five most recent rows.
df = df.asfreq(freq=freq)
df.tail(5)
value
time
2020-10-01 2569399.0
2021-01-01 2564113.0
2021-04-01 2620528.3
2021-07-01 2679880.7
2021-10-01 2687644.7