Download data from EUROSTAT
Download data from EUROSTAT
See the Eurostat query builder to look up dataset codes and construct query URLs. Examples of dataset codes:
- HICP: “prc_hicp_midx”
- real GDP (rGDP): “namq_10_gdp”
- Employment: “namq_10_a10_e”
- Unemployment: “une_rt_m”
- etc.
import requests
import numpy as np
import pandas as pd
# Base endpoint of the Eurostat JSON REST service; the dataset code and its
# query string are appended to it below.
# NOTE(review): this is the plain-http "wdds" v2.1 endpoint — confirm it is
# still being served before relying on it.
COMMON_URL = "http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/"

# select "hicp" or "gdp"
variable_to_fetch = 'gdp'
geo = "EA19"

# Pick the dataset query and the pandas frequency alias that matches it.
if variable_to_fetch == 'gdp':
    ind_url = f"namq_10_gdp?na_item=B1GQ&sinceTimePeriod=1995Q1&precision=1&geo={geo}&unit=CLV10_MEUR&s_adj=SCA"
    # Quarterly series; the canonical pandas alias is upper-case "QS".
    freq = 'QS'
elif variable_to_fetch == 'hicp':
    ind_url = f"prc_hicp_midx?sinceTimePeriod=1996M01&precision=3&geo={geo}&unit=I96&coicop=CP00"
    freq = 'MS'
else:
    # Fail here instead of hitting a NameError on ind_url/freq further down.
    raise ValueError(
        f"variable_to_fetch must be 'gdp' or 'hicp', got {variable_to_fetch!r}"
    )

series_url = f"{COMMON_URL}{ind_url}"

# A timeout keeps the notebook from hanging forever on an unresponsive server.
response = requests.get(series_url, timeout=30)

# Explicit check rather than `assert`, which is stripped when running with -O.
if response.status_code != 200:
    raise RuntimeError(
        f"Eurostat request failed with HTTP {response.status_code}: {series_url}"
    )

response_dictionary = response.json()
response_dictionary.keys()
dict_keys(['version', 'label', 'href', 'source', 'updated', 'extension', 'class', 'value', 'dimension', 'id', 'size'])
# Inspect the "unit" dimension metadata returned with the series.
response_dictionary['dimension']['unit']
{'label': 'unit',
'category': {'index': {'CLV10_MEUR': 0},
'label': {'CLV10_MEUR': 'Chain linked volumes (2010), million euro'}}}
# check geo: show the first geo code listed in the response's label mapping
list(response_dictionary["dimension"]["geo"]["category"]["label"])[0]
'EA19'
# Build a DataFrame of observations indexed by period start date.
#
# The time dimension maps each period label to its position; the "value"
# object is sparse and keyed by the stringified position, so periods without
# an observation are simply absent. Each value is therefore looked up by
# position instead of assuming the gaps sit at the start of the sample.
# NOTE(review): using the time position as the value key assumes every other
# dimension has exactly one category, as the query requests — confirm if the
# query is widened.
time_positions = response_dictionary["dimension"]["time"]["category"]["index"]
observations = response_dictionary["value"]

# Order the period labels by their declared position, not by dict order.
periods = sorted(time_positions, key=time_positions.get)

if isinstance(observations, dict):
    raw_values = [observations.get(str(time_positions[p])) for p in periods]
else:
    # Dense form: a plain sequence indexed by position.
    raw_values = [observations[time_positions[p]] for p in periods]

time = np.array(periods)
# dtype=float turns missing observations (None) into NaN and keeps the column
# numeric; stacking it with the string labels would coerce it to strings.
values = np.array(raw_values, dtype=float)

# Keep only periods that actually have an observation.
observed = ~np.isnan(values)
time, values = time[observed], values[observed]

df = pd.DataFrame({"time": time, "value": values})
# Monthly labels look like "1996M01"; turn them into "1996-01" for parsing.
df["time"] = pd.to_datetime(df["time"].str.replace("M", "-", regex=False))
df = df.set_index("time")
df
value | |
---|---|
time | |
1995-01-01 | 1837419.9 |
1995-04-01 | 1847658.7 |
1995-07-01 | 1851717.6 |
1995-10-01 | 1858277.7 |
1996-01-01 | 1861887.8 |
... | ... |
2020-10-01 | 2569399.0 |
2021-01-01 | 2564113.0 |
2021-04-01 | 2620528.3 |
2021-07-01 | 2679880.7 |
2021-10-01 | 2687644.7 |
108 rows × 1 columns
# Conform the index to the frequency selected for the chosen series, then
# show the last rows as a quick sanity check.
df = df.asfreq(freq=freq)
df.tail(n=5)
value | |
---|---|
time | |
2020-10-01 | 2569399.0 |
2021-01-01 | 2564113.0 |
2021-04-01 | 2620528.3 |
2021-07-01 | 2679880.7 |
2021-10-01 | 2687644.7 |