Dataset "Fallzahlen in Deutschland" by the Robert Koch-Institut. Published under Data Licence Germany – Attribution – Version 2.0: Full License
Corona Statistics¶
First, we download and prepare the RKI data:
In [2]:
# Where the RKI case table is cached locally and where it is downloaded from.
csv_file = 'rki_data.csv'
csv_url = 'https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data'
# Columns to parse as date.
date_columns = ['Meldedatum', 'Refdatum']
# A cached file older than this many seconds (one day) is replaced by a fresh download.
max_cache_age = 1 * 24 * 60 * 60

# Load data: reuse the cached CSV while it is fresh, otherwise download and cache it.
if os.path.exists(csv_file) and os.stat(csv_file).st_mtime > time.time() - max_cache_age:
    last_update = datetime.datetime.fromtimestamp(os.stat(csv_file).st_mtime)
    rki_data = pandas.read_csv(csv_file, parse_dates=date_columns)
    print("Used cached numbers from disk: ", last_update)
else:
    rki_data = pandas.read_csv(csv_url, parse_dates=date_columns)
    rki_data.to_csv(csv_file, index=False)
    print("Downloaded fresh numbers from RKI: ", datetime.datetime.now())

# Sort all data by reference date so that the running totals below follow time.
rki_data.sort_values('Refdatum', inplace=True)
# The former `rki_data.set_index('FID')` call was dropped: its result was never
# assigned, so it only built and discarded a copy. FID stays a regular column.

# Prepare cumulative sums to know how many cases we had before a certain point.
# The running total is computed separately within each Landkreis.
for k in ['Fall', 'Todesfall', 'Genesen']:
    rki_data['Summe' + k] = rki_data.groupby('Landkreis')['Anzahl' + k].cumsum()
Used cached numbers from disk: 2022-06-23 10:34:42.228324
Here is the format of the data we get from the RKI database, with the cumulative sums added. The sample is transposed to fit the page width:
In [3]:
# Show two randomly drawn rows, flipped so that every column becomes a table row.
rki_data.sample(n=2).T
Out[3]:
4390287 | 2940133 | |
---|---|---|
FID | 4390288 | 2940134 |
IdBundesland | 12 | 9 |
Bundesland | Brandenburg | Bayern |
Landkreis | LK Barnim | SK Rosenheim |
Altersgruppe | A60-A79 | A15-A34 |
Geschlecht | W | W |
AnzahlFall | 1 | 1 |
AnzahlTodesfall | 0 | 0 |
Meldedatum | 2022-03-28 00:00:00 | 2022-02-18 00:00:00 |
IdLandkreis | 12060 | 9163 |
Datenstand | 23.06.2022, 00:00 Uhr | 23.06.2022, 00:00 Uhr |
NeuerFall | 0 | 0 |
NeuerTodesfall | -9 | -9 |
Refdatum | 2022-03-19 00:00:00 | 2022-02-13 00:00:00 |
NeuGenesen | 0 | 0 |
AnzahlGenesen | 1 | 1 |
IstErkrankungsbeginn | 1 | 1 |
Altersgruppe2 | Nicht übermittelt | Nicht übermittelt |
SummeFall | 42054 | 14752 |
SummeTodesfall | 345 | 116 |
SummeGenesen | 41709 | 14636 |
Details Kaiserslautern¶
In [4]:
# Keep only the rows whose Landkreis is 'SK Kaiserslautern'.
is_kl = rki_data['Landkreis'] == 'SK Kaiserslautern'
rki_kl = rki_data[is_kl]

# Per-row balance: cases that are counted neither as deaths nor as recovered.
sick_cases = rki_kl['AnzahlFall'] - rki_kl['AnzahlTodesfall'] - rki_kl['AnzahlGenesen']

# Print the four totals, one per line.
for label, value in (
    ('Total cases:', rki_kl['AnzahlFall'].sum()),
    ('Still sick:', sick_cases.sum()),
    ('Deaths:', rki_kl['AnzahlTodesfall'].sum()),
    ('Recovered:', rki_kl['AnzahlGenesen'].sum()),
):
    print(label, value)
Total cases: 31937 Still sick: 1351 Deaths: 229 Recovered: 30357
All regions I am interested in¶
In [5]:
# Landkreis values used to filter the per-district plots.
interesting_Ks = [
    'SK Kaiserslautern',
    'LK Kaiserslautern',
    'LK Bad Dürkheim',
    'SK Speyer',
]
All Cases¶
In [6]:
# Running case totals of the selected districts, keyed by reference date.
in_selection = rki_data['Landkreis'].isin(interesting_Ks)
plot_data = rki_data.loc[in_selection, ['Refdatum', 'Landkreis', 'SummeFall']]

plt.figure(figsize=(16, 16))
# One step line per Landkreis.
ax = seaborn.lineplot(
    data=plot_data,
    x='Refdatum',
    y='SummeFall',
    hue='Landkreis',
    drawstyle='steps-pre',
    dashes=False,
    markers=True,
    ci=None,
    alpha=0.7,
)

# Clamp the x axis to the date range that actually occurs in the selection.
first_day = plot_data['Refdatum'].min()
last_day = plot_data['Refdatum'].max()
ax.set_xlim(first_day, last_day)
ax.set(title='All Reported Cases', xlabel='Zeit', ylabel='Fälle')

# Draw the y axis label and ticks on the right-hand side.
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
Past Two Weeks¶
In [7]:
# Cut-off for the two-week window.
# NOTE(review): today() carries the current time of day, so rows stamped at
# midnight exactly 14 days ago fall just outside the window - confirm this is intended.
two_weeks_ago = datetime.datetime.today() - datetime.timedelta(days=14)

# Select the districts of interest and the recent rows in a single pass. One
# explicit copy is enough to allow adding columns without touching rki_data.
recent_rows = rki_data.Landkreis.isin(interesting_Ks) & (rki_data.Refdatum >= two_weeks_ago)
two_week_data = rki_data[recent_rows].copy()

# New sum for two weeks data: restart the per-Landkreis running totals inside the window.
for k in ['Fall', 'Todesfall', 'Genesen']:
    two_week_data['Summe' + k] = two_week_data.groupby('Landkreis')['Anzahl' + k].cumsum()

plt.figure(figsize=(16, 16))
ax = seaborn.lineplot(x='Refdatum', y='SummeFall', hue='Landkreis', data=two_week_data,
                      dashes=False, markers='.', drawstyle='steps-pre', ci=None, alpha=0.7)
ax.set(xlabel='Time', ylabel='Cases', title='Last Two Weeks.')
# Draw the y axis label and ticks on the right-hand side.
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
Past Thirty Days¶
In [8]:
thirty_days_ago = datetime.datetime.today() - datetime.timedelta(days=30)
thirty_days_data = rki_data[rki_data.Landkreis.isin(interesting_Ks)].copy()
thirty_days_data = thirty_days_data[thirty_days_data.Refdatum >= thirty_days_ago].copy()
thirty_days_data
# New sum for two weeks data.
for k in ['Fall', 'Todesfall', 'Genesen']:
thirty_days_data['Summe' + k] = thirty_days_data.copy()[['Landkreis', 'Anzahl'+k]].groupby('Landkreis').cumsum()
plt.figure(figsize=(16, 16))
ax = seaborn.lineplot(x='Refdatum', y='SummeFall', hue='Landkreis', data=thirty_days_data,
dashes=False, markers='.', drawstyle='steps-pre', ci=None, alpha=0.7)
ax.set(xlabel='Zeit', ylabel='Fälle', title='30 Days.')
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
Age and Cases in Germany¶
In [9]:
# Daily case counts for all rows of the data set, split by age group.
# The plotted quantity is AnzahlFall summed per Meldedatum. The previous version
# summed SummeFall, which is already a per-Landkreis running total, so the curve
# did not show cases per day as the title states.
plot_data = rki_data[['Meldedatum', 'Altersgruppe', 'AnzahlFall']]

plt.figure(figsize=(16, 16))
ax = seaborn.lineplot(x='Meldedatum', y='AnzahlFall', hue='Altersgruppe',
                      dashes=False, markers=True, data=plot_data, drawstyle='steps-pre',
                      ci=None, alpha=0.7, estimator='sum')
# Take the x range from the plotted data itself, not from a district subset.
ax.set_xlim(plot_data['Meldedatum'].min(), plot_data['Meldedatum'].max())
ax.set(xlabel='Time', ylabel='Cases', title='Cases per Day in Germany by Age.')
# Draw the y axis label and ticks on the right-hand side.
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
Death and Age in Germany¶
In [10]:
# Daily death counts for all rows of the data set, split by age group.
# plot_data now holds exactly the columns that are drawn. Before, it was a
# subset restricted to the selected districts and was only used for the x
# limits, which clipped this all-rows plot to the date range of those districts.
plot_data = rki_data[['Meldedatum', 'Altersgruppe', 'AnzahlTodesfall']]

plt.figure(figsize=(16, 16))
ax = seaborn.lineplot(x='Meldedatum', y='AnzahlTodesfall', hue='Altersgruppe',
                      dashes=False, markers=True, data=plot_data, drawstyle='steps-pre',
                      ci=None, alpha=0.7, estimator='sum')
ax.set_xlim(plot_data['Meldedatum'].min(), plot_data['Meldedatum'].max())
ax.set(xlabel='Time', ylabel='Deaths', title='Deaths per Day in Germany by Age.')
# Draw the y axis label and ticks on the right-hand side.
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
In [11]:
# Totals of cases, recoveries and deaths per age group, computed in one pass.
age_totals = (
    rki_data
    .groupby('Altersgruppe')[['AnzahlFall', 'AnzahlGenesen', 'AnzahlTodesfall']]
    .sum()
    .reset_index()
)
total_sums = age_totals[['Altersgruppe', 'AnzahlFall']]
recovered_sums = age_totals[['Altersgruppe', 'AnzahlGenesen']]
death_sums = age_totals[['Altersgruppe', 'AnzahlTodesfall']]

# Result table: recoveries and deaths side by side, one row per age group.
sums = age_totals[['Altersgruppe', 'AnzahlGenesen', 'AnzahlTodesfall']].copy()

# Cases with a known outcome (recovered or dead).
closed_cases = sums['AnzahlGenesen'] + sums['AnzahlTodesfall']

# Share of deaths among the cases with a known outcome.
fractions = sums['AnzahlTodesfall'] / closed_cases
sums['AnteilTode'] = fractions

# Cases that are counted neither as recovered nor as dead.
sick = total_sums['AnzahlFall'] - closed_cases
sums['Noch Krank'] = sick
sums
Out[11]:
Altersgruppe | AnzahlGenesen | AnzahlTodesfall | AnteilTode | Noch Krank | |
---|---|---|---|---|---|
0 | A00-A04 | 877993 | 34 | 0.000039 | 15248 |
1 | A05-A14 | 3866795 | 30 | 0.000008 | 73238 |
2 | A15-A34 | 8084719 | 403 | 0.000050 | 355526 |
3 | A35-A59 | 9781579 | 7266 | 0.000742 | 461597 |
4 | A60-A79 | 2851322 | 42638 | 0.014733 | 154887 |
5 | A80+ | 847571 | 90173 | 0.096160 | 31315 |
6 | unbekannt | 29860 | 13 | 0.000435 | 785 |
In [12]:
# Select the needed columns first and copy only those; copying the whole frame
# before selecting duplicated every column of rki_data for nothing.
plot_data = rki_data[['Meldedatum', 'Altersgruppe', 'Landkreis',
                      'AnzahlFall', 'AnzahlGenesen', 'AnzahlTodesfall']].copy()
# Per-row balance: cases that are counted neither as recovered nor as dead.
plot_data['NochKrank'] = plot_data.AnzahlFall - (plot_data.AnzahlGenesen + plot_data.AnzahlTodesfall)
# Keep only the rows that still contribute open cases.
plot_data = plot_data[plot_data.NochKrank > 0]

plt.figure(figsize=(16, 16))
ax = seaborn.lineplot(x='Meldedatum', y='NochKrank', hue='Altersgruppe',
                      dashes=False, markers=True, data=plot_data, drawstyle='steps-pre',
                      ci=None, alpha=0.7, estimator='sum')
ax.set_xlim(plot_data['Meldedatum'].min(), plot_data['Meldedatum'].max())
# Title typo fixed ("wo" -> "who"); the date in brackets is the day the cell was run.
ax.set(xlabel='Time', ylabel='Still Sick',
       title='Cases who are still (%s) sick by Meldedatum in Germany.' % datetime.date.today())
# Draw the y axis label and ticks on the right-hand side.
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()