In [1]:
import numpy as np
import pandas as pd
from datetime import date

## Real data: begins with "R"
# Real Deaths: RD
RD = pd.read_csv("https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Deaths.csv")
# Real Cases: RC
RC = pd.read_csv("https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Cases.csv")

Rstates = RD.location_name.unique()

# Real Series getter
def getRS(type, state, aggregateOn = 5):
    """Gets the real cases or deaths series by state

    Parameters
    ----------
    type : str
        'C' for cumulative cases. 
        'D' for cumulative deaths.
    state : str
        The state where deaths were recorded
    aggregateOn : int or bool
        The weekday to aggregate the observations on.
        0 is Monday, 6 is Sunday.
        Set to false to prevent aggregation.

    Returns
    -------
    pandas.Series : the series of real cases or deaths of the specified state. Indexes are of class pandas.DatetimeIndex.
    """
    if(type == 'C'):
        out = pd.Series(RC[RC['location_name'] == state].iloc[:,3].values,
          index = pd.to_datetime(RC[RC['location_name'] == state].iloc[:,0].values, format="%Y-%m-%d"),
          name = state + ": Cumulative cases")
    elif(type == 'D'):
        out = pd.Series(RD[RD['location_name'] == state].iloc[:,3].values,
                    index = pd.to_datetime(RD[RD['location_name'] == state].iloc[:,0].values, format="%Y-%m-%d"),
                    name = state + ": Cumulative deaths"
        )
        if(aggregateOn is not False):
            out = out[out.index.weekday == aggregateOn]

    return(out)

# Example: getRS('D',Rstates[1])

# # shift series to first non-zero occurence
# daily_s = daily_s[daily_s>0]
# # switch aggregation range to weekly (every Saturday)
# D = daily_s[daily_s.index.weekday == 5]


## Forecast data: begins with "F"
# Forecasted cases: FC
FC = pd.read_csv("https://www.cdc.gov/coronavirus/2019-ncov/downloads/cases-updates/2020-10-19-all-forecasted-cases-model-data.csv")
# Forecasted deaths: FD
FD = pd.read_csv('https://www.cdc.gov/coronavirus/2019-ncov/covid-data/files/2020-10-19-model-data.csv')

Fmodels = FD.model.unique()
Fstates = FD.location_name.unique()

# Forecast Series getter
def getFS(type, model, state, Fdate):
    """Gets the forecasted deaths series by model, state and forecast date

    Parameters
    ----------
    type : str
        'C' for cumulative cases. 
        'D' for cumulative deaths.
    model : str
        The model of the forecast
    state : str
        The target state of the forecast
    Fdate : str or datetime
        The date when the forecast was performed. If a string, provide the format '%Y-%m-%d'.

    Returns
    -------
    pandas.DataFrame
        a data frame containing 5 series:
           - point series
           - 2.5% quantile
           - 25% quantile
           - 75% quantile
           - 97.5% quantile
        Indexes are of class pandas.DatetimeIndex.
    """
    if(type == 'C'):
        out = FC[(FC.model == model) & (FC.location_name == state) & (FC.forecast_date == Fdate)] 
    elif(type == 'D'):
        out = FD[(FD.model == model) & (FD.location_name == state) & (FD.forecast_date == Fdate) & FD.target.apply(str.endswith, args=('cum death',0))]
    else:
        return None
    if( out.empty ):
        return None
    out = pd.DataFrame(out.iloc[:,-5:].values,
                columns = out.columns[-5:],
                index = pd.to_datetime(out.iloc[:,3], format="%Y-%m-%d")
                   )
     
    return out
     
# Example: getFS('C', Fmodels[1], Fstates[1], FD.forecast_date[1])




# prova1 = pd.ExcelFile('Matlab to python/data_models-Florida.xlsx')

# prova1.sheet_names

# prova2 = prova1.parse('Ensamble')
# prova2


  interactivity=interactivity, compiler=compiler, result=result)


array(['Alabama', 'Alaska', 'American Samoa', ..., 'Uinta County',
       'Washakie County', 'Weston County'], dtype=object)

In [3]:
 getRS('D',Rstates[1])

2020-01-25     0
2020-02-01     0
2020-02-08     0
2020-02-15     0
2020-02-22     0
2020-02-29     0
2020-03-07     0
2020-03-14     0
2020-03-21     0
2020-03-28     2
2020-04-04     5
2020-04-11     8
2020-04-18     9
2020-04-25     9
2020-05-02     9
2020-05-09    10
2020-05-16    10
2020-05-23    10
2020-05-30    10
2020-06-06    10
2020-06-13    12
2020-06-20    12
2020-06-27    14
2020-07-04    16
2020-07-11    17
2020-07-18    18
2020-07-25    20
2020-08-01    24
2020-08-08    26
2020-08-15    28
2020-08-22    31
2020-08-29    37
2020-09-05    42
2020-09-12    44
2020-09-19    45
2020-09-26    52
2020-10-03    58
2020-10-10    60
2020-10-17    67
2020-10-24    68
2020-10-31    82
2020-11-07    84
2020-11-14    98
Name: Alaska: Cumulative deaths, dtype: int64

In [32]:
data= getFS('D', Fmodels[1], Fstates[1], FD.forecast_date[1])

In [33]:
data.values

array([[ 74.,  67.,  71.,  78.,  84.],
       [ 81.,  67.,  75.,  89., 105.],
       [ 89.,  67.,  79., 101., 123.],
       [ 99.,  67.,  81., 111., 141.]])

In [36]:
for i in data.index.tolist():
    print(i)

2020-10-24 00:00:00
2020-10-31 00:00:00
2020-11-07 00:00:00
2020-11-14 00:00:00


In [39]:
data

Unnamed: 0_level_0,point,quantile_0.025,quantile_0.25,quantile_0.75,quantile_0.975
target_week_end_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-24,74.0,67.0,71.0,78.0,84.0
2020-10-31,81.0,67.0,75.0,89.0,105.0
2020-11-07,89.0,67.0,79.0,101.0,123.0
2020-11-14,99.0,67.0,81.0,111.0,141.0


In [42]:
type(data)

pandas.core.frame.DataFrame

In [51]:
[i.split() for i in data.index.tolist()]

AttributeError: 'Timestamp' object has no attribute 'split'

In [52]:
data

Unnamed: 0_level_0,point,quantile_0.025,quantile_0.25,quantile_0.75,quantile_0.975
target_week_end_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-24,74.0,67.0,71.0,78.0,84.0
2020-10-31,81.0,67.0,75.0,89.0,105.0
2020-11-07,89.0,67.0,79.0,101.0,123.0
2020-11-14,99.0,67.0,81.0,111.0,141.0


In [53]:
data.index

DatetimeIndex(['2020-10-24', '2020-10-31', '2020-11-07', '2020-11-14'], dtype='datetime64[ns]', name='target_week_end_date', freq=None)

In [56]:
str(data.index[1])

'2020-10-31 00:00:00'

In [63]:
 for i in pd.Series(data.index.tolist()):
        print(i)

2020-10-24 00:00:00
2020-10-31 00:00:00
2020-11-07 00:00:00
2020-11-14 00:00:00
