mirror of
https://github.com/Andreaierardi/Master-DataScience-Notes.git
synced 2024-10-16 18:40:48 +02:00
336 lines
15 KiB
Plaintext
336 lines
15 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"E:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|
" interactivity=interactivity, compiler=compiler, result=result)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"from datetime import date\n",
|
|
"\n",
|
|
"## Real data: begins with \"R\"\n",
|
|
"# Real Deaths: RD\n",
|
|
"RD = pd.read_csv(\"https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Deaths.csv\")\n",
|
|
"# Real Cases: RC\n",
|
|
"RC = pd.read_csv(\"https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Cases.csv\")\n",
|
|
"\n",
|
|
"Rstates = RD.location_name.unique()\n",
|
|
"\n",
|
|
"# Real Series getter\n",
|
|
"def getRS(type, state, aggregateOn = 5):\n",
|
|
" \"\"\"Gets the real cases or deaths series by state\n",
|
|
"\n",
|
|
" Parameters\n",
|
|
" ----------\n",
|
|
" type : str\n",
|
|
" 'C' for cumulative cases. \n",
|
|
" 'D' for cumulative deaths.\n",
|
|
" state : str\n",
|
|
" The state where the cases or deaths were recorded\n",
|
|
" aggregateOn : int or bool\n",
|
|
" The weekday to aggregate the observations on.\n",
|
|
" 0 is Monday, 6 is Sunday.\n",
|
|
" Set to False to prevent aggregation.\n",
|
|
"\n",
|
|
" Returns\n",
|
|
" -------\n",
|
|
" pandas.Series : the series of real cases or deaths of the specified state. The index is a pandas.DatetimeIndex.\n",
|
|
" \"\"\"\n",
|
|
" if(type == 'C'):\n",
|
|
" out = pd.Series(RC[RC['location_name'] == state].iloc[:,3].values,\n",
|
|
" index = pd.to_datetime(RC[RC['location_name'] == state].iloc[:,0].values, format=\"%Y-%m-%d\"),\n",
|
|
" name = state + \": Cumulative cases\")\n",
|
|
" elif(type == 'D'):\n",
|
|
" out = pd.Series(RD[RD['location_name'] == state].iloc[:,3].values,\n",
|
|
" index = pd.to_datetime(RD[RD['location_name'] == state].iloc[:,0].values, format=\"%Y-%m-%d\"),\n",
|
|
" name = state + \": Cumulative deaths\"\n",
|
|
" )\n",
|
|
" if(aggregateOn is not False):\n",
|
|
" out = out[out.index.weekday == aggregateOn]\n",
|
|
"\n",
|
|
" return(out)\n",
|
|
"\n",
|
|
"# Example: getRS('D',Rstates[1])\n",
|
|
"\n",
|
|
"# # shift series to first non-zero occurrence\n",
|
|
"# daily_s = daily_s[daily_s>0]\n",
|
|
"# # switch aggregation range to weekly (every Saturday)\n",
|
|
"# D = daily_s[daily_s.index.weekday == 5]\n",
|
|
"\n",
|
|
"\n",
|
|
"## Forecast data: begins with \"F\"\n",
|
|
"# Forecasted cases: FC\n",
|
|
"FC = pd.read_csv(\"https://www.cdc.gov/coronavirus/2019-ncov/downloads/cases-updates/2020-10-19-all-forecasted-cases-model-data.csv\")\n",
|
|
"# Forecasted deaths: FD\n",
|
|
"FD = pd.read_csv('https://www.cdc.gov/coronavirus/2019-ncov/covid-data/files/2020-10-19-model-data.csv')\n",
|
|
"\n",
|
|
"Fmodels = FD.model.unique()\n",
|
|
"Fstates = FD.location_name.unique()\n",
|
|
"\n",
|
|
"# Forecast Series getter\n",
|
|
"def getFS(type, model, state, Fdate):\n",
|
|
" \"\"\"Gets the forecasted cases or deaths series by model, state and forecast date\n",
|
|
"\n",
|
|
" Parameters\n",
|
|
" ----------\n",
|
|
" type : str\n",
|
|
" 'C' for cumulative cases. \n",
|
|
" 'D' for cumulative deaths.\n",
|
|
" model : str\n",
|
|
" The model of the forecast\n",
|
|
" state : str\n",
|
|
" The target state of the forecast\n",
|
|
" Fdate : str or datetime\n",
|
|
" The date when the forecast was performed. If a string, provide the format '%Y-%m-%d'.\n",
|
|
"\n",
|
|
" Returns\n",
|
|
" -------\n",
|
|
" pandas.DataFrame\n",
|
|
" a data frame containing 5 series:\n",
|
|
" - point series\n",
|
|
" - 2.5% quantile\n",
|
|
" - 25% quantile\n",
|
|
" - 75% quantile\n",
|
|
" - 97.5% quantile\n",
|
|
" The index is a pandas.DatetimeIndex.\n",
|
|
" \"\"\"\n",
|
|
" if(type == 'C'):\n",
|
|
" out = FC[(FC.model == model) & (FC.location_name == state) & (FC.forecast_date == Fdate)] \n",
|
|
" elif(type == 'D'):\n",
|
|
" out = FD[(FD.model == model) & (FD.location_name == state) & (FD.forecast_date == Fdate) & FD.target.apply(str.endswith, args=('cum death',0))]\n",
|
|
" else:\n",
|
|
" return None\n",
|
|
" if( out.empty ):\n",
|
|
" return None\n",
|
|
" out = pd.DataFrame(out.iloc[:,-5:].values,\n",
|
|
" columns = out.columns[-5:],\n",
|
|
" index = pd.to_datetime(out.iloc[:,3], format=\"%Y-%m-%d\")\n",
|
|
" )\n",
|
|
" \n",
|
|
" return out\n",
|
|
" \n",
|
|
"# Example: getFS('C', Fmodels[1], Fstates[1], FD.forecast_date[1])\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"# prova1 = pd.ExcelFile('Matlab to python/data_models-Florida.xlsx')\n",
|
|
"\n",
|
|
"# prova1.sheet_names\n",
|
|
"\n",
|
|
"# prova2 = prova1.parse('Ensamble')\n",
|
|
"# prova2\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array(['Alabama', 'Alaska', 'American Samoa', ..., 'Uinta County',\n",
|
|
" 'Washakie County', 'Weston County'], dtype=object)"
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"2020-01-25 0\n",
|
|
"2020-02-01 0\n",
|
|
"2020-02-08 0\n",
|
|
"2020-02-15 0\n",
|
|
"2020-02-22 0\n",
|
|
"2020-02-29 0\n",
|
|
"2020-03-07 0\n",
|
|
"2020-03-14 0\n",
|
|
"2020-03-21 0\n",
|
|
"2020-03-28 2\n",
|
|
"2020-04-04 5\n",
|
|
"2020-04-11 8\n",
|
|
"2020-04-18 9\n",
|
|
"2020-04-25 9\n",
|
|
"2020-05-02 9\n",
|
|
"2020-05-09 10\n",
|
|
"2020-05-16 10\n",
|
|
"2020-05-23 10\n",
|
|
"2020-05-30 10\n",
|
|
"2020-06-06 10\n",
|
|
"2020-06-13 12\n",
|
|
"2020-06-20 12\n",
|
|
"2020-06-27 14\n",
|
|
"2020-07-04 16\n",
|
|
"2020-07-11 17\n",
|
|
"2020-07-18 18\n",
|
|
"2020-07-25 20\n",
|
|
"2020-08-01 24\n",
|
|
"2020-08-08 26\n",
|
|
"2020-08-15 28\n",
|
|
"2020-08-22 31\n",
|
|
"2020-08-29 37\n",
|
|
"2020-09-05 42\n",
|
|
"2020-09-12 44\n",
|
|
"2020-09-19 45\n",
|
|
"2020-09-26 52\n",
|
|
"2020-10-03 58\n",
|
|
"2020-10-10 60\n",
|
|
"2020-10-17 67\n",
|
|
"2020-10-24 68\n",
|
|
"2020-10-31 82\n",
|
|
"2020-11-07 84\n",
|
|
"2020-11-14 98\n",
|
|
"Name: Alaska: Cumulative deaths, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
" getRS('D',Rstates[1])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"data= getFS('D', Fmodels[1], Fstates[1], FD.forecast_date[1])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([[ 74., 67., 71., 78., 84.],\n",
|
|
" [ 81., 67., 75., 89., 105.],\n",
|
|
" [ 89., 67., 79., 101., 123.],\n",
|
|
" [ 99., 67., 81., 111., 141.]])"
|
|
]
|
|
},
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"data.values"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"2020-10-24 00:00:00\n",
|
|
"2020-10-31 00:00:00\n",
|
|
"2020-11-07 00:00:00\n",
|
|
"2020-11-14 00:00:00\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"for i in data.index.tolist():\n",
|
|
" print(i)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "KeyError",
|
|
"evalue": "'target_week_end_data'",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[1;32mE:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2896\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2897\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2898\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
|
|
"\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
|
|
"\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
|
|
"\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
|
|
"\u001b[1;31mKeyError\u001b[0m: 'target_week_end_data'",
|
|
"\nDuring handling of the above exception, another exception occurred:\n",
|
|
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[1;32m<ipython-input-38-62575f9ff9bf>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"target_week_end_data\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
|
|
"\u001b[1;32mE:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2978\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2979\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2980\u001b[1;33m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2981\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2982\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;32mE:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2897\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2898\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2899\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2900\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2901\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
|
|
"\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
|
|
"\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
|
|
"\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
|
|
"\u001b[1;31mKeyError\u001b[0m: 'target_week_end_data'"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"data[\"target_week_end_data\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|