import numpy as np
import pandas as pd
from datetime import date

## Real data: names begin with "R"
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk; it is the remedy suggested by the
# "DtypeWarning: Columns (1) have mixed types" message raised on load.
# Real Deaths: RD
RD = pd.read_csv("https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Deaths.csv",
                 low_memory=False)
# Real Cases: RC
RC = pd.read_csv("https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Cases.csv",
                 low_memory=False)

Rstates = RD.location_name.unique()


def _real_series(table, state, label):
    """Extract one location's cumulative series from a truth table.

    Columns are read by position: column 0 is parsed as the observation
    date ('%Y-%m-%d') and column 3 supplies the values.
    """
    # Filter once and reuse the rows for both the values and the index.
    rows = table[table['location_name'] == state]
    return pd.Series(rows.iloc[:, 3].values,
                     index=pd.to_datetime(rows.iloc[:, 0].values, format="%Y-%m-%d"),
                     name=state + ": Cumulative " + label)


# Real Series getter
def getRS(type, state, aggregateOn = 5):
    """Gets the real cases or deaths series by state

    Parameters
    ----------
    type : str
        'C' for cumulative cases.
        'D' for cumulative deaths.
        (The name shadows the built-in ``type``; it is kept so that existing
        keyword calls keep working.)
    state : str
        The state (``location_name``) the observations belong to
    aggregateOn : int, bool or None
        The weekday whose observations are kept.
        0 is Monday, 6 is Sunday.
        Set to False (or None) to keep every observation.

    Returns
    -------
    pandas.Series : the series of real cases or deaths of the specified state.
        Indexes are of class pandas.DatetimeIndex.

    Raises
    ------
    ValueError
        If `type` is neither 'C' nor 'D'.
    """
    if type == 'C':
        out = _real_series(RC, state, "cases")
    elif type == 'D':
        out = _real_series(RD, state, "deaths")
    else:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError("type must be 'C' (cases) or 'D' (deaths), got %r" % (type,))

    # `is not False` (rather than truthiness) so that 0, i.e. Monday, still filters.
    if aggregateOn is not False and aggregateOn is not None:
        out = out[out.index.weekday == aggregateOn]

    return out

# Example: getRS('D',Rstates[1])


## Forecast data: names begin with "F"
# Forecasted cases: FC
FC = pd.read_csv("https://www.cdc.gov/coronavirus/2019-ncov/downloads/cases-updates/2020-10-19-all-forecasted-cases-model-data.csv",
                 low_memory=False)
# Forecasted deaths: FD
FD = pd.read_csv('https://www.cdc.gov/coronavirus/2019-ncov/covid-data/files/2020-10-19-model-data.csv',
                 low_memory=False)

Fmodels = FD.model.unique()
Fstates = FD.location_name.unique()

# Forecast Series getter
def getFS(type, model, state, Fdate):
    """Gets the forecasted cases or deaths by model, state and forecast date

    Parameters
    ----------
    type : str
        'C' for the cases forecasts (FC).
        'D' for the cumulative deaths forecasts (FD rows whose target ends
        with 'cum death').
    model : str
        The model of the forecast
    state : str
        The target state of the forecast
    Fdate : str or datetime
        The date when the forecast was performed.
        If a string, provide the format '%Y-%m-%d'.
        date, datetime, pandas.Timestamp and numpy.datetime64 values are
        converted to that format before matching.

    Returns
    -------
    pandas.DataFrame or None
        A data frame holding the last five columns of the matching rows
        (for the deaths file loaded above these are):
        - point series
        - 2.5% quantile
        - 25% quantile
        - 75% quantile
        - 97.5% quantile
        indexed by the fourth column parsed as dates
        (pandas.DatetimeIndex).
        None if `type` is not 'C' or 'D', or if no row matches.
    """
    # read_csv is called without parse_dates, so forecast_date holds strings;
    # a date-like argument compared directly would never match any row.
    if isinstance(Fdate, (date, np.datetime64)) and not pd.isna(Fdate):
        Fdate = pd.Timestamp(Fdate).strftime("%Y-%m-%d")

    if type == 'C':
        rows = FC[(FC.model == model) & (FC.location_name == state) & (FC.forecast_date == Fdate)]
    elif type == 'D':
        # Vectorised suffix test; na=False treats a missing target as "no match"
        # instead of raising TypeError as str.endswith does on a float NaN.
        is_cum_death = FD.target.str.endswith('cum death', na=False)
        rows = FD[(FD.model == model) & (FD.location_name == state) & (FD.forecast_date == Fdate) & is_cum_death]
    else:
        return None

    if rows.empty:
        return None

    # Passing the date column as a Series keeps its name as the index name.
    return pd.DataFrame(rows.iloc[:, -5:].values,
                        columns=rows.columns[-5:],
                        index=pd.to_datetime(rows.iloc[:, 3], format="%Y-%m-%d"))

# Example: getFS('C', Fmodels[1], Fstates[1], FD.forecast_date[1])
# %% Cumulative deaths of the second location in Rstates, on the default weekday (5 = Saturday)
getRS('D', Rstates[1])

# %% Deaths forecast for the second model and second location, at a forecast date taken from FD
data = getFS('D', Fmodels[1], Fstates[1], FD.forecast_date[1])
# getFS returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in a later cell.
assert data is not None, "no forecast rows for this model / location / forecast date"

# %% Raw values: one row per target date, one column per point/quantile series
data.values

# %% Target dates of the forecast, one per line
for i in data.index.tolist():
    print(i)

# %% The forecast table itself
data

# %%
type(data)

# %% Date and time parts of every index entry
# Index entries are pandas Timestamps, which have no .split();
# convert each one to its string form before splitting on whitespace.
[str(i).split() for i in data.index.tolist()]
# %% Show the forecast table once more
data

# %% Its index: the target dates of the forecast
data.index

# %% String form of the second index entry
str(data.index[1])

# %% Same dates again, this time iterated through a pandas Series
for i in pd.Series(list(data.index)):
    print(i)