{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from datetime import date\n",
    "\n",
    "## Real data: begins with \"R\"\n",
    "# Real Deaths: RD\n",
    "RD = pd.read_csv(\"https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Deaths.csv\")\n",
    "# Real Cases: RC\n",
    "RC = pd.read_csv(\"https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Cases.csv\")\n",
    "\n",
    "Rstates = RD.location_name.unique()\n",
    "\n",
    "# Real Series getter\n",
    "def getRS(type, state, aggregateOn = 5):\n",
    "    \"\"\"Gets the real cases or deaths series by state\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    type : str\n",
    "        'C' for cumulative cases. \n",
    "        'D' for cumulative deaths.\n",
    "    state : str\n",
    "        The state where deaths were recorded\n",
    "    aggregateOn : int or bool\n",
    "        The weekday to aggregate the observations on.\n",
    "        0 is Monday, 6 is Sunday.\n",
    "        Set to false to prevent aggregation.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.Series : the series of real cases or deaths of the specified state. Indexes are of class pandas.DatetimeIndex.\n",
    "    \"\"\"\n",
    "    if(type == 'C'):\n",
    "        out = pd.Series(RC[RC['location_name'] == state].iloc[:,3].values,\n",
    "          index = pd.to_datetime(RC[RC['location_name'] == state].iloc[:,0].values, format=\"%Y-%m-%d\"),\n",
    "          name = state + \": Cumulative cases\")\n",
    "    elif(type == 'D'):\n",
    "        out = pd.Series(RD[RD['location_name'] == state].iloc[:,3].values,\n",
    "                    index = pd.to_datetime(RD[RD['location_name'] == state].iloc[:,0].values, format=\"%Y-%m-%d\"),\n",
    "                    name = state + \": Cumulative deaths\"\n",
    "        )\n",
    "        if(aggregateOn is not False):\n",
    "            out = out[out.index.weekday == aggregateOn]\n",
    "\n",
    "    return(out)\n",
    "\n",
    "# Example: getRS('D',Rstates[1])\n",
    "\n",
    "# # shift series to first non-zero occurence\n",
    "# daily_s = daily_s[daily_s>0]\n",
    "# # switch aggregation range to weekly (every Saturday)\n",
    "# D = daily_s[daily_s.index.weekday == 5]\n",
    "\n",
    "\n",
    "## Forecast data: begins with \"F\"\n",
    "# Forecasted cases: FC\n",
    "FC = pd.read_csv(\"https://www.cdc.gov/coronavirus/2019-ncov/downloads/cases-updates/2020-10-19-all-forecasted-cases-model-data.csv\")\n",
    "# Forecasted deaths: FD\n",
    "FD = pd.read_csv('https://www.cdc.gov/coronavirus/2019-ncov/covid-data/files/2020-10-19-model-data.csv')\n",
    "\n",
    "Fmodels = FD.model.unique()\n",
    "Fstates = FD.location_name.unique()\n",
    "\n",
    "# Forecast Series getter\n",
    "def getFS(type, model, state, Fdate):\n",
    "    \"\"\"Gets the forecasted deaths series by model, state and forecast date\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    type : str\n",
    "        'C' for cumulative cases. \n",
    "        'D' for cumulative deaths.\n",
    "    model : str\n",
    "        The model of the forecast\n",
    "    state : str\n",
    "        The target state of the forecast\n",
    "    Fdate : str or datetime\n",
    "        The date when the forecast was performed. If a string, provide the format '%Y-%m-%d'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        a data frame containing 5 series:\n",
    "           - point series\n",
    "           - 2.5% quantile\n",
    "           - 25% quantile\n",
    "           - 75% quantile\n",
    "           - 97.5% quantile\n",
    "        Indexes are of class pandas.DatetimeIndex.\n",
    "    \"\"\"\n",
    "    if(type == 'C'):\n",
    "        out = FC[(FC.model == model) & (FC.location_name == state) & (FC.forecast_date == Fdate)] \n",
    "    elif(type == 'D'):\n",
    "        out = FD[(FD.model == model) & (FD.location_name == state) & (FD.forecast_date == Fdate) & FD.target.apply(str.endswith, args=('cum death',0))]\n",
    "    else:\n",
    "        return None\n",
    "    if( out.empty ):\n",
    "        return None\n",
    "    out = pd.DataFrame(out.iloc[:,-5:].values,\n",
    "                columns = out.columns[-5:],\n",
    "                index = pd.to_datetime(out.iloc[:,3], format=\"%Y-%m-%d\")\n",
    "                   )\n",
    "     \n",
    "    return out\n",
    "     \n",
    "# Example: getFS('C', Fmodels[1], Fstates[1], FD.forecast_date[1])\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "# prova1 = pd.ExcelFile('Matlab to python/data_models-Florida.xlsx')\n",
    "\n",
    "# prova1.sheet_names\n",
    "\n",
    "# prova2 = prova1.parse('Ensamble')\n",
    "# prova2\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Alabama', 'Alaska', 'American Samoa', ..., 'Uinta County',\n",
       "       'Washakie County', 'Weston County'], dtype=object)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2020-01-25     0\n",
       "2020-02-01     0\n",
       "2020-02-08     0\n",
       "2020-02-15     0\n",
       "2020-02-22     0\n",
       "2020-02-29     0\n",
       "2020-03-07     0\n",
       "2020-03-14     0\n",
       "2020-03-21     0\n",
       "2020-03-28     2\n",
       "2020-04-04     5\n",
       "2020-04-11     8\n",
       "2020-04-18     9\n",
       "2020-04-25     9\n",
       "2020-05-02     9\n",
       "2020-05-09    10\n",
       "2020-05-16    10\n",
       "2020-05-23    10\n",
       "2020-05-30    10\n",
       "2020-06-06    10\n",
       "2020-06-13    12\n",
       "2020-06-20    12\n",
       "2020-06-27    14\n",
       "2020-07-04    16\n",
       "2020-07-11    17\n",
       "2020-07-18    18\n",
       "2020-07-25    20\n",
       "2020-08-01    24\n",
       "2020-08-08    26\n",
       "2020-08-15    28\n",
       "2020-08-22    31\n",
       "2020-08-29    37\n",
       "2020-09-05    42\n",
       "2020-09-12    44\n",
       "2020-09-19    45\n",
       "2020-09-26    52\n",
       "2020-10-03    58\n",
       "2020-10-10    60\n",
       "2020-10-17    67\n",
       "2020-10-24    68\n",
       "2020-10-31    82\n",
       "2020-11-07    84\n",
       "2020-11-14    98\n",
       "Name: Alaska: Cumulative deaths, dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    " getRS('D',Rstates[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "data= getFS('D', Fmodels[1], Fstates[1], FD.forecast_date[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 74.,  67.,  71.,  78.,  84.],\n",
       "       [ 81.,  67.,  75.,  89., 105.],\n",
       "       [ 89.,  67.,  79., 101., 123.],\n",
       "       [ 99.,  67.,  81., 111., 141.]])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2020-10-24 00:00:00\n",
      "2020-10-31 00:00:00\n",
      "2020-11-07 00:00:00\n",
      "2020-11-14 00:00:00\n"
     ]
    }
   ],
   "source": [
    "for i in data.index.tolist():\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>point</th>\n",
       "      <th>quantile_0.025</th>\n",
       "      <th>quantile_0.25</th>\n",
       "      <th>quantile_0.75</th>\n",
       "      <th>quantile_0.975</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target_week_end_date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>2020-10-24</td>\n",
       "      <td>74.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>84.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2020-10-31</td>\n",
       "      <td>81.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>105.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2020-11-07</td>\n",
       "      <td>89.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>123.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2020-11-14</td>\n",
       "      <td>99.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>111.0</td>\n",
       "      <td>141.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      point  quantile_0.025  quantile_0.25  quantile_0.75  \\\n",
       "target_week_end_date                                                        \n",
       "2020-10-24             74.0            67.0           71.0           78.0   \n",
       "2020-10-31             81.0            67.0           75.0           89.0   \n",
       "2020-11-07             89.0            67.0           79.0          101.0   \n",
       "2020-11-14             99.0            67.0           81.0          111.0   \n",
       "\n",
       "                      quantile_0.975  \n",
       "target_week_end_date                  \n",
       "2020-10-24                      84.0  \n",
       "2020-10-31                     105.0  \n",
       "2020-11-07                     123.0  \n",
       "2020-11-14                     141.0  "
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'Timestamp' object has no attribute 'split'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-51-a8ece29f578b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-51-a8ece29f578b>\u001b[0m in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m: 'Timestamp' object has no attribute 'split'"
     ]
    }
   ],
   "source": [
    "[i.split() for i in data.index.tolist()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>point</th>\n",
       "      <th>quantile_0.025</th>\n",
       "      <th>quantile_0.25</th>\n",
       "      <th>quantile_0.75</th>\n",
       "      <th>quantile_0.975</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>target_week_end_date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>2020-10-24</td>\n",
       "      <td>74.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>84.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2020-10-31</td>\n",
       "      <td>81.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>105.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2020-11-07</td>\n",
       "      <td>89.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>123.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2020-11-14</td>\n",
       "      <td>99.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>111.0</td>\n",
       "      <td>141.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      point  quantile_0.025  quantile_0.25  quantile_0.75  \\\n",
       "target_week_end_date                                                        \n",
       "2020-10-24             74.0            67.0           71.0           78.0   \n",
       "2020-10-31             81.0            67.0           75.0           89.0   \n",
       "2020-11-07             89.0            67.0           79.0          101.0   \n",
       "2020-11-14             99.0            67.0           81.0          111.0   \n",
       "\n",
       "                      quantile_0.975  \n",
       "target_week_end_date                  \n",
       "2020-10-24                      84.0  \n",
       "2020-10-31                     105.0  \n",
       "2020-11-07                     123.0  \n",
       "2020-11-14                     141.0  "
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2020-10-24', '2020-10-31', '2020-11-07', '2020-11-14'], dtype='datetime64[ns]', name='target_week_end_date', freq=None)"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2020-10-31 00:00:00'"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "str(data.index[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2020-10-24 00:00:00\n",
      "2020-10-31 00:00:00\n",
      "2020-11-07 00:00:00\n",
      "2020-11-14 00:00:00\n"
     ]
    }
   ],
   "source": [
    " for i in pd.Series(data.index.tolist()):\n",
    "        print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}