diff --git a/1year/2trimester/Coding for Data Science - Python language/Python/Examples/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/1year/2trimester/Coding for Data Science - Python language/Python/Examples/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 000000000..1b92d837b --- /dev/null +++ b/1year/2trimester/Coding for Data Science - Python language/Python/Examples/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "E:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " interactivity=interactivity, compiler=compiler, result=result)\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from datetime import date\n", + "\n", + "## Real data: begins with \"R\"\n", + "# Real Deaths: RD\n", + "RD = pd.read_csv(\"https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Deaths.csv\")\n", + "# Real Cases: RC\n", + "RC = pd.read_csv(\"https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Cases.csv\")\n", + "\n", + "Rstates = RD.location_name.unique()\n", + "\n", + "# Real Series getter\n", + "def getRS(type, state, aggregateOn = 5):\n", + " \"\"\"Gets the real cases or deaths series by state\n", + "\n", + " Parameters\n", + " ----------\n", + " type : str\n", + " 'C' for cumulative cases. 
\n", + " 'D' for cumulative deaths.\n", + " state : str\n", + " The state where deaths were recorded\n", + " aggregateOn : int or bool\n", + " The weekday to aggregate the observations on.\n", + " 0 is Monday, 6 is Sunday.\n", + " Set to false to prevent aggregation.\n", + "\n", + " Returns\n", + " -------\n", + " pandas.Series : the series of real cases or deaths of the specified state. Indexes are of class pandas.DatetimeIndex.\n", + " \"\"\"\n", + " if(type == 'C'):\n", + " out = pd.Series(RC[RC['location_name'] == state].iloc[:,3].values,\n", + " index = pd.to_datetime(RC[RC['location_name'] == state].iloc[:,0].values, format=\"%Y-%m-%d\"),\n", + " name = state + \": Cumulative cases\")\n", + " elif(type == 'D'):\n", + " out = pd.Series(RD[RD['location_name'] == state].iloc[:,3].values,\n", + " index = pd.to_datetime(RD[RD['location_name'] == state].iloc[:,0].values, format=\"%Y-%m-%d\"),\n", + " name = state + \": Cumulative deaths\"\n", + " )\n", + " if(aggregateOn is not False):\n", + " out = out[out.index.weekday == aggregateOn]\n", + "\n", + " return(out)\n", + "\n", + "# Example: getRS('D',Rstates[1])\n", + "\n", + "# # shift series to first non-zero occurrence\n", + "# daily_s = daily_s[daily_s>0]\n", + "# # switch aggregation range to weekly (every Saturday)\n", + "# D = daily_s[daily_s.index.weekday == 5]\n", + "\n", + "\n", + "## Forecast data: begins with \"F\"\n", + "# Forecasted cases: FC\n", + "FC = pd.read_csv(\"https://www.cdc.gov/coronavirus/2019-ncov/downloads/cases-updates/2020-10-19-all-forecasted-cases-model-data.csv\")\n", + "# Forecasted deaths: FD\n", + "FD = pd.read_csv('https://www.cdc.gov/coronavirus/2019-ncov/covid-data/files/2020-10-19-model-data.csv')\n", + "\n", + "Fmodels = FD.model.unique()\n", + "Fstates = FD.location_name.unique()\n", + "\n", + "# Forecast Series getter\n", + "def getFS(type, model, state, Fdate):\n", + " \"\"\"Gets the forecasted cases or deaths series by model, state and forecast date\n", + "\n", + " Parameters\n", + 
" ----------\n", + " type : str\n", + " 'C' for cumulative cases. \n", + " 'D' for cumulative deaths.\n", + " model : str\n", + " The model of the forecast\n", + " state : str\n", + " The target state of the forecast\n", + " Fdate : str or datetime\n", + " The date when the forecast was performed. If a string, provide the format '%Y-%m-%d'.\n", + "\n", + " Returns\n", + " -------\n", + " pandas.DataFrame\n", + " a data frame containing 5 series:\n", + " - point series\n", + " - 2.5% quantile\n", + " - 25% quantile\n", + " - 75% quantile\n", + " - 97.5% quantile\n", + " Indexes are of class pandas.DatetimeIndex.\n", + " \"\"\"\n", + " if(type == 'C'):\n", + " out = FC[(FC.model == model) & (FC.location_name == state) & (FC.forecast_date == Fdate)] \n", + " elif(type == 'D'):\n", + " out = FD[(FD.model == model) & (FD.location_name == state) & (FD.forecast_date == Fdate) & FD.target.apply(str.endswith, args=('cum death',0))]\n", + " else:\n", + " return None\n", + " if( out.empty ):\n", + " return None\n", + " out = pd.DataFrame(out.iloc[:,-5:].values,\n", + " columns = out.columns[-5:],\n", + " index = pd.to_datetime(out.iloc[:,3], format=\"%Y-%m-%d\")\n", + " )\n", + " \n", + " return out\n", + " \n", + "# Example: getFS('C', Fmodels[1], Fstates[1], FD.forecast_date[1])\n", + "\n", + "\n", + "\n", + "\n", + "# prova1 = pd.ExcelFile('Matlab to python/data_models-Florida.xlsx')\n", + "\n", + "# prova1.sheet_names\n", + "\n", + "# prova2 = prova1.parse('Ensamble')\n", + "# prova2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Alabama', 'Alaska', 'American Samoa', ..., 'Uinta County',\n", + " 'Washakie County', 'Weston County'], dtype=object)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"2020-01-25 0\n", + "2020-02-01 0\n", + "2020-02-08 0\n", + "2020-02-15 0\n", + "2020-02-22 0\n", + "2020-02-29 0\n", + "2020-03-07 0\n", + "2020-03-14 0\n", + "2020-03-21 0\n", + "2020-03-28 2\n", + "2020-04-04 5\n", + "2020-04-11 8\n", + "2020-04-18 9\n", + "2020-04-25 9\n", + "2020-05-02 9\n", + "2020-05-09 10\n", + "2020-05-16 10\n", + "2020-05-23 10\n", + "2020-05-30 10\n", + "2020-06-06 10\n", + "2020-06-13 12\n", + "2020-06-20 12\n", + "2020-06-27 14\n", + "2020-07-04 16\n", + "2020-07-11 17\n", + "2020-07-18 18\n", + "2020-07-25 20\n", + "2020-08-01 24\n", + "2020-08-08 26\n", + "2020-08-15 28\n", + "2020-08-22 31\n", + "2020-08-29 37\n", + "2020-09-05 42\n", + "2020-09-12 44\n", + "2020-09-19 45\n", + "2020-09-26 52\n", + "2020-10-03 58\n", + "2020-10-10 60\n", + "2020-10-17 67\n", + "2020-10-24 68\n", + "2020-10-31 82\n", + "2020-11-07 84\n", + "2020-11-14 98\n", + "Name: Alaska: Cumulative deaths, dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " getRS('D',Rstates[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "data= getFS('D', Fmodels[1], Fstates[1], FD.forecast_date[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 74., 67., 71., 78., 84.],\n", + " [ 81., 67., 75., 89., 105.],\n", + " [ 89., 67., 79., 101., 123.],\n", + " [ 99., 67., 81., 111., 141.]])" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.values" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2020-10-24 00:00:00\n", + "2020-10-31 00:00:00\n", + "2020-11-07 00:00:00\n", + "2020-11-14 00:00:00\n" + ] + } + ], + "source": [ + "for i in data.index.tolist():\n", + " print(i)" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'target_week_end_data'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mE:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2896\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2897\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2898\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'target_week_end_data'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 
1\u001b[1;33m \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"target_week_end_data\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32mE:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2978\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2979\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2980\u001b[1;33m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2981\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2982\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mE:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2897\u001b[0m \u001b[1;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2898\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2899\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2900\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2901\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + 
"\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'target_week_end_data'" + ] + } + ], + "source": [ + "data[\"target_week_end_data\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/1year/2trimester/Coding for Data Science - Python language/Python/Examples/Untitled.ipynb b/1year/2trimester/Coding for Data Science - Python language/Python/Examples/Untitled.ipynb new file mode 100644 index 000000000..8f6d06f45 --- /dev/null +++ b/1year/2trimester/Coding for Data Science - Python language/Python/Examples/Untitled.ipynb @@ -0,0 +1,609 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "E:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (1) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " interactivity=interactivity, compiler=compiler, result=result)\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from datetime import date\n", + "\n", + "## Real data: begins with \"R\"\n", + "# Real Deaths: RD\n", + "RD = pd.read_csv(\"https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Deaths.csv\")\n", + "# Real Cases: RC\n", + "RC = pd.read_csv(\"https://raw.githubusercontent.com/reichlab/covid19-forecast-hub/master/data-truth/truth-Cumulative%20Cases.csv\")\n", + "\n", + "Rstates = RD.location_name.unique()\n", + "\n", + "# Real Series getter\n", + "def getRS(type, state, aggregateOn = 5):\n", + " \"\"\"Gets the real cases or deaths series by state\n", + "\n", + " Parameters\n", + " ----------\n", + " type : str\n", + " 'C' for cumulative cases. \n", + " 'D' for cumulative deaths.\n", + " state : str\n", + " The state where deaths were recorded\n", + " aggregateOn : int or bool\n", + " The weekday to aggregate the observations on.\n", + " 0 is Monday, 6 is Sunday.\n", + " Set to false to prevent aggregation.\n", + "\n", + " Returns\n", + " -------\n", + " pandas.Series : the series of real cases or deaths of the specified state. 
Indexes are of class pandas.DatetimeIndex.\n", + " \"\"\"\n", + " if(type == 'C'):\n", + " out = pd.Series(RC[RC['location_name'] == state].iloc[:,3].values,\n", + " index = pd.to_datetime(RC[RC['location_name'] == state].iloc[:,0].values, format=\"%Y-%m-%d\"),\n", + " name = state + \": Cumulative cases\")\n", + " elif(type == 'D'):\n", + " out = pd.Series(RD[RD['location_name'] == state].iloc[:,3].values,\n", + " index = pd.to_datetime(RD[RD['location_name'] == state].iloc[:,0].values, format=\"%Y-%m-%d\"),\n", + " name = state + \": Cumulative deaths\"\n", + " )\n", + " if(aggregateOn is not False):\n", + " out = out[out.index.weekday == aggregateOn]\n", + "\n", + " return(out)\n", + "\n", + "# Example: getRS('D',Rstates[1])\n", + "\n", + "# # shift series to first non-zero occurrence\n", + "# daily_s = daily_s[daily_s>0]\n", + "# # switch aggregation range to weekly (every Saturday)\n", + "# D = daily_s[daily_s.index.weekday == 5]\n", + "\n", + "\n", + "## Forecast data: begins with \"F\"\n", + "# Forecasted cases: FC\n", + "FC = pd.read_csv(\"https://www.cdc.gov/coronavirus/2019-ncov/downloads/cases-updates/2020-10-19-all-forecasted-cases-model-data.csv\")\n", + "# Forecasted deaths: FD\n", + "FD = pd.read_csv('https://www.cdc.gov/coronavirus/2019-ncov/covid-data/files/2020-10-19-model-data.csv')\n", + "\n", + "Fmodels = FD.model.unique()\n", + "Fstates = FD.location_name.unique()\n", + "\n", + "# Forecast Series getter\n", + "def getFS(type, model, state, Fdate):\n", + " \"\"\"Gets the forecasted cases or deaths series by model, state and forecast date\n", + "\n", + " Parameters\n", + " ----------\n", + " type : str\n", + " 'C' for cumulative cases. \n", + " 'D' for cumulative deaths.\n", + " model : str\n", + " The model of the forecast\n", + " state : str\n", + " The target state of the forecast\n", + " Fdate : str or datetime\n", + " The date when the forecast was performed. 
If a string, provide the format '%Y-%m-%d'.\n", + "\n", + " Returns\n", + " -------\n", + " pandas.DataFrame\n", + " a data frame containing 5 series:\n", + " - point series\n", + " - 2.5% quantile\n", + " - 25% quantile\n", + " - 75% quantile\n", + " - 97.5% quantile\n", + " Indexes are of class pandas.DatetimeIndex.\n", + " \"\"\"\n", + " if(type == 'C'):\n", + " out = FC[(FC.model == model) & (FC.location_name == state) & (FC.forecast_date == Fdate)] \n", + " elif(type == 'D'):\n", + " out = FD[(FD.model == model) & (FD.location_name == state) & (FD.forecast_date == Fdate) & FD.target.apply(str.endswith, args=('cum death',0))]\n", + " else:\n", + " return None\n", + " if( out.empty ):\n", + " return None\n", + " out = pd.DataFrame(out.iloc[:,-5:].values,\n", + " columns = out.columns[-5:],\n", + " index = pd.to_datetime(out.iloc[:,3], format=\"%Y-%m-%d\")\n", + " )\n", + " \n", + " return out\n", + " \n", + "# Example: getFS('C', Fmodels[1], Fstates[1], FD.forecast_date[1])\n", + "\n", + "\n", + "\n", + "\n", + "# prova1 = pd.ExcelFile('Matlab to python/data_models-Florida.xlsx')\n", + "\n", + "# prova1.sheet_names\n", + "\n", + "# prova2 = prova1.parse('Ensamble')\n", + "# prova2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Alabama', 'Alaska', 'American Samoa', ..., 'Uinta County',\n", + " 'Washakie County', 'Weston County'], dtype=object)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2020-01-25 0\n", + "2020-02-01 0\n", + "2020-02-08 0\n", + "2020-02-15 0\n", + "2020-02-22 0\n", + "2020-02-29 0\n", + "2020-03-07 0\n", + "2020-03-14 0\n", + "2020-03-21 0\n", + "2020-03-28 2\n", + "2020-04-04 5\n", + "2020-04-11 8\n", + "2020-04-18 9\n", + "2020-04-25 9\n", + "2020-05-02 9\n", + 
"2020-05-09 10\n", + "2020-05-16 10\n", + "2020-05-23 10\n", + "2020-05-30 10\n", + "2020-06-06 10\n", + "2020-06-13 12\n", + "2020-06-20 12\n", + "2020-06-27 14\n", + "2020-07-04 16\n", + "2020-07-11 17\n", + "2020-07-18 18\n", + "2020-07-25 20\n", + "2020-08-01 24\n", + "2020-08-08 26\n", + "2020-08-15 28\n", + "2020-08-22 31\n", + "2020-08-29 37\n", + "2020-09-05 42\n", + "2020-09-12 44\n", + "2020-09-19 45\n", + "2020-09-26 52\n", + "2020-10-03 58\n", + "2020-10-10 60\n", + "2020-10-17 67\n", + "2020-10-24 68\n", + "2020-10-31 82\n", + "2020-11-07 84\n", + "2020-11-14 98\n", + "Name: Alaska: Cumulative deaths, dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " getRS('D',Rstates[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "data= getFS('D', Fmodels[1], Fstates[1], FD.forecast_date[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 74., 67., 71., 78., 84.],\n", + " [ 81., 67., 75., 89., 105.],\n", + " [ 89., 67., 79., 101., 123.],\n", + " [ 99., 67., 81., 111., 141.]])" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.values" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2020-10-24 00:00:00\n", + "2020-10-31 00:00:00\n", + "2020-11-07 00:00:00\n", + "2020-11-14 00:00:00\n" + ] + } + ], + "source": [ + "for i in data.index.tolist():\n", + " print(i)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pointquantile_0.025quantile_0.25quantile_0.75quantile_0.975
target_week_end_date
2020-10-2474.067.071.078.084.0
2020-10-3181.067.075.089.0105.0
2020-11-0789.067.079.0101.0123.0
2020-11-1499.067.081.0111.0141.0
\n", + "
" + ], + "text/plain": [ + " point quantile_0.025 quantile_0.25 quantile_0.75 \\\n", + "target_week_end_date \n", + "2020-10-24 74.0 67.0 71.0 78.0 \n", + "2020-10-31 81.0 67.0 75.0 89.0 \n", + "2020-11-07 89.0 67.0 79.0 101.0 \n", + "2020-11-14 99.0 67.0 81.0 111.0 \n", + "\n", + " quantile_0.975 \n", + "target_week_end_date \n", + "2020-10-24 84.0 \n", + "2020-10-31 105.0 \n", + "2020-11-07 123.0 \n", + "2020-11-14 141.0 " + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Timestamp' object has no attribute 'split'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m(.0)\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m 
\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m: 'Timestamp' object has no attribute 'split'" + ] + } + ], + "source": [ + "[i.split() for i in data.index.tolist()]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pointquantile_0.025quantile_0.25quantile_0.75quantile_0.975
target_week_end_date
2020-10-2474.067.071.078.084.0
2020-10-3181.067.075.089.0105.0
2020-11-0789.067.079.0101.0123.0
2020-11-1499.067.081.0111.0141.0
\n", + "
" + ], + "text/plain": [ + " point quantile_0.025 quantile_0.25 quantile_0.75 \\\n", + "target_week_end_date \n", + "2020-10-24 74.0 67.0 71.0 78.0 \n", + "2020-10-31 81.0 67.0 75.0 89.0 \n", + "2020-11-07 89.0 67.0 79.0 101.0 \n", + "2020-11-14 99.0 67.0 81.0 111.0 \n", + "\n", + " quantile_0.975 \n", + "target_week_end_date \n", + "2020-10-24 84.0 \n", + "2020-10-31 105.0 \n", + "2020-11-07 123.0 \n", + "2020-11-14 141.0 " + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2020-10-24', '2020-10-31', '2020-11-07', '2020-11-14'], dtype='datetime64[ns]', name='target_week_end_date', freq=None)" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.index" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2020-10-31 00:00:00'" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "str(data.index[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2020-10-24 00:00:00\n", + "2020-10-31 00:00:00\n", + "2020-11-07 00:00:00\n", + "2020-11-14 00:00:00\n" + ] + } + ], + "source": [ + " for i in pd.Series(data.index.tolist()):\n", + " print(i)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}