This commit is contained in:
Andreaierardi 2020-03-03 18:39:17 +01:00
parent 2d1f9ae9b8
commit e6267fd504
2 changed files with 824 additions and 67 deletions

View File

@ -1,6 +1,681 @@
{
"cells": [],
"metadata": {},
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Diamonds price regression\n",
"\n",
"## Using Machine Learning regressors\n",
"\n",
"The dataset used is [here](https://www.kaggle.com/shivam2503/diamonds/data)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>61.5</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>56.9</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>62.4</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>63.3</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat cut color clarity depth table price x y z\n",
"1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n",
"2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n",
"3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n",
"4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n",
"5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"df = pd.read_csv(\"Datasets/Diamonds/diamonds.csv\", index_col=0)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df['cut'].unique()\n",
"cut_class_dict = {\"Fair\": 1, \"Good\": 2, \"Very Good\": 3, \"Premium\": 4, \"Ideal\": 5}\n",
"clarity_dict = {\"I3\": 1, \"I2\": 2, \"I1\": 3, \"SI2\": 4, \"SI1\": 5, \"VS2\": 6, \"VS1\": 7, \"VVS2\": 8, \"VVS1\": 9, \"IF\": 10, \"FL\": 11}\n",
"color_dict = {\"J\": 1,\"I\": 2,\"H\": 3,\"G\": 4,\"F\": 5,\"E\": 6,\"D\": 7}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>61.5</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>59.8</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>56.9</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>62.4</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>63.3</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat cut color clarity depth table price x y z\n",
"1 0.23 5 6 4 61.5 55.0 326 3.95 3.98 2.43\n",
"2 0.21 4 6 5 59.8 61.0 326 3.89 3.84 2.31\n",
"3 0.23 2 6 7 56.9 65.0 327 4.05 4.07 2.31\n",
"4 0.29 4 2 6 62.4 58.0 334 4.20 4.23 2.63\n",
"5 0.31 2 1 4 63.3 58.0 335 4.34 4.35 2.75"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['cut'] = df['cut'].map(cut_class_dict)\n",
"df['clarity'] = df['clarity'].map(clarity_dict)\n",
"df['color'] = df['color'].map(color_dict)\n",
"df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import sklearn\n",
"from sklearn import svm, preprocessing\n",
"from sklearn.linear_model import SGDRegressor\n",
"\n",
"df = sklearn.utils.shuffle(df) # always shuffle your data to avoid any biases that may emerge b/c of some order.\n",
"\n",
"X = df.drop(\"price\", axis=1).values\n",
"X = preprocessing.scale(X)\n",
"y = df[\"price\"].values"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data len: 53940 \n",
"As test we used 20%: 10788.0\n"
]
}
],
"source": [
"len(y)\n",
"print(\"Data len: \",len(y),\"\\nAs test we used 20%: \",20/100*len(y))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Import the splitter explicitly: a bare `import sklearn` is not guaranteed\n",
"# to load the `model_selection` submodule.\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 20% of the rows as the test set.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SGD Regressor"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.907304269100812\n"
]
}
],
"source": [
"clf = SGDRegressor(max_iter=1000)\n",
"clf.fit(X_train, y_train)\n",
"\n",
"print(clf.score(X_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2057.938907081024 1716\n",
"408.86668615612143 776\n",
"1877.1359526566246 1850\n",
"14413.45842840683 13317\n",
"6583.122199486379 5880\n",
"1472.7275999563244 2231\n",
"603.3124107228641 666\n",
"-175.58587579503273 705\n",
"732.6492505985616 552\n",
"2763.681359134369 3061\n"
]
}
],
"source": [
"# Print predicted vs. actual price for the first 10 test rows.\n",
"# Dedicated loop names keep the notebook-wide X / y arrays from being overwritten.\n",
"for features, actual in list(zip(X_test, y_test))[:10]:\n",
"    print(clf.predict([features])[0], actual)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SVR Regressor"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
" \"avoid this warning.\", FutureWarning)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.5235043972946711\n"
]
}
],
"source": [
"from sklearn import svm\n",
"\n",
"clf = svm.SVR()\n",
"\n",
"clf.fit(X_train, y_train)\n",
"print(clf.score(X_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1614.983425223527 1716\n",
"579.9398270257539 776\n",
"2254.067311348471 1850\n",
"5348.406983308956 13317\n",
"5642.754018835578 5880\n",
"2166.566950323401 2231\n",
"763.0511461708497 666\n",
"514.0293281929548 705\n",
"1103.8544288088133 552\n",
"3299.614546713646 3061\n"
]
}
],
"source": [
"# Print predicted vs. actual price for the first 10 test rows.\n",
"# Dedicated loop names keep the notebook-wide X / y arrays from being overwritten.\n",
"for features, actual in list(zip(X_test, y_test))[:10]:\n",
"    print(clf.predict([features])[0], actual)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Support Vector Regression (SVR) with linear kernel"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8630448690848345\n"
]
}
],
"source": [
"clf = svm.SVR(kernel=\"linear\")\n",
"\n",
"clf.fit(X_train, y_train)\n",
"print(clf.score(X_test, y_test))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Support Vector Regression (SVR) with rbf kernel"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
" \"avoid this warning.\", FutureWarning)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.5235043972946711\n"
]
}
],
"source": [
"clf2 = svm.SVR(kernel=\"rbf\")\n",
"\n",
"clf2.fit(X_train, y_train)\n",
"print(clf2.score(X_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1614.98342522, 579.93982703, 2254.06731135, ..., 1600.90972976,\n",
" 863.31271817, 1313.2325644 ])"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predictions = clf2.predict(X_test) # make predictions\n",
"predictions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest regression"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SCORE: 0.9795971375747344 \n",
"--------------\n",
"1651.2 1716\n",
"748.7833333333333 776\n",
"1823.3 1850\n",
"16492.9 13317\n",
"4931.6 5880\n",
"1935.2 2231\n",
"688.1 666\n",
"689.4 705\n",
"663.8 552\n",
"3171.0 3061\n"
]
}
],
"source": [
"from sklearn.ensemble import RandomForestRegressor\n",
"\n",
"# Fit a 10-tree forest with a fixed seed and report its score on the test split.\n",
"rf = RandomForestRegressor(n_estimators=10, random_state=0)\n",
"rf.fit(X_train,y_train)\n",
"print(\"SCORE: \",rf.score(X_test, y_test),\"\\n--------------\")\n",
"\n",
"# Predicted vs. actual price for the first 10 test rows.\n",
"# Dedicated loop names keep the notebook-wide X / y arrays from being overwritten.\n",
"for features, actual in list(zip(X_test, y_test))[:10]:\n",
"    print(rf.predict([features])[0], actual)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Linear Regression"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SCORE: 0.9087525665852461 \n",
"--------------\n",
"2039.5860723087262 1716\n",
"315.57839741764747 776\n",
"1857.7588850776274 1850\n",
"14629.248347536268 13317\n",
"6689.10311583452 5880\n",
"1455.96854276338 2231\n",
"543.9652038627887 666\n",
"-295.936014145806 705\n",
"675.2908110759709 552\n",
"2831.5232911325866 3061\n"
]
}
],
"source": [
"from sklearn import linear_model\n",
"\n",
"# Fit ordinary least squares and report its score on the test split.\n",
"linear = linear_model.LinearRegression()\n",
"\n",
"linear.fit(X_train, y_train)\n",
"\n",
"print(\"SCORE: \",linear.score(X_test, y_test),\"\\n--------------\")\n",
"# Predicted vs. actual price for the first 10 test rows.\n",
"# Dedicated loop names keep the notebook-wide X / y arrays from being overwritten.\n",
"for features, actual in list(zip(X_test, y_test))[:10]:\n",
"    print(linear.predict([features])[0], actual)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Logistic regression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# NOTE(review): LogisticRegression is a classifier, so each distinct price is\n",
"# treated as a class label and score() is classification accuracy, not R^2 as\n",
"# for the regressors above. Confirm this is the intended model.\n",
"logistic = LogisticRegression(random_state=0)  # use the class imported in this cell\n",
"logistic.fit(X_train, y_train)  # fit on the training split\n",
"ac = logistic.score(X_test, y_test)\n",
"print(\"SCORE: \",ac ,\"\\n--------------\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# predict() takes only the feature matrix; the targets are not an argument.\n",
"Y_pred = logistic.predict(X_test)  # make predictions\n",
"\n",
"# Pair each prediction with its true price directly, rather than indexing\n",
"# Y_pred with an undefined name.\n",
"for predicted, actual in list(zip(Y_pred, y_test))[:10]:\n",
"    print(predicted, actual)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Predicted vs. actual price for the first 10 test rows, one row at a time.\n",
"# Dedicated loop names keep the notebook-wide X / y arrays from being overwritten.\n",
"for features, actual in list(zip(X_test, y_test))[:10]:\n",
"    print(logistic.predict([features])[0], actual)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 1,
"metadata": {},
"outputs": [
{
@ -128,7 +128,7 @@
"5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75"
]
},
"execution_count": 54,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
@ -143,7 +143,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -155,7 +155,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -270,7 +270,7 @@
"5 0.31 2 1 4 63.3 58.0 335 4.34 4.35 2.75"
]
},
"execution_count": 56,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -284,7 +284,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -301,7 +301,7 @@
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@ -320,7 +320,7 @@
},
{
"cell_type": "code",
"execution_count": 68,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@ -336,14 +336,14 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9040861346309637\n"
"0.907304269100812\n"
]
}
],
@ -356,23 +356,23 @@
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"15298.319296019743 13919\n",
"12283.622023001368 14386\n",
"5396.202925107901 3951\n",
"4034.4365255612984 2855\n",
"215.1080120323627 645\n",
"3533.2049908575455 2978\n",
"-624.3585716217572 654\n",
"3935.1728997587816 3170\n",
"-1127.3151816200148 450\n",
"4022.1708282842237 2956\n"
"2057.938907081024 1716\n",
"408.86668615612143 776\n",
"1877.1359526566246 1850\n",
"14413.45842840683 13317\n",
"6583.122199486379 5880\n",
"1472.7275999563244 2231\n",
"603.3124107228641 666\n",
"-175.58587579503273 705\n",
"732.6492505985616 552\n",
"2763.681359134369 3061\n"
]
}
],
@ -390,7 +390,7 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -405,7 +405,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"0.5413237370675921\n"
"0.5235043972946711\n"
]
}
],
@ -420,23 +420,23 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5122.681145918745 13919\n",
"6632.724385241532 14386\n",
"4567.411354034963 3951\n",
"3261.084788402066 2855\n",
"529.2786025656524 645\n",
"3219.2301461725656 2978\n",
"1002.5617023863538 654\n",
"3440.406994396222 3170\n",
"685.2569483457883 450\n",
"3101.373161450196 2956\n"
"1614.983425223527 1716\n",
"579.9398270257539 776\n",
"2254.067311348471 1850\n",
"5348.406983308956 13317\n",
"5642.754018835578 5880\n",
"2166.566950323401 2231\n",
"763.0511461708497 666\n",
"514.0293281929548 705\n",
"1103.8544288088133 552\n",
"3299.614546713646 3061\n"
]
}
],
@ -454,20 +454,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8630448690848345\n"
]
}
],
"source": [
"clf = svm.SVR(kernel=\"linear\")\n",
"\n",
"clf.fit(X_train, y_train)\n",
"print(clf.score(X_test, y_test))\n",
"\n",
"predictions_lin = clf.predict(X_test, y_test) # make predictions\n",
"\n",
"acc = clf.accuracy_score(y_test, predictions_lin)\n",
"\n",
"print(\"Accuracy: \",acc)"
"print(clf.score(X_test, y_test))\n"
]
},
{
@ -479,20 +481,52 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 31,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
" \"avoid this warning.\", FutureWarning)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.5235043972946711\n"
]
}
],
"source": [
"clf = svm.SVR(kernel=\"rbf\")\n",
"clf2 = svm.SVR(kernel=\"rbf\")\n",
"\n",
"clf.fit(X_train, y_train)\n",
"print(clf.score(X_test, y_test))\n",
"\n",
"predictions = clf.predict(X_test, y_test) # make predictions\n",
"\n",
"acc = clf.accuracy_score(y_test, predictions)\n",
"\n",
"print(\"Accuracy: \",acc)"
"clf2.fit(X_train, y_train)\n",
"print(clf2.score(X_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1614.98342522, 579.93982703, 2254.06731135, ..., 1600.90972976,\n",
" 863.31271817, 1313.2325644 ])"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predictions = clf2.predict(X_test) # make predictions\n",
"predictions"
]
},
{
@ -504,14 +538,33 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 34,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SCORE: 0.9795971375747344 \n",
"--------------\n",
"1651.2 1716\n",
"748.7833333333333 776\n",
"1823.3 1850\n",
"16492.9 13317\n",
"4931.6 5880\n",
"1935.2 2231\n",
"688.1 666\n",
"689.4 705\n",
"663.8 552\n",
"3171.0 3061\n"
]
}
],
"source": [
"from sklearn.ensemble import RandomForestRegressor\n",
"rf = RandomForestRegressor(n_estimators=10, random_state=0)\n",
"rf.fit(X_train,y_train)\n",
"print(\"SCORE: \",rf.score(X_test, y_test))\n",
"print(\"SCORE: \",rf.score(X_test, y_test),\"\\n--------------\")\n",
"\n",
"for X,y in list(zip(X_test, y_test))[:10]:\n",
" print(rf.predict([X])[0], y)"
@ -526,14 +579,25 @@
},
{
"cell_type": "code",
"execution_count": 80,
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.905111184064965\n"
"SCORE: 0.9087525665852461 \n",
"--------------\n",
"2039.5860723087262 1716\n",
"315.57839741764747 776\n",
"1857.7588850776274 1850\n",
"14629.248347536268 13317\n",
"6689.10311583452 5880\n",
"1455.96854276338 2231\n",
"543.9652038627887 666\n",
"-295.936014145806 705\n",
"675.2908110759709 552\n",
"2831.5232911325866 3061\n"
]
}
],
@ -543,7 +607,11 @@
"linear = linear_model.LinearRegression()\n",
"\n",
"linear.fit(X_train, y_train)\n",
"print(linear.score(X_test, y_test))"
"\n",
"print(\"SCORE: \",linear.score(X_test, y_test),\"\\n--------------\")\n",
"for X,y in list(zip(X_test, y_test))[:10]:\n",
" print(linear.predict([X])[0], y)\n",
" "
]
},
{
@ -570,13 +638,12 @@
}
],
"source": [
"from sklearn import linear_model\n",
"from sklearn.linear_model import LogisticRegression\n",
"logistic = LogisticRegression(random_state=0) # create object for the class\n",
"logistic = linear_model.LogisticRegression(random_state=0) # create object for the class\n",
"logistic.fit(X_train, y_train) # perform logistic regression\n",
"ac = logistic.score(X_test, y_test)\n",
"Y_pred = logistic.predict(X_test, y_test) # make predictions\n",
"\n",
"print(\"Accuracy: \",ac)"
"print(\"SCORE: \",ac ,\"\\n--------------\")"
]
},
{
@ -584,7 +651,22 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"Y_pred = logistic.predict(X_test, y_test) # make predictions\n",
"for X,y in list(zip(X_test, y_test))[:10]:\n",
" print(Y_pred[x], y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for X,y in list(zip(X_test, y_test))[:10]:\n",
" print(logistic.predict([X])[0], y)\n",
" "
]
}
],
"metadata": {