Master-DataScience-Notes/1anno/2trimestre/Coding for DataScience/Python/ML - Linear regression, KNN, SVM.ipynb

1231 lines
101 KiB
Plaintext
Raw Normal View History

2020-03-01 13:09:38 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ML - Linear regression, KNN, SVM"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Linear regression"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" school sex age address famsize Pstatus Medu Fedu Mjob Fjob ... \\\n",
"0 GP F 18 U GT3 A 4 4 at_home teacher ... \n",
"1 GP F 17 U GT3 T 1 1 at_home other ... \n",
"2 GP F 15 U LE3 T 1 1 at_home other ... \n",
"3 GP F 15 U GT3 T 4 2 health services ... \n",
"4 GP F 16 U GT3 T 3 3 other other ... \n",
"\n",
" famrel freetime goout Dalc Walc health absences G1 G2 G3 \n",
"0 4 3 4 1 1 3 6 5 6 6 \n",
"1 5 3 3 1 1 3 4 5 5 6 \n",
"2 4 3 2 2 3 3 10 7 8 10 \n",
"3 3 2 2 1 1 5 2 15 14 15 \n",
"4 4 3 2 1 2 5 4 6 10 10 \n",
"\n",
"[5 rows x 33 columns]\n"
]
}
],
"source": [
"#Import Library\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import linear_model\n",
"import sklearn\n",
"from sklearn.utils import shuffle\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib import style\n",
"import pickle\n",
"\n",
"data = pd.read_csv(\"Datasets/Students/student-mat.csv\", sep=\";\")\n",
"# Since our data is seperated by semicolons we need to do sep=\";\"\n",
"print(data.head())"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"data = data[[\"G1\", \"G2\", \"G3\", \"studytime\", \"failures\", \"absences\"]]\n",
"predict = \"G3\"\n",
"\n",
"X = np.array(data.drop([predict], 1)) # Features\n",
"y = np.array(data[predict]) # Labels"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.1)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9072825900231904\n",
"Coefficient: \n",
" [ 0.13527031 0.98751848 -0.20498797 -0.33339246 0.0360702 ]\n",
"Intercept: \n",
" -1.3353139222187735\n",
"11.953607354545625 [11 12 2 0 10] 13\n",
"8.648370883609443 [9 9 2 0 8] 9\n",
"6.748413614780693 [ 6 8 1 3 16] 8\n",
"14.253304400739522 [15 14 2 0 4] 14\n",
"8.012543885152066 [10 8 2 0 14] 9\n",
"18.95845802835205 [18 18 1 0 8] 18\n",
"8.6393604136566 [10 9 2 0 4] 11\n",
"13.130515603322937 [14 13 2 0 4] 13\n",
"8.524114358116751 [ 8 8 1 0 30] 8\n",
"9.358341590467683 [ 9 9 1 0 22] 9\n",
"18.440271505465734 [18 18 3 0 5] 19\n",
"9.83186686648655 [10 10 1 0 4] 10\n",
"-0.5278570449856139 [9 0 2 0 0] 0\n",
"5.831032204076057 [5 8 2 3 0] 7\n",
"8.538406750456677 [ 7 10 3 1 0] 10\n",
"15.033412181729727 [14 15 2 0 2] 15\n",
"10.677527304643373 [11 11 2 0 2] 11\n",
"9.13533268256865 [11 9 2 0 14] 9\n",
"10.54225699206339 [10 11 2 0 2] 12\n",
"13.479784353849112 [14 13 1 0 8] 13\n",
"10.677527304643373 [11 11 2 0 2] 11\n",
"16.131563989318995 [15 16 3 0 7] 15\n",
"12.025747745812037 [11 12 2 0 12] 11\n",
"-1.0033856381389292 [7 0 3 0 0] 0\n",
"3.542127489909987 [6 5 1 2 0] 0\n",
"6.4544900042839535 [8 7 1 0 0] 6\n",
"-1.6660055594329843 [4 0 1 2 0] 0\n",
"13.973753305626712 [14 14 2 0 0] 14\n",
"11.020208321069754 [13 11 2 0 4] 11\n",
"8.089796550968265 [ 9 9 2 2 11] 9\n",
"15.351456208328791 [16 15 3 0 9] 16\n",
"13.446136894068955 [15 13 2 0 9] 15\n",
"11.953885441248763 [14 12 2 1 8] 12\n",
"-0.9268021619226335 [7 0 1 1 0] 0\n",
"13.381531513205376 [15 14 3 2 4] 15\n",
"16.354601213039867 [17 16 2 0 0] 17\n",
"10.812797617223357 [12 11 2 0 2] 12\n",
"12.197088254025228 [12 12 2 0 13] 13\n",
"12.866562712262764 [10 13 1 0 6] 13\n",
"7.2370205211272065 [8 8 2 0 0] 0\n"
]
}
],
"source": [
"\n",
"linear = linear_model.LinearRegression()\n",
"\n",
"linear.fit(x_train, y_train)\n",
"acc = linear.score(x_test, y_test)\n",
"print(acc)\n",
"\n",
"print('Coefficient: \\n', linear.coef_)\n",
"print('Intercept: \\n', linear.intercept_)\n",
"\n",
"predictions = linear.predict(x_test)\n",
"\n",
"for x in range(len(predictions)):\n",
" print(predictions[x], x_test[x], y_test[x])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Saving and loading the model"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.utils import shuffle\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib import style\n",
"import pickle"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Saving "
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"with open(\"Models/studentgrades.pickle\", \"wb\") as f:\n",
" pickle.dump(linear, f)\n",
"\n",
"# linear is the name of the model we created in the last tutorial\n",
"# it should be defined above this"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Loading model "
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"pickle_in = open(\"Models/studentgrades.pickle\", \"rb\")\n",
"linear = pickle.load(pickle_in)\n",
"\n",
"# Now we can use linear to predict grades like before"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Training multiple models\n"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.7120505837839677\n",
"Accuracy: 0.8419740188965523\n",
"Accuracy: 0.8020262406782782\n",
"Accuracy: 0.6940409192793664\n",
"Accuracy: 0.8940235536728673\n",
"Accuracy: 0.8145923637342816\n",
"Accuracy: 0.8471013396643304\n",
"Accuracy: 0.7832990829886533\n",
"Accuracy: 0.8054626583260116\n",
"Accuracy: 0.8402321980392969\n",
"Accuracy: 0.8331033509590885\n",
"Accuracy: 0.8458394692949511\n",
"Accuracy: 0.9357285240967607\n",
"Accuracy: 0.9110387297485635\n",
"Accuracy: 0.7468333796226976\n",
"Accuracy: 0.7767352849982638\n",
"Accuracy: 0.8364676561627661\n",
"Accuracy: 0.8527521275793825\n",
"Accuracy: 0.8992992485819427\n",
"Accuracy: 0.7452038544962007\n",
"\n",
"Best accuracy: 0.9357285240967607\n"
]
}
],
"source": [
"# TRAIN MODEL MULTIPLE TIMES FOR BEST SCORE\n",
"best = 0\n",
"for _ in range(20):\n",
" x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.1)\n",
"\n",
" linear = linear_model.LinearRegression()\n",
" \n",
" linear.fit(x_train, y_train)\n",
" acc = linear.score(x_test, y_test)\n",
" print(\"Accuracy: \" + str(acc))\n",
" \n",
" # If the current model has a better score than one we've already trained then save it\n",
" if acc > best:\n",
" best = acc\n",
" with open(\"Models/studentgrades.pickle\", \"wb\") as f:\n",
" pickle.dump(linear, f)\n",
" \n",
"print(\"\\nBest accuracy: \", best)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting data"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No handles with labels found to put in legend.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEJCAYAAABlmAtYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de3hTZZ4H8G8uLaW30KSlTAHFgghlKCwDVkFpgdLhwVGR7dRlAGVGxkHqKuDggjgIItgVOkUeQFyLoOI+a2URcZdxsMsCLiIWKnfLRS4ziLa0Kb0X2uTsH6Fp0+Z2Tt8kp+338zw8NG+SN7+mKT/Oed/f+WkkSZJARETkhjbQARARkfoxWRARkUdMFkRE5BGTBRERecRkQUREHjFZEBGRR/pAB+BL165dC3QIiI6ORmlpaaDDaINxycO45GFc8qglrri4OJf38ciCiIg8YrIgIiKPmCyIiMgjJgsiIvKIyYKIiDzyy26o0tJSbNiwATdu3IBGo0FqaiomT56M6upq5OTk4Pr164iJicH8+fMRHh7e5vn79u3Djh07AABTp05FSkqKP8ImIuowrNd/Aj79ENINMzQ9jMCj06GN6SVsfr8kC51Oh5kzZyI+Ph51dXVYtGgREhMTsW/fPgwdOhRTpkzBzp07sXPnTsyYMcPhudXV1di+fTuysrIAAIsWLcLIkSOdJhUioq7Iev0nSDlLges/AQAkALh4Ftb5rwpLGH45DRUVFYX4+HgAQPfu3dG7d2+YzWYUFBQgOTkZAJCcnIyCgoI2zz127BgSExMRHh6O8PBwJCYm4tixY/4Im4ioY/j0Q3uisLt9pCGK34vySkpKcOnSJQwYMAAVFRWIiooCYEsolZWVbR5vNpthMpnst41GI8xms9O58/PzkZ+fDwDIyspCdHS0D74DefR6vSriaI1xycO45GFc8rQ3LnNNFRqczVtTBaOg79evyaK+vh7Z2dmYNWsWQkNDFc+j0WicjqempiI1NdV+Ww0VkWqpzGyNccnDuORhXPK0Ny5rWITT8cawCFnzqqKCu7GxEdnZ2XjwwQeRlJQEADAYDCgvLwcAlJeXIzIyss3zjEYjysrK7LfNZrP9aISIiAA8Oh1ovTYR08s2LohfkoUkSdi0aRN69+6NX/3qV/bxkSNHYv/+/QCA/fv3Y9SoUW2eO3z4cBw/fhzV1dWorq7G8ePHMXz4cH+ETUTUIWhjekEz/1VokpKBe4ZCk5QMjcDFbcBPp6HOnj2LAwcO4I477sDChQsBANOmTcOUKVOQk5ODvXv3Ijo6GgsWLAAAfP/99/jiiy8wZ84chIeH4x//8R+xePFiAEB6ejp3QhERtaKN6QXMfsFn82skSZJ8NnuA8aqzrjEueRiXPIxLHrXEpYo1CyIi6rg6dT8LIiKgubrZXFNl2zkkuLpZKcvh/cD761Hc2ADog4AnnoUuKVnRXJ2igpuIKFBaVjfbaxEEVzcrYTm8H8jNbh64dRPIzYYFkJ0wOk0FNxFRwPihulmR99fLG3fHD98jkwURdWrSDedXfHA17jcNzmqu3Yy74Y/vkcmCiDo1TQ+jrHG/CQqSN+6GP75HJgsi6tz8UN2syBPPyht3xw/fIxe4iahT08b0gnX+q8CnH0JfU4VGleyG0iUlwwLY1ijauRuq5ffI3VBERAo1VTcbVVL81kSXlAwkJQspyvN1BTdPQxERkUdMFkRE5BFPQxGRKomsSBZZKd00FxoabDuX2jPXF7uA7e+i2GoFtFog/XfQTXxE0Vys4CaiLkdkRbLISmmhc32xC8jLbR6wWoG8XNtcMhMGK7iJqGsSWZEsslJa5Fzb35U37g4ruImoKxJakSywUlroXFarvHE3WMFNRF2S0IpkgZXSQufSuvjn19W4G/6o4PbLmsXGjRtRWFgIg8GA7Gzb+b6cnBx7c6La2lqEhoZi9erVbZ6bmZmJkJAQaLVa6H
Q6ZGVl+SNkIgqkR6cDF886nlpRWpH8xLOO6wwtxwM5V/rvHNcsWo7LJfL9csEvySIlJQWTJk3Chg0b7GPz58+3f/3+++8jNDTU5fNfeeUVREZG+jRGIlIPkRXJIiulHeZq524o3cRHbHNtf9d26qkdu6E6TQV3QkICSkpKnN4nSRIOHTqEpUuX+iMUIuogRFYki6yUbppLSFwTHwEmPtIhKrgDvnX2u+++g8FgwM9+9jOXj1m5ciUAYOLEiUhNTXX5uPz8fOTn5wMAsrKyEB0dLTZYBfR6vSriaI1xycO45GFc8qg1rpYCniwOHjyIMWPGuLx/xYoVMBqNqKiowGuvvYa4uDgkJCQ4fWxqaqpDMlHDNWDU0oi9NcYlD+OSh3HJo5a44uLiXN4X0GRhsVjwzTffuF20Nhptq/kGgwGjRo3ChQsXXCYLIiJnRFZKW4pOAlvfBGprgNAwYNbz0A0aKjhi9Qno1tmTJ08iLi4OJpPJ6f319fWoq6uzf33ixAnccccd/gyRiDo4e6V0U/1CU6X0F7vkz1V0EshZCpSVAHU1tr9zltrGOzm/HFmsXbsWZ86cQVVVFebMmYOMjAyMHz/e6Skos9mMt99+G4sXL0ZFRQXWrFkDwHYU8sADD2D48OH+CJmIOgt3ldJyjy62vglYLY5jVottPMvJNthOxC/JYt68eU7HMzMz24wZjUYsXrwYABAbG+u09oKIyGsCK6VRWyNvvBNhBTcRdW4CK6URGiZvvBNhsiCizs1VRbSSSulZzwNaneOYVmcb7+SYLIioU9NNfATImN18JKHVAhmzFe2G0g0aCsx/FTD1BLqH2f6e/2qX2A0V8DoLIiJfE1kprRs0tNMvZjvDIwsiIvKIyYKIiDziaSiiLk5or+tPtgG781DcNDA5A7rHZiibS6W9rkVWcDe99+aaKljDIoRfKVYkJguiLkxor+vbicLB7jxbT2mZCUOtva7tFdxNhXl1NbYKbgWL3C3fe3ufPcF9s0XiaSiirkxk7+bWicLTuDtq7XXtroJbLj/0zRaJyYKoC/NH72ZFVNrrWmQFt2rfexeYLIi6MH/0blZEpb2uRVZwq/a9d4HJgqgre3S6rVdzS0p7N0/OkDfujque1kp7XcsZd0dkBbfI994PdMuWLVsW6CB8paqqKtAhIDQ0FLW1tYEOow3GJU9njUsTFg4kjoKmuhIIj4RmwGBoZj2vaIFVOzgRksUCnD/dPKhwN5S2Tz9IsXHAqULAKgHBwcBv5ynaDaXtfw+k7mHAd8cASbIdUfz6KWW9rqNjId09BDh3yjZgiAKeWaxoN1TL9z4oygTproGK33tRIiIiXN6nkSRJ8mMsfnXt2rVAh6CaDlitMS55GJc8jEsetcTlrlMeT0MREZFHTBZEROSRX4ryNm7ciMLCQhgMBmRn2wpt8vLy8D//8z+IjIwEAEybNg0jRoxo89xjx45hy5YtsFqtmDBhAqZMmeKPkImEE1opfbu6ubixAdCLqW6GiOpmkRXcr78IXCxqHogfBN3iN5TNlf0noOh4c1yDhkH3wgpFc4n8OXYkfkkWKSkpmDRpEjZs2OAw/tBDD+GRR1x/KK1WKzZv3oyXX34ZJpMJixcvxsiRI9GnTx9fh0wklNBKabVWN4us4G6dKADgYhEsr78oO2E0JQoHRcdhyf6T7IQh8ufY0fjlNFRCQgLCw8NlP+/ChQvo1asXYmNjodfrMXr0aBQUFPggQiIfE1mtq9bqZpEV3K0Thadxd1onCk/j7nSwqmuRAnptqL/+9a84cOAA4uPj8cQTT7RJKGazGSaTyX7bZDLh/PnzLufLz89Hfn4+ACArKwvR0dG+CVwGvV6vijhaY1zytDcuc00VnNUe62uqYJQ5b3GjiyrmxgbZMRa7qW6WPZeb+zrLXCJ/jg7PV+nnvqWAJYu0tDSkp6cDAD766CO8//77mDt3rsNjnO3q1Wg0LudMTU1Famqq/bYatqKpZUtca4xLnv
bGZQ1zvn+9MSxC/rz6INupJyfjsufSap1f9kKrFfpz6CxzCf05tqCWz70qt8726NEDWq0WWq0WEyZMwPfff9/mMSa
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Drawing and plotting model\n",
"style.use(\"ggplot\")\n",
"plot = \"G1\" # Change this to G1, G2, studytime or absences to see other graphs\n",
"plt.scatter(data[plot], data[\"G3\"]) \n",
"plt.legend(loc=4)\n",
"plt.xlabel(plot)\n",
"plt.ylabel(\"Final Grade\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No handles with labels found to put in legend.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEJCAYAAABlmAtYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de1BUZ5oG8Kf7NIgN2EKDMHjJBBNjcERjMDgmCiqylkklxqHMOF7WyRrjgpsAGTM6Omq8hYphMJa6pmJiNJPd0rhqTK2V0R4XzBo1RgQviRe87RgTEFqRq0L32T86tPSNPt2nLwd5flUW9Nvn47y0DS/nnO87r0oURRFEREQdUAc7ASIiUj4WCyIicovFgoiI3GKxICIit1gsiIjILRYLIiJySxPsBPzpxo0bXo2LiYlBdXW1j7PxHeYnD/OTh/nJo+T8EhISXD7HIwsiInKLxYKIiNxisSAiIrdYLIiIyC0WCyIicisgs6Gqq6uxYcMG3L59GyqVChkZGZg4cSLq6+tRVFSEmzdvIjY2Fnl5eYiIiHAYX1xcjF27dgEAJk+ejPT09ECkTUTUaZhv/gR8/inE20aoekYDL0yDOjbeZ18/IMVCEATMmDEDiYmJaGpqwoIFC5CcnIzi4mIMHjwYkyZNwp49e7Bnzx5Mnz7dZmx9fT127tyJgoICAMCCBQuQkpLitKgQEXVF5ps/QSxaAtz8CQAgAsDl8zDnLfdZwQjIaaioqCgkJiYCALp3747evXvDaDTi+PHjSEtLAwCkpaXh+PHjDmPLysqQnJyMiIgIREREIDk5GWVlZYFIm4ioc/j8U2uhsPr5SMNXAr4or6qqCleuXMEjjzyC2tpaREVFAbAUlDt37jhsbzQaodfrrY+jo6NhNBqdfm2DwQCDwQAAKCgoQExMjFc5ajQar8cGAvOTh/nJw/zk8Ud+xoY6tDjbV0Mdon20r4AWi+bmZhQWFmLWrFnQarVefx2VSuU0npGRgYyMDOtjb1dJKnmFJcD85GJ+8jA/efyRnzk80mm8NTzSo30pYgV3a2srCgsLMWrUKKSmpgIAdDodbt26BQC4desWevTo4TAuOjoaNTU11sdGo9F6NEJERABemAbYX5uIjbfEfSQgxUIURWzatAm9e/fGc889Z42npKSgpKQEAFBSUoLhw4c7jB06dCjKy8tRX1+P+vp6lJeXY+jQoYFIm4ioU1DHxkOVtxyq1DTgscFQpaZB5cOL20CATkOdP38ehw4dQr9+/TB//nwAwNSpUzFp0iQUFRXh4MGDiImJQX5+PgDg0qVLOHDgAObOnYuIiAj85je/wcKFCwEAWVlZnAlFRGRHHRsPzH7Db19fJYqi6LevHmS862xwMD95mJ88zM97irhmQUREndcD3c+CiKizMB0rAbatB1pagJAQYOY8CKlp0sefOw18/B7Q2ABow4FZr0MYONhn+fHIgogoyEzHSoDNhcC9u4BotnzcXGiJSxl/7jRQtASoqQKaGiwfi5ZY4j7CYkFEFGzb1nsWt/fxe4DZZBszmyxxH2GxICIKthZn6687iNtrbPAs7gUWCyKiYAsJ8SxuTxvuWdwLLBZERME2c55ncXuzXgfUgm1MLVjiPsJiQUQUZEJqmmVBXWg3QKW2fJz9huTZUMLAwUDeckDfC+gebvmYt9yns6E4dZaISAGE1DTAg6myDuMHDgYKNvswI1s8siAiIrdYLIiIyC2ehiIigvwe1nJXUJsK/wycK78fGDgEwhsrPPkW/IpHFkTU5bX1sBaPlQDnT0M8VgKxaImlgEggdwW1Q6EAgHPllrhCsFgQEcntYS13BbV9oXAXDwIWCyLq8sTbRo/iDgKwgjrYWCyIqMtT9Yz2KO4gACuogy0gF7g3btyI0tJS6HQ6FBYWAgCKioqszYkaGxuh1WqxZs0ah7E5OTkICwuDWq2GIAgoKCgIRMpE1JW8MA24fN
72VJQnPaxnvW65ZtH+VJQnK6gHDnF+ymngEGnjAyAgxSI9PR0TJkzAhg0brLG8vDzr59u2bYNWq3U5funSpejRo4dfcySirksdGw9z3nKvZ0MJAwfDlLfc69lQwhsrFD8bKiDFIikpCVVVVU6fE0URR44cwZIlSwKRChGRU3J7WMtdQa2kwuBM0NdZfP/999DpdPjFL37hcptVq1YBAMaPH4+MjAyX2xkMBhgMBgBAQUEBYmJivMpJo9F4PTYQmJ88zE8e5ieP0vNzJejF4vDhw3j66addPr9ixQpER0ejtrYWK1euREJCApKSkpxum5GRYVNMvG2KruSG6gDzk4v5ycP85FFyfgkJCS6fC2qxMJlM+Oabbzq8aB0dbZmNoNPpMHz4cFRUVLgsFkTUdbWtwDY21MEcHhn4Fdgye2grXVCnzp4+fRoJCQnQ6/VOn29ubkZTU5P181OnTqFfv36BTJGIOoH2K7BbzpQGfgW2zB7anUFAisXatWuxePFi3LhxA3PnzsXBgwcBOD8FZTQa8fbbbwMAamtrsWTJEsyfPx9/+tOfMGzYMAwdOjQQKRNRZxLsFdhye2h3AgE5DZWbm+s0npOT4xCLjo7GwoULAQBxcXFO114QEbUX9BXYcntodwJcwU1EnV7QV2DL7aHdCbBYEFHn98I0y4rr9jxdgS2nh7XcHtqdQNCnzhIRydV+BbamoQ6tHs6Gkr0COzUNJuCBng3FYkFED4S2FdjRXq5jkL0CW2YPbaXjaSgiInKLxYKIiNziaSgieiC03bW1si3g4V1bTZv/Ahwrvh9ITYcwO9+XKXZqPLIgok5Pbg9rh0IBAMeKLXECwGJBRA8CuT2s7QuFu3gXxGJBRERusVgQEZFbLBZE1Pm56lUttYd1arpn8S6IxYKIOj3hjRWOhcGD2VDC7HzHwsDZUDY4dZaIHghthcHbTnTC7HyAxcElHlkQEZFbLBZERORWQE5Dbdy4EaWlpdDpdCgsLAQA7NixA3//+9/Ro0cPAMDUqVMxbNgwh7FlZWXYsmULzGYzxo0bh0mTJgUiZSLyUFsPbPG20dJHwtMe2J9uAor33Q+kT4Qwba708a88DwD3V3ADED7YK338A95DW66AFIv09HRMmDABGzZssIk/++yzeP75512OM5vN+PDDD7F48WLo9XosXLgQKSkp6NOnj79TJiIPtPXAbmttKgLA5fMw5y2XVDAcCgUAFO+DCZBUMNoKhbO4lIJh7aHdpq2HNsCC8bOAnIZKSkpCRESEx+MqKioQHx+PuLg4aDQajBw5EsePH/dDhkQki9we2PaFwl3c17pAD225gjob6m9/+xsOHTqExMREzJw506GgGI1G6PV662O9Xo+LFy+6/HoGgwEGgwEAUFBQgJiYGK/y0mg0Xo8NBOYnD/OTx1l+xoY6OOs2rWmoQ7SE76Wyg+ekvBayx7e66JXd2uLz/wul//+6ErRikZmZiaysLADA9u3bsW3bNmRnZ9tsI4qiwziVSuXya2ZkZCAjI8P62Jvpc4D3U+8ChfnJw/zkcZafOTzS6bat4ZGyv5eAjNeEWE49OYn7+v9Cyf+/CQkJLp8L2myonj17Qq1WQ61WY9y4cbh06ZLDNnq9HjU1NdbHNTU1iIqKCmSaRCSF3B7Y6RM9i/taF+ihLVfQisWtW7esn3/zzTfo27evwzb9+/fHjz/+iKqqKrS2tuLrr79GSkpKINMkIgnUsfFQ5S2HKjUNeGwwVKlpUEm8uA38fBHbvjB4MBvK1UVsqbOhhNQ0YPYbQGg3QKW2fJz9Bi9ut6MSnZ3r8bG1a9fiu+++Q11dHXQ6HaZMmYKzZ8/i6tWrUKlUiI2NxZw5cxAVFQWj0Yj3338fCxcuBACUlpZi69atMJvNGDNmDCZPnix5vzdu3PAqXyUfJgLMTy7mJw/zk0fJ+XV0GiogxSJYWCyCg/nJw/zkYX7eU+Q1CyIi6jx4I0EiAgCYzp0GPn4PaGwAtOHArN
chDBwsffzbbwKXz90PJA6EsPAd6eP/8Hug9v6EFuj0EN7dInl82wpyY0OdZXaWhyvIqWM8siAiS6EoWgLUVAFNDZa
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot = \"G2\" # Change this to G1, G2, studytime or absences to see other graphs\n",
"plt.scatter(data[plot], data[\"G3\"]) \n",
"plt.legend(loc=4)\n",
"plt.xlabel(plot)\n",
"plt.ylabel(\"Final Grade\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## K-Nearest Neighbors (KNN)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" buying maint door persons lug_boot safety class\n",
"0 vhigh vhigh 2 2 small low unacc\n",
"1 vhigh vhigh 2 2 small med unacc\n",
"2 vhigh vhigh 2 2 small high unacc\n",
"3 vhigh vhigh 2 2 med low unacc\n",
"4 vhigh vhigh 2 2 med med unacc\n"
]
}
],
"source": [
"import sklearn\n",
"from sklearn.utils import shuffle\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn import linear_model, preprocessing\n",
"\n",
"data = pd.read_csv(\"Datasets/Car Data Set/car.data\")\n",
"print(data.head())\n",
"\n",
"le = preprocessing.LabelEncoder()\n",
"buying = le.fit_transform(list(data[\"buying\"]))\n",
"maint = le.fit_transform(list(data[\"maint\"]))\n",
"door = le.fit_transform(list(data[\"door\"]))\n",
"persons = le.fit_transform(list(data[\"persons\"]))\n",
"lug_boot = le.fit_transform(list(data[\"lug_boot\"]))\n",
"safety = le.fit_transform(list(data[\"safety\"]))\n",
"cls = le.fit_transform(list(data[\"class\"]))\n",
" \n",
"predict = \"class\" #optional\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy model: 0.9479768786127167\n"
]
}
],
"source": [
"X = list(zip(buying, maint, door, persons, lug_boot, safety))\n",
"y = list(cls)\n",
"\n",
"x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size = 0.1)\n",
"\n",
"model = KNeighborsClassifier(n_neighbors=9)\n",
"\n",
"model.fit(x_train, y_train)\n",
"acc = model.score(x_test, y_test)\n",
"print(\"Accuracy model: \",acc)\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted: good Data: (3, 3, 3, 2, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[1237, 705, 154, 339, 1066, 1092, 239, 757, 1143]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (3, 2, 2, 2, 2, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 680, 814, 913, 466, 842, 266, 927, 508, 1218]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 0, 3, 1, 0, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1115, 94, 1417, 673, 1000, 1512, 796, 11, 510]],\n",
" dtype=int64))\n",
"Predicted: acc Data: (1, 2, 3, 1, 2, 0) Actual: acc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 460, 1480, 1498, 942, 1268, 78, 766, 1521, 970]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 0, 2, 0, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 571, 1239, 222, 76, 31, 1546, 531, 1126, 211]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 3, 1, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1161, 1285, 1110, 1290, 1347, 1189, 118, 695, 1171]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 1, 2, 2, 1, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 194, 1086, 82, 1238, 1025, 998, 173, 1483, 440]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 2, 3, 1, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1339, 306, 999, 663, 1524, 865, 1470, 168, 1515]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 2, 1, 0, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 721, 487, 1241, 867, 261, 15, 119, 1529, 1013]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 2, 1, 1, 0, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[181, 887, 809, 238, 529, 433, 866, 736, 34]], dtype=int64))\n",
"Predicted: good Data: (0, 3, 3, 0, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 795, 644, 979, 526, 91, 437, 1425, 1545, 619]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (2, 2, 0, 2, 0, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 883, 598, 48, 52, 1294, 21, 423, 1325, 574]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 1, 2, 2, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1125, 267, 548, 1029, 1372, 1473, 732, 353, 422]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 0, 3, 2, 0, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1.41421356,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 240, 385, 1099, 1483, 445, 1115, 1177, 1301, 370]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 2, 0, 0, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 299, 368, 166, 1131, 277, 92, 164, 403, 1036]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 2, 0, 0, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 337, 1348, 309, 836, 941, 928, 620, 831, 533]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 0, 1, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 910, 380, 593, 282, 1314, 1313, 546, 226, 495]],\n",
" dtype=int64))\n",
"Predicted: vgood Data: (2, 2, 2, 1, 1, 0) Actual: vgood\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 679, 622, 754, 37, 805, 1218, 1084, 83, 431]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 3, 1, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 350, 586, 357, 872, 803, 1552, 142, 222, 1115]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 2, 2, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1021, 79, 1309, 1326, 1365, 667, 1068, 732, 475]],\n",
" dtype=int64))\n",
"Predicted: acc Data: (1, 1, 1, 2, 0, 2) Actual: acc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 410, 416, 1477, 369, 969, 66, 1360, 1172, 181]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 0, 0, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1020, 656, 747, 456, 1071, 1010, 605, 120, 853]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 1, 1, 1, 2) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 27, 895, 930, 745, 204, 1201, 115, 1400, 1491]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 3, 2, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1388, 1034, 1252, 652, 611, 1353, 1125, 548, 255]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 0, 2, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[1024, 773, 1482, 1171, 901, 1394, 444, 176, 941]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 2, 0, 0, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 2, 84, 1192, 1152, 1374, 159, 1081, 1532, 207]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 1, 1, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1122, 345, 879, 256, 1457, 1042, 701, 1410, 1170]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 2, 1, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 970, 1076, 1426, 512, 1548, 476, 1455, 1265, 1001]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 2, 0, 1, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1062, 529, 400, 433, 763, 1419, 647, 633, 1211]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 3, 2, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 634, 30, 255, 705, 1478, 1038, 1385, 770, 201]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 2, 2, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 201, 770, 150, 923, 112, 1388, 748, 1353, 1308]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 0, 0, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 185, 456, 987, 1322, 692, 1071, 1074, 55, 774]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 3, 3, 1, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1437, 371, 242, 1451, 93, 794, 1193, 498, 332]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 0, 0, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 920, 380, 463, 546, 119, 693, 15, 1122, 1212]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 0, 2, 2, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 214, 326, 20, 353, 573, 1308, 784, 1493, 1385]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 0, 1, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1045, 1297, 1235, 919, 637, 496, 453, 1463, 412]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 2, 2, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 561, 440, 811, 381, 1250, 1133, 82, 259, 411]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 2, 1, 1, 2) Actual: unacc\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 894, 1324, 1021, 761, 992, 643, 830, 823, 1326]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 3, 0, 0, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 645, 90, 1393, 1176, 1424, 731, 1479, 1274, 510]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 0, 1, 2, 1, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 746, 193, 407, 228, 224, 1044, 1243, 1278, 1027]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (1, 0, 0, 2, 1, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1094, 439, 676, 1416, 500, 1375, 746, 193, 819]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 0, 2, 2, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 893, 1483, 1234, 126, 1543, 1475, 673, 531, 194]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 0, 2, 1, 1, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 20, 326, 901, 1491, 178, 528, 444, 770, 1385]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 2, 2, 0, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 867, 1407, 927, 915, 1336, 361, 1529, 300, 1035]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 0, 2, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 592, 558, 1525, 923, 1357, 270, 518, 206, 971]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 3, 2, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1188, 364, 580, 29, 626, 1466, 106, 1186, 680]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 1, 2, 0, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[441, 59, 926, 559, 768, 505, 607, 316, 81]], dtype=int64))\n",
"Predicted: good Data: (0, 2, 2, 2, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 675, 965, 88, 1179, 57, 246, 1250, 381, 525]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 2, 0, 2, 0) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 476, 952, 1442, 1181, 145, 187, 636, 970, 543]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 2, 2, 2, 1, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1233, 1210, 525, 151, 538, 360, 344, 998, 82]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 0, 0, 0, 0, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 243, 1397, 954, 815, 847, 706, 848, 892, 439]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 3, 2, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1022, 1231, 239, 1430, 51, 522, 26, 339, 1053]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 1, 3, 0, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1448, 1061, 898, 452, 570, 367, 1069, 606, 1394]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 3, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1192, 587, 1522, 1081, 493, 1004, 1186, 917, 334]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 3, 3, 0, 1, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 480, 591, 1206, 884, 666, 984, 669, 767, 1120]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 3, 1, 0, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1078, 903, 564, 951, 1000, 1150, 430, 730, 271]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 1, 0, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1412, 1122, 226, 1088, 1500, 751, 1212, 1457, 1350]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 2, 3, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 897, 743, 984, 236, 467, 480, 1389, 286, 452]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (1, 0, 1, 1, 0, 2) Actual: unacc\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1477, 1205, 1201, 1106, 66, 900, 1003, 1400, 599]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 2, 3, 0, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[699, 858, 301, 537, 889, 917, 170, 334, 499]], dtype=int64))\n",
"Predicted: good Data: (2, 3, 3, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 51, 666, 984, 504, 459, 105, 587, 509, 499]], dtype=int64))\n",
"Predicted: good Data: (2, 2, 1, 2, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 659, 902, 1140, 1304, 398, 678, 712, 373, 319]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 0, 3, 2, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 711, 958, 1099, 1234, 1230, 1418, 1000, 1177, 1115]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 2, 1, 2, 1, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1025, 310, 1427, 1519, 1453, 876, 948, 88, 1360]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 3, 3, 0, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1437, 1425, 493, 587, 1370, 619, 276, 242, 399]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 0, 1, 1, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 785, 870, 930, 1155, 137, 1102, 1491, 223, 573]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 0, 0, 0, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 449, 565, 243, 97, 1196, 633, 848, 38, 420]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 3, 2, 1, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1464, 888, 563, 34, 881, 612, 1082, 232, 761]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (3, 1, 3, 1, 0, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1376, 611, 1252, 1078, 1184, 1012, 1334, 1080, 1245]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 0, 1, 0, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 863, 1405, 815, 780, 845, 1397, 488, 1321, 14]],\n",
" dtype=int64))\n",
"Predicted: acc Data: (1, 2, 3, 2, 0, 2) Actual: acc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 103, 448, 660, 1262, 1490, 972, 534, 184, 1229]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 2, 0, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 867, 50, 1223, 15, 254, 361, 560, 728, 1035]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 2, 0, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1469, 1169, 1209, 719, 1432, 322, 449, 554, 1196]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 2, 1, 0, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1458, 159, 674, 1316, 1242, 260, 292, 2, 788]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (2, 2, 3, 1, 0, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1262, 318, 596, 534, 1236, 279, 583, 741, 1537]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 0, 1, 0, 1, 0) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1072, 682, 1097, 847, 342, 819, 220, 1446, 954]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 1, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 635, 747, 1163, 1538, 536, 1050, 1377, 216, 1359]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 0, 1, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 602, 1211, 97, 647, 1476, 742, 1167, 633, 265]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 3, 2, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1414, 1528, 1365, 93, 1451, 327, 1068, 1191, 364]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 3, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 499, 1111, 367, 1478, 1482, 898, 444, 537, 1394]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 2, 2, 2, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 816, 151, 246, 42, 1250, 198, 908, 1265, 82]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 1, 2, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 744, 671, 547, 1265, 885, 1382, 41, 315, 274]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 3, 0, 0, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 11, 321, 96, 730, 594, 1298, 1000, 1512, 165]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 2, 0, 2, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1516, 653, 506, 1185, 582, 5, 1006, 1517, 148]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 1, 2, 1, 2, 0) Actual: unacc\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1214, 113, 1434, 654, 145, 1426, 28, 970, 1113]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 3, 1, 0, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 481, 89, 235, 1046, 976, 702, 347, 492, 966]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 1, 1, 2, 0, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1248, 351, 697, 1391, 224, 1226, 633, 602, 1006]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (1, 0, 1, 1, 2, 2) Actual: unacc\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 962, 245, 1377, 204, 1400, 434, 990, 1020, 747]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 1, 2, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 602, 1211, 1543, 468, 893, 850, 1172, 265, 1025]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 3, 3, 2, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 13, 1068, 717, 79, 302, 93, 242, 1138, 934]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 2, 1, 2, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 209, 246, 690, 712, 1153, 198, 613, 229, 373]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 1, 1, 2, 2, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1516, 1044, 547, 1434, 315, 113, 1517, 672, 1006]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 0, 2, 2, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[213, 194, 445, 407, 259, 228, 531, 82, 978]], dtype=int64))\n",
"Predicted: good Data: (1, 2, 3, 1, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1435, 735, 1470, 512, 236, 1515, 1455, 743, 183]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 2, 0, 1, 0) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 62, 420, 187, 340, 145, 31, 1126, 1113, 1220]],\n",
" dtype=int64))\n",
"Predicted: vgood Data: (1, 1, 0, 2, 0, 0) Actual: vgood\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 676, 1321, 351, 1014, 1378, 1375, 1039, 1248, 274]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 2, 0, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 337, 889, 1394, 537, 748, 941, 230, 461, 176]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 0, 2, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 128, 298, 1031, 176, 230, 632, 149, 773, 1135]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (3, 1, 0, 2, 0, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1357, 270, 317, 598, 21, 868, 1431, 186, 807]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 3, 0, 0, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 716, 1036, 40, 1551, 328, 109, 617, 33, 982]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 0, 2, 1, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 839, 962, 1157, 695, 245, 39, 912, 127, 825]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 1, 2, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1039, 1121, 1549, 1415, 181, 1248, 887, 1360, 647]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 3, 0, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1060, 36, 490, 1169, 244, 1107, 179, 1017, 449]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 3, 1, 2, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1413, 253, 1516, 74, 1187, 1185, 1270, 1350, 624]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (3, 1, 2, 2, 2, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 838, 150, 700, 47, 201, 1493, 101, 1135, 1308]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (2, 3, 2, 1, 0, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1180, 596, 741, 413, 232, 881, 63, 551, 292]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 2, 1, 2, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 712, 814, 1304, 197, 466, 290, 1317, 487, 1307]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 0, 3, 1, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 125, 370, 445, 531, 1115, 343, 94, 222, 727]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 1, 1, 2, 1, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1032, 672, 850, 1254, 1006, 274, 547, 265, 351]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 3, 0, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1015, 100, 199, 1161, 1285, 1002, 773, 824, 1171]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 3, 0, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1436, 843, 179, 102, 983, 852, 490, 1036, 322]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 3, 0, 2, 1, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 627, 1531, 140, 1427, 429, 233, 876, 355, 1439]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 1, 2, 1, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 575, 754, 461, 1030, 1275, 1076, 748, 1154, 622]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 0, 3, 2, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1.41421356,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 796, 445, 1115, 1251, 1099, 958, 126, 1230, 240]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 2, 3, 1, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1191, 1451, 1273, 498, 1553, 1194, 1481, 698, 170]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 2, 0, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 920, 1051, 75, 637, 261, 119, 919, 1178, 687]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (2, 3, 1, 1, 1, 0) Actual: unacc\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1457, 1539, 879, 394, 1489, 1190, 392, 751, 797]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 0, 2, 1, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 713, 1189, 1385, 444, 412, 773, 20, 72, 634]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 2, 2, 0, 0, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1532, 159, 330, 377, 1080, 260, 1174, 300, 207]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 1, 1, 1, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1452, 885, 1095, 219, 933, 414, 107, 1382, 691]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 3, 3, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1411, 405, 1425, 428, 437, 493, 526, 168, 556]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 2, 0, 0, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 544, 449, 1456, 1017, 133, 97, 188, 763, 1503]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 0, 2, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 148, 1517, 1278, 1006, 625, 113, 349, 457, 547]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 0, 0, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[1016, 43, 994, 774, 248, 1071, 863, 639, 954]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 3, 2, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 826, 1251, 1238, 663, 82, 1256, 445, 1139, 1552]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 2, 3, 0, 0, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 911, 1224, 399, 1533, 834, 556, 1151, 1437, 96]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 0, 3, 1, 0, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[1176, 1177, 731, 1475, 1115, 370, 240, 937, 1205]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 2, 0, 2, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 568, 1043, 1211, 468, 116, 710, 776, 1419, 1062]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (2, 0, 1, 2, 1, 0) Actual: unacc\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1416, 1303, 0, 1132, 294, 746, 436, 455, 1033]],\n",
" dtype=int64))\n",
"Predicted: vgood Data: (1, 1, 3, 1, 0, 0) Actual: vgood\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1417, 1158, 135, 1078, 911, 703, 1000, 160, 958]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 2, 0, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1444, 164, 102, 567, 249, 1505, 1131, 1209, 723]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 2, 1, 1, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1108, 608, 659, 712, 686, 613, 487, 721, 1440]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 2, 2, 1, 2, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 965, 352, 654, 1515, 306, 283, 512, 1426, 908]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 1, 1, 0, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 320, 1207, 139, 1155, 1241, 223, 786, 553, 432]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 0, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 55, 762, 311, 1168, 1026, 1300, 729, 1101, 1396]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 2, 3, 0, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 587, 276, 117, 1061, 570, 1481, 509, 1004, 698]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (0, 0, 3, 2, 1, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 94, 445, 796, 228, 1256, 1164, 1124, 958, 1486]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 1, 2, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 434, 1381, 714, 1540, 204, 1134, 177, 115, 916]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 1, 0, 0, 1, 2) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 146, 1362, 1485, 871, 1266, 541, 1168, 46, 311]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 0, 1, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 359, 827, 1291, 738, 919, 920, 546, 495, 1042]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 2, 0, 0, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 111, 62, 1533, 45, 1306, 187, 1255, 160, 549]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 3, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 29, 864, 917, 1450, 334, 1495, 254, 1336, 17]],\n",
" dtype=int64))\n",
"Predicted: vgood Data: (1, 2, 0, 1, 0, 0) Actual: vgood\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1321, 221, 809, 1209, 400, 529, 1378, 322, 387]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 2, 3, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 509, 1009, 570, 1199, 499, 367, 459, 858, 666]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 0, 4, 1, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[ 384, 557, 313, 1264, 1356, 890, 1454, 832, 1334]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 1, 0, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1130, 25, 521, 133, 565, 97, 1019, 602, 988]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 2, 3, 1, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 301, 961, 1184, 611, 609, 583, 170, 652, 330]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 0, 1, 0, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1322, 479, 223, 773, 1074, 1241, 1024, 870, 813]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 0, 0, 1, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1049, 1010, 997, 1020, 524, 986, 990, 284, 799]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 3, 0, 1, 1, 2) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 907, 1305, 288, 1472, 429, 196, 233, 402, 579]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 1, 2, 1, 1) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1360, 382, 373, 1029, 1502, 850, 274, 1127, 267]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 0, 3, 1, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1158, 1298, 451, 430, 1115, 343, 1177, 1000, 510]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 3, 2, 1, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[1217, 390, 1179, 486, 891, 908, 272, 965, 1429]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (3, 1, 2, 2, 0, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 152, 1279, 317, 1358, 1388, 770, 831, 784, 533]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 1, 2, 0, 2, 2) Actual: good\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[1171, 1163, 367, 216, 72, 1482, 1199, 1069, 158]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 3, 0, 0, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 702, 976, 1100, 1145, 200, 1240, 1461, 1281, 1244]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (3, 1, 3, 2, 0, 2) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1.41421356,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[1494, 262, 8, 255, 152, 831, 770, 1227, 1279]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 2, 0, 0, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 403, 736, 529, 1036, 1432, 817, 322, 647, 1173]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 3, 2, 2, 2, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[1187, 806, 955, 1179, 253, 908, 1200, 485, 7]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 2, 1, 0, 1, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 446, 297, 629, 889, 210, 555, 1013, 1178, 261]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (1, 3, 2, 1, 0, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 336, 1260, 794, 949, 2, 477, 1374, 707, 612]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 1, 0, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 479, 1074, 1097, 1028, 664, 1446, 54, 774, 1016]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 1, 2, 0, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 111, 31, 62, 1479, 6, 1346, 1393, 549, 539]],\n",
" dtype=int64))\n",
"Predicted: good Data: (1, 1, 0, 0, 1, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 875, 1072, 954, 249, 1196, 1396, 848, 554, 45]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 0, 2, 0, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1.41421356, 1.41421356, 1.41421356, 1.41421356]]), array([[1130, 549, 426, 510, 878, 1255, 1126, 1239, 1475]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (2, 1, 0, 2, 1, 0) Actual: acc\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 676, 640, 1033, 835, 206, 1525, 1014, 1416, 406]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 0, 2, 1, 0, 1) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 451, 1475, 1346, 1115, 16, 531, 673, 549, 126]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (3, 1, 0, 1, 0, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.41421356]]), array([[ 157, 425, 186, 180, 85, 780, 1405, 957, 1182]],\n",
" dtype=int64))\n",
"Predicted: good Data: (0, 0, 1, 2, 2, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 990, 986, 434, 811, 1133, 366, 801, 1504, 407]],\n",
" dtype=int64))\n",
"Predicted: good Data: (3, 3, 3, 0, 0, 2) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 17, 251, 56, 1537, 540, 1395, 858, 170, 851]],\n",
" dtype=int64))\n",
"Predicted: unacc Data: (3, 1, 2, 1, 1, 2) Actual: unacc\n",
"N: (array([[1., 1., 1., 1., 1., 1., 1., 1., 1.]]), array([[ 444, 1333, 831, 141, 748, 941, 1520, 770, 575]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 2, 0, 0, 2, 0) Actual: good\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.41421356, 1.41421356]]), array([[ 719, 1500, 1212, 1355, 994, 264, 581, 983, 1128]],\n",
" dtype=int64))\n",
"Predicted: good Data: (2, 0, 0, 2, 0, 0) Actual: unacc\n",
"N: (array([[1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1.41421356, 1.41421356, 1.41421356]]), array([[ 455, 70, 1225, 1375, 1014, 1416, 494, 1249, 780]],\n",
" dtype=int64))\n"
]
}
],
"source": [
"# Compare every test prediction with its true label and list its nearest neighbours.\n",
"N_NEIGHBORS = 9  # number of neighbours reported for each test sample\n",
"\n",
"predicted = model.predict(x_test)\n",
"\n",
"# Label names indexed by integer class code.\n",
"# NOTE(review): assumes the class column was encoded with sklearn's LabelEncoder, which\n",
"# numbers labels in sorted order (acc=0, good=1, unacc=2, vgood=3). The previous order\n",
"# (unacc, acc, good, vgood) printed the wrong name for codes 0, 1 and 2 -- confirm\n",
"# against the cell that encodes the labels.\n",
"names = [\"acc\", \"good\", \"unacc\", \"vgood\"]\n",
"\n",
"for features, pred_code, true_code in zip(x_test, predicted, y_test):\n",
"    print(\"Predicted: \", names[pred_code], \"Data: \", features, \"Actual: \", names[true_code])\n",
"    # With return_distance=True, kneighbors yields (distances, indices of fitted samples).\n",
"    n = model.kneighbors([features], n_neighbors=N_NEIGHBORS, return_distance=True)\n",
"    print(\"N: \", n)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Support Vector Machines (SVM)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['mean radius' 'mean texture' 'mean perimeter' 'mean area'\n",
" 'mean smoothness' 'mean compactness' 'mean concavity'\n",
" 'mean concave points' 'mean symmetry' 'mean fractal dimension'\n",
" 'radius error' 'texture error' 'perimeter error' 'area error'\n",
" 'smoothness error' 'compactness error' 'concavity error'\n",
" 'concave points error' 'symmetry error' 'fractal dimension error'\n",
" 'worst radius' 'worst texture' 'worst perimeter' 'worst area'\n",
" 'worst smoothness' 'worst compactness' 'worst concavity'\n",
" 'worst concave points' 'worst symmetry' 'worst fractal dimension']\n",
"['malignant' 'benign']\n"
]
}
],
"source": [
"import sklearn\n",
"# `import sklearn` alone does not guarantee that the model_selection submodule is loaded;\n",
"# import it explicitly because later cells call sklearn.model_selection.train_test_split.\n",
"import sklearn.model_selection\n",
"from sklearn import datasets\n",
"from sklearn import svm\n",
"from sklearn import metrics\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# Load the breast cancer dataset that ships with scikit-learn.\n",
"cancer = datasets.load_breast_cancer()\n",
"\n",
"# Show the feature names followed by the class label names.\n",
"print(cancer.feature_names)\n",
"print(cancer.target_names)\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[1.095e+01 2.135e+01 7.190e+01 ... 1.424e-01 2.964e-01 9.606e-02]\n",
" [1.300e+01 2.182e+01 8.750e+01 ... 2.060e-01 4.378e-01 1.072e-01]\n",
" [1.184e+01 1.894e+01 7.551e+01 ... 6.913e-02 2.535e-01 7.993e-02]\n",
" ...\n",
" [1.187e+01 2.154e+01 7.683e+01 ... 8.750e-02 2.305e-01 9.952e-02]\n",
" [1.094e+01 1.859e+01 7.039e+01 ... 7.887e-02 2.251e-01 7.732e-02]\n",
" [1.049e+01 1.861e+01 6.686e+01 ... 6.528e-02 2.213e-01 7.842e-02]] [0 0 1 1 1 1 0 0 0 1 1 0 1 1 1 1 0 1 0 1 0 1 0 0 0 0 1 1 0 1 0 1 1 1 0 1 1\n",
" 1 0 1 1 0 1 0 0 1 1 1 0 1 1 0 0 1 1 0 1 1 0 0 1 1 0 1 1 1 0 1 0 1 0 1 1 1\n",
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 0 1 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1\n",
" 1 0 0 0 1 1 0 0 1 1 1 1 1 0 0 1 1 1 0 1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 1\n",
" 0 0 0 1 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 1 0 1 1 0 1 0 0 0 0 1 0 1\n",
" 0 1 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 0\n",
" 1 0 1 0 1 1 1 1 1 1 1 0 1 1 0 0 0 0 1 1 1 1 0 0 1 0 1 1 1 0 1 0 1 1 1 1 0\n",
" 1 1 0 1 1 0 1 1 1 0 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1\n",
" 1 1 0 0 1 1 1 1 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0\n",
" 0 0 0 0 1 0 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 0 0 1 1 1\n",
" 0 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 1 0 0 0 0 1 1 1 1 1 1 1 0 1 0 0\n",
" 1 1 1 1 1 1 1 1 0 0 1 0 1 0 1 1 0 1 1 1 1 0 1 1 1 1 0 0 1 0 0 0 1 1 1 0 1\n",
" 0 1 1 1 0 0 1 0 1 1 1]\n"
]
}
],
"source": [
"# Feature matrix and target vector taken from the loaded dataset.\n",
"x = cancer.data\n",
"y = cancer.target\n",
"\n",
"# Hold out 20% of the samples for testing. A fixed random_state makes the split, and\n",
"# therefore the accuracy computed from it, reproducible across kernel restarts.\n",
"x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(\n",
"    x, y, test_size=0.2, random_state=42\n",
")\n",
"\n",
"print(x_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.9385964912280702\n"
]
}
],
"source": [
"# Train a support vector classifier with a linear kernel on the training split.\n",
"clf = svm.SVC(kernel=\"linear\").fit(x_train, y_train)\n",
"\n",
"# Predict the held-out samples and score the predictions against the true labels.\n",
"y_pred = clf.predict(x_test)\n",
"acc = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)\n",
"\n",
"print(\"Accuracy: \", acc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}