mirror of
https://github.com/Andreaierardi/Master-DataScience-Notes.git
synced 2025-01-23 01:37:36 +01:00
finish lecture 4
This commit is contained in:
parent
c829b0fdd0
commit
416b3bad3b
@ -1,3 +1,6 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
\section{Lecture 2 - 07-04-2020}
|
\section{Lecture 2 - 07-04-2020}
|
||||||
|
|
||||||
\subsection{Argomento}
|
\subsection{Argomento}
|
||||||
@ -193,3 +196,5 @@ We want to replace this process with a predictor (so we don’t have to bored a
|
|||||||
person).\\
|
person).\\
|
||||||
y is the ground truth for x $\rightarrow$ mean reality!\\
|
y is the ground truth for x $\rightarrow$ mean reality!\\
|
||||||
If i want to predict stock for tomorrow, i will wait tomorrow to see the ground truth.
|
If I want to predict the stock price for tomorrow, I have to wait until tomorrow to see the ground truth.
|
||||||
|
|
||||||
|
\end{document}
|
@ -1,4 +1,4 @@
|
|||||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 15:16
|
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 15:20
|
||||||
entering extended mode
|
entering extended mode
|
||||||
**./lecture3.tex
|
**./lecture3.tex
|
||||||
(lecture3.tex
|
(lecture3.tex
|
||||||
@ -354,7 +354,7 @@ MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><E:/Program Files/MiKTeX 2
|
|||||||
type1/public/amsfonts/cm/cmsy8.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/pub
|
type1/public/amsfonts/cm/cmsy8.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/pub
|
||||||
lic/amsfonts/cm/cmti12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsf
|
lic/amsfonts/cm/cmti12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsf
|
||||||
onts/symbols/msbm10.pfb>
|
onts/symbols/msbm10.pfb>
|
||||||
Output written on lecture3.pdf (6 pages, 135512 bytes).
|
Output written on lecture3.pdf (6 pages, 135258 bytes).
|
||||||
PDF statistics:
|
PDF statistics:
|
||||||
70 PDF objects out of 1000 (max. 8388607)
|
70 PDF objects out of 1000 (max. 8388607)
|
||||||
0 named destinations out of 1000 (max. 500000)
|
0 named destinations out of 1000 (max. 500000)
|
||||||
|
Binary file not shown.
Binary file not shown.
@ -210,9 +210,9 @@ $\hat{y} = + \quad or \quad \hat{y} = - $
|
|||||||
\\\
|
\\\
|
||||||
I can come up with some sort of classifier.
|
I can come up with some sort of classifier.
|
||||||
\\\\
|
\\\\
|
||||||
Given $S$ training set, i can define $h_NN X \rightarrow \{-1,1\}\\
|
Given $S$ training set, I can define $\hnn \colon X \rightarrow \{-1,1\}\\
|
||||||
$
|
$
|
||||||
$h_NN(x) = $ label $y_t$ of the point $x_t$ in $S$ closest to $X$\\
|
$\hnn(x) = $ label $y_t$ of the point $x_t$ in $S$ closest to $x$\\
|
||||||
\textbf{(the breaking rule for ties)}
|
\textbf{(the breaking rule for ties)}
|
||||||
\\
|
\\
|
||||||
By closest we mean closest in Euclidean distance
|
By closest we mean closest in Euclidean distance
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
\relax
|
||||||
|
\@writefile{toc}{\contentsline {section}{\numberline {1}Lecture 4 - 07-04-2020}{1}\protected@file@percent }
|
||||||
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Computing $h_{NN}$}{1}\protected@file@percent }
|
||||||
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Tree Predictor}{2}\protected@file@percent }
|
@ -0,0 +1,292 @@
|
|||||||
|
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 16:28
|
||||||
|
entering extended mode
|
||||||
|
**./lecture4.tex
|
||||||
|
(lecture4.tex
|
||||||
|
LaTeX2e <2020-02-02> patch level 2
|
||||||
|
L3 programming layer <2020-02-14>
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/subfiles\subfiles.cls"
|
||||||
|
Document Class: subfiles 2020/02/14 v1.6 Multi-file projects (class)
|
||||||
|
Preamble taken from file `../main.tex'
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/tools\verbatim.sty"
|
||||||
|
Package: verbatim 2019/11/10 v1.5r LaTeX2e package for verbatim enhancements
|
||||||
|
\every@verbatim=\toks14
|
||||||
|
\verbatim@line=\toks15
|
||||||
|
\verbatim@in@stream=\read2
|
||||||
|
)
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/import\import.sty"
|
||||||
|
Package: import 2020/04/01 v 6.2
|
||||||
|
) (../main.tex
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/base\article.cls"
|
||||||
|
Document Class: article 2019/12/20 v1.4l Standard LaTeX document class
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/base\size12.clo"
|
||||||
|
File: size12.clo 2019/12/20 v1.4l Standard LaTeX file (size option)
|
||||||
|
)
|
||||||
|
\c@part=\count167
|
||||||
|
\c@section=\count168
|
||||||
|
\c@subsection=\count169
|
||||||
|
\c@subsubsection=\count170
|
||||||
|
\c@paragraph=\count171
|
||||||
|
\c@subparagraph=\count172
|
||||||
|
\c@figure=\count173
|
||||||
|
\c@table=\count174
|
||||||
|
\abovecaptionskip=\skip47
|
||||||
|
\belowcaptionskip=\skip48
|
||||||
|
\bibindent=\dimen134
|
||||||
|
)
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amsmath.sty"
|
||||||
|
Package: amsmath 2020/01/20 v2.17e AMS math features
|
||||||
|
\@mathmargin=\skip49
|
||||||
|
|
||||||
|
For additional information on amsmath, use the `?' option.
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amstext.sty"
|
||||||
|
Package: amstext 2000/06/29 v2.01 AMS text
|
||||||
|
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amsgen.sty"
|
||||||
|
File: amsgen.sty 1999/11/30 v2.0 generic functions
|
||||||
|
\@emptytoks=\toks16
|
||||||
|
\ex@=\dimen135
|
||||||
|
))
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amsbsy.sty"
|
||||||
|
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
|
||||||
|
\pmbraise@=\dimen136
|
||||||
|
)
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amsopn.sty"
|
||||||
|
Package: amsopn 2016/03/08 v2.02 operator names
|
||||||
|
)
|
||||||
|
\inf@bad=\count175
|
||||||
|
LaTeX Info: Redefining \frac on input line 227.
|
||||||
|
\uproot@=\count176
|
||||||
|
\leftroot@=\count177
|
||||||
|
LaTeX Info: Redefining \overline on input line 389.
|
||||||
|
\classnum@=\count178
|
||||||
|
\DOTSCASE@=\count179
|
||||||
|
LaTeX Info: Redefining \ldots on input line 486.
|
||||||
|
LaTeX Info: Redefining \dots on input line 489.
|
||||||
|
LaTeX Info: Redefining \cdots on input line 610.
|
||||||
|
\Mathstrutbox@=\box45
|
||||||
|
\strutbox@=\box46
|
||||||
|
\big@size=\dimen137
|
||||||
|
LaTeX Font Info: Redeclaring font encoding OML on input line 733.
|
||||||
|
LaTeX Font Info: Redeclaring font encoding OMS on input line 734.
|
||||||
|
\macc@depth=\count180
|
||||||
|
\c@MaxMatrixCols=\count181
|
||||||
|
\dotsspace@=\muskip16
|
||||||
|
\c@parentequation=\count182
|
||||||
|
\dspbrk@lvl=\count183
|
||||||
|
\tag@help=\toks17
|
||||||
|
\row@=\count184
|
||||||
|
\column@=\count185
|
||||||
|
\maxfields@=\count186
|
||||||
|
\andhelp@=\toks18
|
||||||
|
\eqnshift@=\dimen138
|
||||||
|
\alignsep@=\dimen139
|
||||||
|
\tagshift@=\dimen140
|
||||||
|
\tagwidth@=\dimen141
|
||||||
|
\totwidth@=\dimen142
|
||||||
|
\lineht@=\dimen143
|
||||||
|
\@envbody=\toks19
|
||||||
|
\multlinegap=\skip50
|
||||||
|
\multlinetaggap=\skip51
|
||||||
|
\mathdisplay@stack=\toks20
|
||||||
|
LaTeX Info: Redefining \[ on input line 2859.
|
||||||
|
LaTeX Info: Redefining \] on input line 2860.
|
||||||
|
)
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/systeme\systeme.sty"
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/xstring\xstring.sty"
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/generic/xstring\xstring.tex"
|
||||||
|
\integerpart=\count187
|
||||||
|
\decimalpart=\count188
|
||||||
|
)
|
||||||
|
Package: xstring 2019/02/06 v1.83 String manipulations (CT)
|
||||||
|
)
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/generic/systeme\systeme.tex"
|
||||||
|
\SYS_systemecode=\toks21
|
||||||
|
\SYS_systempreamble=\toks22
|
||||||
|
\SYSeqnum=\count189
|
||||||
|
)
|
||||||
|
Package: systeme 2019/01/13 v0.32 Mise en forme de systemes d'equations (CT)
|
||||||
|
)
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\amssymb.sty"
|
||||||
|
Package: amssymb 2013/01/14 v3.01 AMS font symbols
|
||||||
|
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\amsfonts.sty"
|
||||||
|
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
|
||||||
|
\symAMSa=\mathgroup4
|
||||||
|
\symAMSb=\mathgroup5
|
||||||
|
LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
|
||||||
|
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
|
||||||
|
(Font) U/euf/m/n --> U/euf/b/n on input line 106.
|
||||||
|
))
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/subfiles\subfiles.sty"
|
||||||
|
Package: subfiles 2020/02/14 v1.6 Multi-file projects (package)
|
||||||
|
)))
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/l3backend\l3backend-pdfmode.def"
|
||||||
|
File: l3backend-pdfmode.def 2020-02-03 L3 backend support: PDF mode
|
||||||
|
\l__kernel_color_stack_int=\count190
|
||||||
|
\l__pdf_internal_box=\box47
|
||||||
|
)
|
||||||
|
(lecture4.aux)
|
||||||
|
\openout1 = `lecture4.aux'.
|
||||||
|
|
||||||
|
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 3.
|
||||||
|
LaTeX Font Info: ... okay on input line 3.
|
||||||
|
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 3.
|
||||||
|
LaTeX Font Info: ... okay on input line 3.
|
||||||
|
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 3.
|
||||||
|
LaTeX Font Info: ... okay on input line 3.
|
||||||
|
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 3.
|
||||||
|
LaTeX Font Info: ... okay on input line 3.
|
||||||
|
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 3.
|
||||||
|
LaTeX Font Info: ... okay on input line 3.
|
||||||
|
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 3.
|
||||||
|
LaTeX Font Info: ... okay on input line 3.
|
||||||
|
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 3.
|
||||||
|
LaTeX Font Info: ... okay on input line 3.
|
||||||
|
LaTeX Font Info: Trying to load font information for U+msa on input line 8.
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsa.fd"
|
||||||
|
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
|
||||||
|
)
|
||||||
|
LaTeX Font Info: Trying to load font information for U+msb on input line 8.
|
||||||
|
|
||||||
|
("E:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsb.fd"
|
||||||
|
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
|
||||||
|
)
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 10--14
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 15--17
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 19--20
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 21--39
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 39--40
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
[1
|
||||||
|
|
||||||
|
{C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 42--66
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 42--66
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 42--66
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 42--66
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 68--70
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 73--88
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
[2]
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 93--99
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Overfull \hbox (30.99239pt too wide) detected at line 115
|
||||||
|
\OML/cmm/m/it/12 X \OT1/cmr/m/n/12 = \OMS/cmsy/m/n/12 f\OML/cmm/m/it/12 Sunny;
|
||||||
|
\OT1/cmr/m/n/12 50%\OML/cmm/m/it/12 ; No\OMS/cmsy/m/n/12 g ! []f\OML/cmm/m/it
|
||||||
|
/12 outlook:humidity; windy\OMS/cmsy/m/n/12 g
|
||||||
|
[]
|
||||||
|
|
||||||
|
[3]
|
||||||
|
|
||||||
|
LaTeX Warning: Command \textquoteright invalid in math mode on input line 163.
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
[4] (lecture4.aux) )
|
||||||
|
Here is how much of TeX's memory you used:
|
||||||
|
2047 strings out of 481556
|
||||||
|
28032 string characters out of 2923622
|
||||||
|
262310 words of memory out of 3000000
|
||||||
|
17294 multiletter control sequences out of 15000+200000
|
||||||
|
542187 words of font info for 61 fonts, out of 3000000 for 9000
|
||||||
|
1141 hyphenation exceptions out of 8191
|
||||||
|
42i,8n,44p,316b,125s stack positions out of 5000i,500n,10000p,200000b,50000s
|
||||||
|
<C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfo
|
||||||
|
ur/jknappen/ec/dpi600\tcrm1200.pk><E:/Program Files/MiKTeX 2.9/fonts/type1/publ
|
||||||
|
ic/amsfonts/cm/cmbx12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfo
|
||||||
|
nts/cm/cmex10.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/c
|
||||||
|
mmi10.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmmi12.pf
|
||||||
|
b><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmmi8.pfb><E:/Pro
|
||||||
|
gram Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr12.pfb><E:/Program File
|
||||||
|
s/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.pfb><E:/Program Files/MiKTeX 2
|
||||||
|
.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><E:/Program Files/MiKTeX 2.9/fonts
|
||||||
|
/type1/public/amsfonts/cm/cmsy8.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/pu
|
||||||
|
blic/amsfonts/cm/cmti12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/ams
|
||||||
|
fonts/symbols/msbm10.pfb>
|
||||||
|
Output written on lecture4.pdf (4 pages, 125496 bytes).
|
||||||
|
PDF statistics:
|
||||||
|
64 PDF objects out of 1000 (max. 8388607)
|
||||||
|
0 named destinations out of 1000 (max. 500000)
|
||||||
|
1 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||||
|
|
Binary file not shown.
Binary file not shown.
@ -1 +1,164 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
\section{Lecture 4 - 07-04-2020}
|
\section{Lecture 4 - 07-04-2020}
|
||||||
|
|
||||||
|
We spoke about the $k$-NN classifier and its Voronoi diagram.
|
||||||
|
|
||||||
|
$$
|
||||||
|
\hat{\ell}(\hnn) = 0 \qquad \forall \, \text{training set}
|
||||||
|
$$
|
||||||
|
\\
|
||||||
|
$\hnn$ predictor needs to store entire dataset.
|
||||||
|
\\
|
||||||
|
\subsection{Computing $\hnn$}
|
||||||
|
Computing $\hnn(x)$ requires computing the distances between $x$ and the points in the training set.
|
||||||
|
\\
|
||||||
|
$$
|
||||||
|
\Theta(d) \quad \textit{time for each distance}
|
||||||
|
$$\\
|
||||||
|
|
||||||
|
NN $\rightarrow$ 1-NN\\
|
||||||
|
We can generalise NN to $k$-NN with $k = 1,3,5,7,\dots$, i.e.\ odd $k$ \\
|
||||||
|
$\hknn(x)$ = label corresponding to the majority of labels of the $k$ closest points to
|
||||||
|
x in the training set.\\\\
|
||||||
|
How big could $k$ be if I have $m$ points?\\
|
||||||
|
I look at the $k$ closest point\\
|
||||||
|
When $k = m$?\\
|
||||||
|
The majority, will be a constant classifier
|
||||||
|
$\hknn$ is constant and corresponds to the majority of training labels\\
|
||||||
|
Training error is always 0 for $\hnn$, while for $\hknn$ will be typically $>0$, with $k >
|
||||||
|
1$\\
|
||||||
|
Image: one dimensional classifier and training set is repeated.
|
||||||
|
Is the plot of 1-NN classifier.\\
|
||||||
|
Positive and negative.
|
||||||
|
$K = 1$ error is 0.\\
|
||||||
|
In the second line we switch to $k =3$. Second point doesn’t switch and third will
|
||||||
|
be classified as positive, so we have a training mistake.\\
|
||||||
|
Switches correspond to borders of the Voronoi partition.
|
||||||
|
$$\knn \qquad \textit{For multiclass classification}$$\\
|
||||||
|
$$
|
||||||
|
(|Y| > 2 ) \qquad \textit{for regression } Y\equiv \barra{R}
|
||||||
|
$$
|
||||||
|
\\
|
||||||
|
Average of labels of $K$ neighbours $\rightarrow$ i will get a number with prediction.
|
||||||
|
\\
|
||||||
|
I can weight average by distance
|
||||||
|
\\
|
||||||
|
You can vary this algorithm as you want.\\\\
|
||||||
|
Let’s go back to Binary classification.\\
|
||||||
|
The $k$ parameter is the effect of making the structure of classifier more
|
||||||
|
complex for small values of $k$ and less complex for large values of $k$.\\\\
|
||||||
|
--.. DISEGNO ..--
|
||||||
|
\\
|
||||||
|
Fix training set and test set\\
|
||||||
|
Accuracy as opposed to the error
|
||||||
|
\\\\
|
||||||
|
Show a plot. Training error is 0 at $k = 1$.\\
|
||||||
|
As i go further training error is higher and test error goes down. At some point
|
||||||
|
training and test error meet, and after that both training and test error go
|
||||||
|
up (accuracy goes down).\\
|
||||||
|
For small $k$ the algorithm overfits: the training error is low while the test error is high. For large $k$ it underfits: training and test error are close to each other and both high.
|
||||||
|
Trade off point is the point in $x = 23$ (more or less).\\
|
||||||
|
There are some heuristics to run the NN algorithm without fixing a value of $k$.
|
||||||
|
\\\\
|
||||||
|
\textbf{History}
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\knn$: from 1960 $\rightarrow$ $X \equiv \barra{R}^d$
|
||||||
|
\item Tree predictor: from 1980
|
||||||
|
\\
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Tree Predictor}
|
||||||
|
Suppose I give you data that is not well defined in a Euclidean space.
|
||||||
|
\\
|
||||||
|
$X = X_1 \times \dots \times X_d$ \qquad Medical Record
|
||||||
|
\\
|
||||||
|
$X_1 = \{Male, Female\}$\\
|
||||||
|
$X_2 = \{Yes, No\}$
|
||||||
|
\\
|
||||||
|
so we have different data
|
||||||
|
\\\\
|
||||||
|
I want to avoid comparing $x_i$ with $x_j$, $i\neq j $\\
|
||||||
|
so comparing different feature and we want to compare each feature with
|
||||||
|
itself. I don't want to mix them up.\\
|
||||||
|
We can use a tree!
|
||||||
|
\\
|
||||||
|
I have 3 features:
|
||||||
|
\begin{itemize}
|
||||||
|
\item outlook $= \{sunny, overcast, rain\}$
|
||||||
|
\item humidity $= \{[0,100]\}$
|
||||||
|
\item windy $ = \{yes,no\}$
|
||||||
|
\end{itemize}
|
||||||
|
... -- DISEGNO -- ...\\\\
|
||||||
|
Tree is a natural way of doing decision and abstraction of decision process of
|
||||||
|
one person. It is a good way to deal with categorical variables.\\
|
||||||
|
What kind of tree we are talking about?\\
|
||||||
|
Tree has inner node and leaves. Leaves are associated with labels $(Y)$ and
|
||||||
|
inner nodes are associated with test.
|
||||||
|
\begin{itemize}
|
||||||
|
\item Inner node $\rightarrow$ test
|
||||||
|
\item Leaves $\rightarrow$ label in Y
|
||||||
|
\end{itemize}
|
||||||
|
%... -- DISEGNO -- ...
|
||||||
|
A test is a function $f$ (NOT A PREDICTOR!) \\
|
||||||
|
Test $ \qquad f_i \colon X_i \rightarrow \{1,\dots,k\}$
|
||||||
|
\\ where $k$ is the number of children of the inner node to which the test is assigned
|
||||||
|
\\
|
||||||
|
In a tree predictor we have:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Root node
|
||||||
|
\item Children are ordered (I know the order of each branch that comes out of the node)
|
||||||
|
\end{itemize}
|
||||||
|
$$
|
||||||
|
X = \{Sunny, 50\%, No \} \quad \rightarrow \quad \textit{are the parameters for } \{outlook, humidity, windy \}
|
||||||
|
$$
|
||||||
|
\\
|
||||||
|
$
|
||||||
|
f_i =
|
||||||
|
\begin{cases}
|
||||||
|
1, & \mbox{if } x_2 \in [30 \%,60 \% ]
|
||||||
|
\\
|
||||||
|
2, & \mbox{otherwise} \end{cases}
|
||||||
|
$
|
||||||
|
\\ where the numbers 1 and 2 are the children
|
||||||
|
\\
|
||||||
|
A test is partitioning the range of values of a certain attribute in a number of
|
||||||
|
elements equal to the number of children of the node to which the test is
|
||||||
|
assigned.
|
||||||
|
\\
|
||||||
|
$h_T(x)$ is always the label of a leaf of T\\
|
||||||
|
This leaf is the leaf to which $x$ is \textbf{routed}
|
||||||
|
\\
|
||||||
|
Data space for this problem (outlook,..) is partitioned in the leaves of the tree.
|
||||||
|
It won't be like a Voronoi diagram.
|
||||||
|
How do I build a tree given a training set?
|
||||||
|
How do i learn a tree predictor given a training set?
|
||||||
|
\begin{itemize}
|
||||||
|
\item Decide tree structure (how many nodes, leaves, etc.)
|
||||||
|
\item Decide test on inner nodes
|
||||||
|
\item Decide labels on leaves
|
||||||
|
\end{itemize}
|
||||||
|
We have to do this all together and process will be more dynamic.
|
||||||
|
For simplicity binary classification and fix two children for each inner node.\\\\
|
||||||
|
$ Y = \{-1, +1 \}$
|
||||||
|
\\ $2$ children for each inner node
|
||||||
|
\\\\
|
||||||
|
What's the simplest way?\\
|
||||||
|
The initial tree corresponds to a constant classifier
|
||||||
|
\\\\
|
||||||
|
-- DISEGNO --
|
||||||
|
\\\\
|
||||||
|
\textbf{Majority of all examples}
|
||||||
|
\\\\
|
||||||
|
-- DISEGNO --
|
||||||
|
\\\\
|
||||||
|
$(x_1, y_1) ... (x_m, y_m)$ \\
|
||||||
|
$ x_t \in X$ \qquad $ y_t \in \{-1,+1\}$\\
|
||||||
|
Given the training set $S$, let $S_{\ell} = \{ (x,y) \in S : x \text{ is routed to } \ell\}$\\
|
||||||
|
$S_{\ell}^+$
|
||||||
|
\\\\
|
||||||
|
-- DISEGNO --
|
||||||
|
\\\\
|
||||||
|
$ S_{\ell}$ and $ S'_{\ell}$ are given by the result of the test, not the labels and $\ell$ and $\ell'$.
|
||||||
|
\end{document}
|
@ -1,4 +1,4 @@
|
|||||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 15:12
|
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 15:21
|
||||||
entering extended mode
|
entering extended mode
|
||||||
**./main.tex
|
**./main.tex
|
||||||
(main.tex
|
(main.tex
|
||||||
@ -177,111 +177,111 @@ Underfull \hbox (badness 10000) in paragraph at lines 99--139
|
|||||||
[]
|
[]
|
||||||
|
|
||||||
[4]) [5] (lectures/lecture2.tex
|
[4]) [5] (lectures/lecture2.tex
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 4--13
|
Underfull \hbox (badness 10000) in paragraph at lines 7--16
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 14--27
|
Underfull \hbox (badness 10000) in paragraph at lines 17--30
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 14--27
|
Underfull \hbox (badness 10000) in paragraph at lines 17--30
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 29--32
|
Underfull \hbox (badness 10000) in paragraph at lines 32--35
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[6]
|
[6]
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 46--49
|
Underfull \hbox (badness 10000) in paragraph at lines 49--52
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 46--49
|
Underfull \hbox (badness 10000) in paragraph at lines 49--52
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 60--78
|
Underfull \hbox (badness 10000) in paragraph at lines 63--81
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[7]
|
[7]
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 78--83
|
Underfull \hbox (badness 10000) in paragraph at lines 81--86
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 86--106
|
Underfull \hbox (badness 10000) in paragraph at lines 89--109
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 86--106
|
Underfull \hbox (badness 10000) in paragraph at lines 89--109
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 86--106
|
Underfull \hbox (badness 10000) in paragraph at lines 89--109
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 107--112
|
Underfull \hbox (badness 10000) in paragraph at lines 110--115
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[8]
|
[8]
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[9]
|
[9]
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 161--168
|
Underfull \hbox (badness 10000) in paragraph at lines 164--171
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[10]
|
[10]
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 169--179
|
Underfull \hbox (badness 10000) in paragraph at lines 172--182
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
)
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 186--33
|
Underfull \hbox (badness 10000) in paragraph at lines 189--199
|
||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[11] (lectures/lecture3.tex
|
) [11] (lectures/lecture3.tex
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 5--7
|
Underfull \hbox (badness 10000) in paragraph at lines 5--7
|
||||||
|
|
||||||
[]
|
[]
|
||||||
@ -457,7 +457,12 @@ Underfull \hbox (badness 10000) in paragraph at lines 187--223
|
|||||||
|
|
||||||
[]
|
[]
|
||||||
|
|
||||||
[16]) [17] (lectures/lecture4.tex) [18] (lectures/lecture5.tex) [19]
|
[16]
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 225--226
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
) [17] (lectures/lecture4.tex) [18] (lectures/lecture5.tex) [19]
|
||||||
(lectures/lecture6.tex) [20] (lectures/lecture7.tex) [21]
|
(lectures/lecture6.tex) [20] (lectures/lecture7.tex) [21]
|
||||||
(lectures/lecture8.tex) [22] (lectures/lecture9.tex) [23]
|
(lectures/lecture8.tex) [22] (lectures/lecture9.tex) [23]
|
||||||
(lectures/lecture10.tex
|
(lectures/lecture10.tex
|
||||||
@ -561,7 +566,7 @@ c/amsfonts/cm/cmsy6.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfont
|
|||||||
s/cm/cmsy8.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmti
|
s/cm/cmsy8.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmti
|
||||||
12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msbm10.
|
12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msbm10.
|
||||||
pfb>
|
pfb>
|
||||||
Output written on main.pdf (27 pages, 198551 bytes).
|
Output written on main.pdf (27 pages, 198691 bytes).
|
||||||
PDF statistics:
|
PDF statistics:
|
||||||
146 PDF objects out of 1000 (max. 8388607)
|
146 PDF objects out of 1000 (max. 8388607)
|
||||||
0 named destinations out of 1000 (max. 500000)
|
0 named destinations out of 1000 (max. 500000)
|
||||||
|
Binary file not shown.
Binary file not shown.
@ -15,7 +15,8 @@
|
|||||||
|
|
||||||
\newcommand\barra[1]{\mathbb{#1}}
|
\newcommand\barra[1]{\mathbb{#1}}
|
||||||
\newcommand\hnn{h_{NN}}
|
\newcommand\hnn{h_{NN}}
|
||||||
|
\newcommand\hknn{h_{k-NN}}
|
||||||
|
\newcommand\knn{K_{NN}}
|
||||||
\begin{document}
|
\begin{document}
|
||||||
\maketitle
|
\maketitle
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user