mirror of
https://github.com/Andreaierardi/Master-DataScience-Notes.git
synced 2025-01-07 10:05:56 +01:00
finish lecture 4
This commit is contained in:
parent
c829b0fdd0
commit
416b3bad3b
@ -1,3 +1,6 @@
|
||||
\documentclass[../main.tex]{subfiles}
|
||||
|
||||
\begin{document}
|
||||
\section{Lecture 2 - 07-04-2020}
|
||||
|
||||
\subsection{Argomento}
|
||||
@ -193,3 +196,5 @@ We want to replace this process with a predictor (so we don’t have to bored a
|
||||
person).\\
|
||||
y is the ground truth for x $\rightarrow$ mean reality!\\
|
||||
If i want to predict stock for tomorrow, i will wait tomorrow to see the ground truth.
|
||||
|
||||
\end{document}
|
@ -1,4 +1,4 @@
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 15:16
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 15:20
|
||||
entering extended mode
|
||||
**./lecture3.tex
|
||||
(lecture3.tex
|
||||
@ -354,7 +354,7 @@ MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><E:/Program Files/MiKTeX 2
|
||||
type1/public/amsfonts/cm/cmsy8.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/pub
|
||||
lic/amsfonts/cm/cmti12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsf
|
||||
onts/symbols/msbm10.pfb>
|
||||
Output written on lecture3.pdf (6 pages, 135512 bytes).
|
||||
Output written on lecture3.pdf (6 pages, 135258 bytes).
|
||||
PDF statistics:
|
||||
70 PDF objects out of 1000 (max. 8388607)
|
||||
0 named destinations out of 1000 (max. 500000)
|
||||
|
Binary file not shown.
Binary file not shown.
@ -210,9 +210,9 @@ $\hat{y} = + \quad or \quad \hat{y} = - $
|
||||
\\\
|
||||
I can come up with some sort of classifier.
|
||||
\\\\
|
||||
Given $S$ training set, i can define $h_NN X \rightarrow \{-1,1\}\\
|
||||
Given $S$ training set, i can define $\hnn$ $X \rightarrow \{-1,1\}\\
|
||||
$
|
||||
$h_NN(x) = $ label $y_t$ of the point $x_t$ in $S$ closest to $X$\\
|
||||
$\hnn(x) = $ label $y_t$ of the point $x_t$ in $S$ closest to $X$\\
|
||||
\textbf{(the breaking rule for ties)}
|
||||
\\
|
||||
By closest we mean closest with respect to the Euclidean distance
|
||||
|
@ -0,0 +1,4 @@
|
||||
\relax
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {1}Lecture 4 - 07-04-2020}{1}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Computing $h_{NN}$}{1}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Tree Predictor}{2}\protected@file@percent }
|
@ -0,0 +1,292 @@
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 16:28
|
||||
entering extended mode
|
||||
**./lecture4.tex
|
||||
(lecture4.tex
|
||||
LaTeX2e <2020-02-02> patch level 2
|
||||
L3 programming layer <2020-02-14>
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/subfiles\subfiles.cls"
|
||||
Document Class: subfiles 2020/02/14 v1.6 Multi-file projects (class)
|
||||
Preamble taken from file `../main.tex'
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/tools\verbatim.sty"
|
||||
Package: verbatim 2019/11/10 v1.5r LaTeX2e package for verbatim enhancements
|
||||
\every@verbatim=\toks14
|
||||
\verbatim@line=\toks15
|
||||
\verbatim@in@stream=\read2
|
||||
)
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/import\import.sty"
|
||||
Package: import 2020/04/01 v 6.2
|
||||
) (../main.tex
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/base\article.cls"
|
||||
Document Class: article 2019/12/20 v1.4l Standard LaTeX document class
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/base\size12.clo"
|
||||
File: size12.clo 2019/12/20 v1.4l Standard LaTeX file (size option)
|
||||
)
|
||||
\c@part=\count167
|
||||
\c@section=\count168
|
||||
\c@subsection=\count169
|
||||
\c@subsubsection=\count170
|
||||
\c@paragraph=\count171
|
||||
\c@subparagraph=\count172
|
||||
\c@figure=\count173
|
||||
\c@table=\count174
|
||||
\abovecaptionskip=\skip47
|
||||
\belowcaptionskip=\skip48
|
||||
\bibindent=\dimen134
|
||||
)
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amsmath.sty"
|
||||
Package: amsmath 2020/01/20 v2.17e AMS math features
|
||||
\@mathmargin=\skip49
|
||||
|
||||
For additional information on amsmath, use the `?' option.
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amstext.sty"
|
||||
Package: amstext 2000/06/29 v2.01 AMS text
|
||||
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amsgen.sty"
|
||||
File: amsgen.sty 1999/11/30 v2.0 generic functions
|
||||
\@emptytoks=\toks16
|
||||
\ex@=\dimen135
|
||||
))
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amsbsy.sty"
|
||||
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
|
||||
\pmbraise@=\dimen136
|
||||
)
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsmath\amsopn.sty"
|
||||
Package: amsopn 2016/03/08 v2.02 operator names
|
||||
)
|
||||
\inf@bad=\count175
|
||||
LaTeX Info: Redefining \frac on input line 227.
|
||||
\uproot@=\count176
|
||||
\leftroot@=\count177
|
||||
LaTeX Info: Redefining \overline on input line 389.
|
||||
\classnum@=\count178
|
||||
\DOTSCASE@=\count179
|
||||
LaTeX Info: Redefining \ldots on input line 486.
|
||||
LaTeX Info: Redefining \dots on input line 489.
|
||||
LaTeX Info: Redefining \cdots on input line 610.
|
||||
\Mathstrutbox@=\box45
|
||||
\strutbox@=\box46
|
||||
\big@size=\dimen137
|
||||
LaTeX Font Info: Redeclaring font encoding OML on input line 733.
|
||||
LaTeX Font Info: Redeclaring font encoding OMS on input line 734.
|
||||
\macc@depth=\count180
|
||||
\c@MaxMatrixCols=\count181
|
||||
\dotsspace@=\muskip16
|
||||
\c@parentequation=\count182
|
||||
\dspbrk@lvl=\count183
|
||||
\tag@help=\toks17
|
||||
\row@=\count184
|
||||
\column@=\count185
|
||||
\maxfields@=\count186
|
||||
\andhelp@=\toks18
|
||||
\eqnshift@=\dimen138
|
||||
\alignsep@=\dimen139
|
||||
\tagshift@=\dimen140
|
||||
\tagwidth@=\dimen141
|
||||
\totwidth@=\dimen142
|
||||
\lineht@=\dimen143
|
||||
\@envbody=\toks19
|
||||
\multlinegap=\skip50
|
||||
\multlinetaggap=\skip51
|
||||
\mathdisplay@stack=\toks20
|
||||
LaTeX Info: Redefining \[ on input line 2859.
|
||||
LaTeX Info: Redefining \] on input line 2860.
|
||||
)
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/systeme\systeme.sty"
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/xstring\xstring.sty"
|
||||
("E:\Program Files\MiKTeX 2.9\tex/generic/xstring\xstring.tex"
|
||||
\integerpart=\count187
|
||||
\decimalpart=\count188
|
||||
)
|
||||
Package: xstring 2019/02/06 v1.83 String manipulations (CT)
|
||||
)
|
||||
("E:\Program Files\MiKTeX 2.9\tex/generic/systeme\systeme.tex"
|
||||
\SYS_systemecode=\toks21
|
||||
\SYS_systempreamble=\toks22
|
||||
\SYSeqnum=\count189
|
||||
)
|
||||
Package: systeme 2019/01/13 v0.32 Mise en forme de systemes d'equations (CT)
|
||||
)
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\amssymb.sty"
|
||||
Package: amssymb 2013/01/14 v3.01 AMS font symbols
|
||||
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\amsfonts.sty"
|
||||
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
|
||||
\symAMSa=\mathgroup4
|
||||
\symAMSb=\mathgroup5
|
||||
LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
|
||||
(Font) U/euf/m/n --> U/euf/b/n on input line 106.
|
||||
))
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/subfiles\subfiles.sty"
|
||||
Package: subfiles 2020/02/14 v1.6 Multi-file projects (package)
|
||||
)))
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/l3backend\l3backend-pdfmode.def"
|
||||
File: l3backend-pdfmode.def 2020-02-03 L3 backend support: PDF mode
|
||||
\l__kernel_color_stack_int=\count190
|
||||
\l__pdf_internal_box=\box47
|
||||
)
|
||||
(lecture4.aux)
|
||||
\openout1 = `lecture4.aux'.
|
||||
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 3.
|
||||
LaTeX Font Info: ... okay on input line 3.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 3.
|
||||
LaTeX Font Info: ... okay on input line 3.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 3.
|
||||
LaTeX Font Info: ... okay on input line 3.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 3.
|
||||
LaTeX Font Info: ... okay on input line 3.
|
||||
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 3.
|
||||
LaTeX Font Info: ... okay on input line 3.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 3.
|
||||
LaTeX Font Info: ... okay on input line 3.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 3.
|
||||
LaTeX Font Info: ... okay on input line 3.
|
||||
LaTeX Font Info: Trying to load font information for U+msa on input line 8.
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsa.fd"
|
||||
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
|
||||
)
|
||||
LaTeX Font Info: Trying to load font information for U+msb on input line 8.
|
||||
|
||||
("E:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsb.fd"
|
||||
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
|
||||
)
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 10--14
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 15--17
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 19--20
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 21--39
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 39--40
|
||||
|
||||
[]
|
||||
|
||||
[1
|
||||
|
||||
{C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 42--66
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 42--66
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 42--66
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 42--66
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 68--70
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 73--88
|
||||
|
||||
[]
|
||||
|
||||
[2]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 93--99
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (30.99239pt too wide) detected at line 115
|
||||
\OML/cmm/m/it/12 X \OT1/cmr/m/n/12 = \OMS/cmsy/m/n/12 f\OML/cmm/m/it/12 Sunny;
|
||||
\OT1/cmr/m/n/12 50%\OML/cmm/m/it/12 ; No\OMS/cmsy/m/n/12 g ! []f\OML/cmm/m/it
|
||||
/12 outlook:humidity; windy\OMS/cmsy/m/n/12 g
|
||||
[]
|
||||
|
||||
[3]
|
||||
|
||||
LaTeX Warning: Command \textquoteright invalid in math mode on input line 163.
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 142--164
|
||||
|
||||
[]
|
||||
|
||||
[4] (lecture4.aux) )
|
||||
Here is how much of TeX's memory you used:
|
||||
2047 strings out of 481556
|
||||
28032 string characters out of 2923622
|
||||
262310 words of memory out of 3000000
|
||||
17294 multiletter control sequences out of 15000+200000
|
||||
542187 words of font info for 61 fonts, out of 3000000 for 9000
|
||||
1141 hyphenation exceptions out of 8191
|
||||
42i,8n,44p,316b,125s stack positions out of 5000i,500n,10000p,200000b,50000s
|
||||
<C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfo
|
||||
ur/jknappen/ec/dpi600\tcrm1200.pk><E:/Program Files/MiKTeX 2.9/fonts/type1/publ
|
||||
ic/amsfonts/cm/cmbx12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfo
|
||||
nts/cm/cmex10.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/c
|
||||
mmi10.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmmi12.pf
|
||||
b><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmmi8.pfb><E:/Pro
|
||||
gram Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr12.pfb><E:/Program File
|
||||
s/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.pfb><E:/Program Files/MiKTeX 2
|
||||
.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><E:/Program Files/MiKTeX 2.9/fonts
|
||||
/type1/public/amsfonts/cm/cmsy8.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/pu
|
||||
blic/amsfonts/cm/cmti12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/ams
|
||||
fonts/symbols/msbm10.pfb>
|
||||
Output written on lecture4.pdf (4 pages, 125496 bytes).
|
||||
PDF statistics:
|
||||
64 PDF objects out of 1000 (max. 8388607)
|
||||
0 named destinations out of 1000 (max. 500000)
|
||||
1 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||
|
Binary file not shown.
Binary file not shown.
@ -1 +1,164 @@
|
||||
\documentclass[../main.tex]{subfiles}
|
||||
|
||||
\begin{document}
|
||||
\section{Lecture 4 - 07-04-2020}
|
||||
|
||||
We spoke about Knn classifier with voronoi diagram
|
||||
|
||||
$$
|
||||
\hat{\ell}(\hnn) = 0 \qquad \forall \text{ training set}
|
||||
$$
|
||||
\\
|
||||
$\hnn$ predictor needs to store entire dataset.
|
||||
\\
|
||||
\subsection{Computing $\hnn$}
|
||||
Computing $\hnn(x)$ requires computing distances between $x$ and points in the training set.
|
||||
\\
|
||||
$$
|
||||
\Theta(d) \quad \textit{time for each distance}
|
||||
$$\\
|
||||
|
||||
NN $\rightarrow$ 1-NN\\
|
||||
We can generalise NN to K-NN with $k = 1,3,5,7,\dots$, so odd $k$ \\
|
||||
$\hknn(x)$ = label corresponding to the majority of labels of the $k$ closest points to
|
||||
x in the training set.\\\\
|
||||
How big could $k$ be if I have $m$ points?\\
|
||||
I look at the $k$ closest points\\
|
||||
When $k = m$?\\
|
||||
The majority, will be a constant classifier
|
||||
$\hknn$ is constant and corresponds to the majority of training labels\\
|
||||
Training error is always 0 for $\hnn$, while for $\hknn$ will be typically $>0$, with $k >
|
||||
1$\\
|
||||
Image: one dimensional classifier and training set is repeated.
|
||||
Is the plot of 1-NN classifier.\\
|
||||
Positive and negative.
|
||||
$K = 1$ error is 0.\\
|
||||
In the second line we switch to $k =3$. Second point doesn’t switch and third will
|
||||
be classified as positive and we have a training mistake.\\
|
||||
Switches correspond to borders of the Voronoi partition.
|
||||
$$\knn \qquad \textit{For multiclass classification}$$\\
|
||||
$$
|
||||
(|Y| > 2 ) \qquad \textit{for regression } Y\equiv \barra{R}
|
||||
$$
|
||||
\\
|
||||
Average of labels of $K$ neighbours $\rightarrow$ i will get a number with prediction.
|
||||
\\
|
||||
I can weight average by distance
|
||||
\\
|
||||
You can vary this algorithm as you want.\\\\
|
||||
Let’s go back to Binary classification.\\
|
||||
The parameter $k$ has the effect of making the structure of the classifier more
|
||||
complex for small values of $k$ and less complex for large values of $k$.\\\\
|
||||
--.. DISEGNO ..--
|
||||
\\
|
||||
Fix training set and test set\\
|
||||
Accuracy as opposed to the error
|
||||
\\\\
|
||||
Show a plot. Training error is 0 at $k = 1$.\\
|
||||
As i go further training error is higher and test error goes down. At some point
|
||||
training and test error meet, and after that both training and test error go
|
||||
up (accuracy goes down).\\
|
||||
If I run the algorithm, it is going to overfit for small $k$ (training error is low and test error is high) and underfit for large $k$, since test and training error are close and both high.
|
||||
Trade off point is the point in $x = 23$ (more or less).\\
|
||||
There are some heuristics to run the NN algorithm without choosing a value of $k$.
|
||||
\\\\
|
||||
\textbf{History}
|
||||
\begin{itemize}
|
||||
\item $\knn$: from 1960 $\rightarrow$ $X \equiv \barra{R}^d$
|
||||
\item Tree predictor: from 1980
|
||||
\\
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Tree Predictor}
|
||||
If I give you data not well defined in a Euclidean space.
|
||||
\\
|
||||
$X = X_1 \times \dots \times X_d$ \qquad Medical Record
|
||||
\\
|
||||
$X_1 = \{Male, Female\}$\\
|
||||
$X_2 = \{Yes, No\}$
|
||||
\\
|
||||
so we have different data
|
||||
\\\\
|
||||
I want to avoid comparing $x_i$ with $x_j$, $i\neq j $\\
|
||||
so comparing different features: we want to compare each feature with
|
||||
itself. I don’t want to mix them up.\\
|
||||
We can use a tree!
|
||||
\\
|
||||
I have 3 features:
|
||||
\begin{itemize}
|
||||
\item outlook $= \{sunny, overcast, rain\}$
|
||||
\item humidity $= \{[0,100]\}$
|
||||
\item windy $ = \{yes,no\}$
|
||||
\end{itemize}
|
||||
... -- DISEGNO -- ...\\\\
|
||||
Tree is a natural way of doing decision and abstraction of decision process of
|
||||
one person. It is a good way to deal with categorical variables.\\
|
||||
What kind of tree we are talking about?\\
|
||||
Tree has inner node and leaves. Leaves are associated with labels $(Y)$ and
|
||||
inner nodes are associated with test.
|
||||
\begin{itemize}
|
||||
\item Inner node $\rightarrow$ test
|
||||
\item Leaves $\rightarrow$ label in Y
|
||||
\end{itemize}
|
||||
%... -- DISEGNO -- ...
|
||||
A test is a function $f$ (NOT A PREDICTOR!) \\
|
||||
Test $ \qquad f_i \colon X_i \rightarrow \{1,\dots,k\}$
|
||||
\\ where $k$ is the number of children of the inner node to which the test is assigned
|
||||
\\
|
||||
In a tree predictor we have:
|
||||
\begin{itemize}
|
||||
\item Root node
|
||||
\item Children are ordered (I know the order of each branch that comes out from the node)
|
||||
\end{itemize}
|
||||
$$
|
||||
X = \{Sunny, 50\%, No \} \quad \rightarrow \quad \textit{are the parameters for } \{outlook, humidity, windy \}
|
||||
$$
|
||||
\\
|
||||
$
|
||||
f_i =
|
||||
\begin{cases}
|
||||
1, & \mbox{if } x_2 \in [30 \%,60 \% ]
|
||||
\\
|
||||
2, & \mbox{otherwise} \end{cases}
|
||||
$
|
||||
\\ where the numbers 1 and 2 are the children
|
||||
\\
|
||||
A test is partitioning the range of values of a certain attribute in a number of
|
||||
elements equal to the number of children of the node to which the test is
|
||||
assigned.
|
||||
\\
|
||||
$h_T(x)$ is always the label of a leaf of T\\
|
||||
This leaf is the leaf to which $x$ is \textbf{routed}
|
||||
\\
|
||||
Data space for this problem (outlook,..) is partitioned in the leaves of the tree.
|
||||
It won’t be like a Voronoi diagram.
|
||||
How do I build a tree given a training set?
|
||||
How do i learn a tree predictor given a training set?
|
||||
\begin{itemize}
|
||||
\item Decide tree structure (how many nodes, leaves, etc.)
|
||||
\item Decide test on inner nodes
|
||||
\item Decide labels on leaves
|
||||
\end{itemize}
|
||||
We have to do this all together and process will be more dynamic.
|
||||
For simplicity binary classification and fix two children for each inner node.\\\\
|
||||
$ Y = \{-1, +1 \}$
|
||||
\\ $2$ children for each inner node
|
||||
\\\\
|
||||
What's the simplest way?\\
|
||||
The initial tree corresponds to a constant classifier
|
||||
\\\\
|
||||
-- DISEGNO --
|
||||
\\\\
|
||||
\textbf{Majority of all examples}
|
||||
\\\\
|
||||
-- DISEGNO --
|
||||
\\\\
|
||||
$(x_1, y_1) ... (x_m, y_m)$ \\
|
||||
$ x_t \in X$ \qquad $ y_t \in \{-1,+1\}$\\
|
||||
Training set $S_{\ell} = \{ (x,y) \in S : x \text{ is routed to } \ell\}$\\
|
||||
$S_{\ell}^+$
|
||||
\\\\
|
||||
-- DISEGNO --
|
||||
\\\\
|
||||
$ S_{\ell}$ and $ S'_{\ell}$ are given by the result of the test, not the labels and $\ell$ and $\ell'$.
|
||||
\end{document}
|
@ -1,4 +1,4 @@
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 15:12
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.12) 12 APR 2020 15:21
|
||||
entering extended mode
|
||||
**./main.tex
|
||||
(main.tex
|
||||
@ -177,111 +177,111 @@ Underfull \hbox (badness 10000) in paragraph at lines 99--139
|
||||
[]
|
||||
|
||||
[4]) [5] (lectures/lecture2.tex
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 4--13
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 7--16
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 14--27
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 17--30
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 14--27
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 17--30
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 29--32
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 32--35
|
||||
|
||||
[]
|
||||
|
||||
[6]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 46--49
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 49--52
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 46--49
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 49--52
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 60--78
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 63--81
|
||||
|
||||
[]
|
||||
|
||||
[7]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 78--83
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 81--86
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 86--106
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 89--109
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 86--106
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 89--109
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 86--106
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 89--109
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 107--112
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 110--115
|
||||
|
||||
[]
|
||||
|
||||
[8]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 115--153
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 118--156
|
||||
|
||||
[]
|
||||
|
||||
[9]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 161--168
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 164--171
|
||||
|
||||
[]
|
||||
|
||||
[10]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 169--179
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 172--182
|
||||
|
||||
[]
|
||||
|
||||
)
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 186--33
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 189--199
|
||||
|
||||
[]
|
||||
|
||||
[11] (lectures/lecture3.tex
|
||||
) [11] (lectures/lecture3.tex
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 5--7
|
||||
|
||||
[]
|
||||
@ -457,7 +457,12 @@ Underfull \hbox (badness 10000) in paragraph at lines 187--223
|
||||
|
||||
[]
|
||||
|
||||
[16]) [17] (lectures/lecture4.tex) [18] (lectures/lecture5.tex) [19]
|
||||
[16]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 225--226
|
||||
|
||||
[]
|
||||
|
||||
) [17] (lectures/lecture4.tex) [18] (lectures/lecture5.tex) [19]
|
||||
(lectures/lecture6.tex) [20] (lectures/lecture7.tex) [21]
|
||||
(lectures/lecture8.tex) [22] (lectures/lecture9.tex) [23]
|
||||
(lectures/lecture10.tex
|
||||
@ -561,7 +566,7 @@ c/amsfonts/cm/cmsy6.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfont
|
||||
s/cm/cmsy8.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmti
|
||||
12.pfb><E:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msbm10.
|
||||
pfb>
|
||||
Output written on main.pdf (27 pages, 198551 bytes).
|
||||
Output written on main.pdf (27 pages, 198691 bytes).
|
||||
PDF statistics:
|
||||
146 PDF objects out of 1000 (max. 8388607)
|
||||
0 named destinations out of 1000 (max. 500000)
|
||||
|
Binary file not shown.
Binary file not shown.
@ -15,7 +15,8 @@
|
||||
|
||||
\newcommand\barra[1]{\mathbb{#1}}
|
||||
\newcommand\hnn{h_{NN}}
|
||||
|
||||
\newcommand\hknn{h_{k-NN}}
|
||||
\newcommand\knn{K_{NN}}
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user