up
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 8.8 KiB |
After Width: | Height: | Size: 11 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 18 KiB |
After Width: | Height: | Size: 11 KiB |
After Width: | Height: | Size: 15 KiB |
@ -5,3 +5,14 @@
|
|||||||
\@writefile{lof}{\addvspace {10\p@ }}
|
\@writefile{lof}{\addvspace {10\p@ }}
|
||||||
\@writefile{lot}{\addvspace {10\p@ }}
|
\@writefile{lot}{\addvspace {10\p@ }}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {1.1}Non parametrics algorithms}{1}\protected@file@percent }
|
\@writefile{toc}{\contentsline {section}{\numberline {1.1}Non parametrics algorithms}{1}\protected@file@percent }
|
||||||
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.1}Theorem: No free lunch}{1}\protected@file@percent }
|
||||||
|
\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Tree building}}{2}\protected@file@percent }
|
||||||
|
\@writefile{toc}{\contentsline {section}{\numberline {1.2}Highly Parametric Learning Algorithm}{3}\protected@file@percent }
|
||||||
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.1}Linear Predictors}{3}\protected@file@percent }
|
||||||
|
\@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces Dot product}}{3}\protected@file@percent }
|
||||||
|
\@writefile{lof}{\contentsline {figure}{\numberline {1.3}{\ignorespaces Dot product}}{4}\protected@file@percent }
|
||||||
|
\@writefile{lof}{\contentsline {figure}{\numberline {1.4}{\ignorespaces Hyperplane}}{4}\protected@file@percent }
|
||||||
|
\@writefile{lof}{\contentsline {figure}{\numberline {1.5}{\ignorespaces Hyperplane}}{5}\protected@file@percent }
|
||||||
|
\@writefile{lof}{\contentsline {figure}{\numberline {1.6}{\ignorespaces Hyperplane}}{5}\protected@file@percent }
|
||||||
|
\@writefile{lof}{\contentsline {figure}{\numberline {1.7}{\ignorespaces Example of one dimensional hyperplane}}{6}\protected@file@percent }
|
||||||
|
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.2}MinDisagreement}{7}\protected@file@percent }
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 20 APR 2020 11:35
|
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 21 APR 2020 10:30
|
||||||
entering extended mode
|
entering extended mode
|
||||||
**./lecture12.tex
|
**./lecture12.tex
|
||||||
(lecture12.tex
|
(lecture12.tex
|
||||||
@ -309,26 +309,156 @@ G,.JBIG2,.JB2,.eps]
|
|||||||
(grfext) \AppendGraphicsExtensions on input line 504.
|
(grfext) \AppendGraphicsExtensions on input line 504.
|
||||||
)
|
)
|
||||||
Chapter 1.
|
Chapter 1.
|
||||||
|
LaTeX Font Info: Trying to load font information for U+msa on input line 11.
|
||||||
|
|
||||||
|
("C:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsa.fd"
|
||||||
|
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
|
||||||
|
)
|
||||||
|
LaTeX Font Info: Trying to load font information for U+msb on input line 11.
|
||||||
|
|
||||||
|
|
||||||
|
("C:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsb.fd"
|
||||||
|
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
|
||||||
|
)
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 8--17
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 8--17
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
<../img/lez12-img1.JPG, id=1, 314.67563pt x 160.34906pt>
|
||||||
|
File: ../img/lez12-img1.JPG Graphic file (type jpg)
|
||||||
|
<use ../img/lez12-img1.JPG>
|
||||||
|
Package pdftex.def Info: ../img/lez12-img1.JPG used on input line 35.
|
||||||
|
(pdftex.def) Requested size: 234.00238pt x 119.24121pt.
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 32--44
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 32--44
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
LaTeX Warning: `h' float specifier changed to `ht'.
|
||||||
|
|
||||||
[1
|
[1
|
||||||
|
|
||||||
|
|
||||||
{C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
|
{C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
|
||||||
(lecture12.aux) )
|
Underfull \hbox (badness 10000) in paragraph at lines 49--60
|
||||||
Here is how much of TeX's memory you used:
|
|
||||||
5036 strings out of 480934
|
[]
|
||||||
67935 string characters out of 2909670
|
|
||||||
328090 words of memory out of 3000000
|
[2 <../img/lez12-img1.JPG>]
|
||||||
20817 multiletter control sequences out of 15000+200000
|
Underfull \hbox (badness 10000) in paragraph at lines 65--67
|
||||||
536495 words of font info for 30 fonts, out of 3000000 for 9000
|
|
||||||
1141 hyphenation exceptions out of 8191
|
[]
|
||||||
42i,5n,50p,333b,142s stack positions out of 5000i,500n,10000p,200000b,50000s
|
|
||||||
<C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/
|
<../img/lez12-img2.JPG, id=24, 65.49469pt x 61.73062pt>
|
||||||
jknappen/ec/dpi600\ecrm1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fo
|
File: ../img/lez12-img2.JPG Graphic file (type jpg)
|
||||||
nts/pk/ljfour/jknappen/ec/dpi600\ecbx1728.pk> <C:\Users\AndreDany\AppData\Local
|
<use ../img/lez12-img2.JPG>
|
||||||
\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx2488.pk>
|
Package pdftex.def Info: ../img/lez12-img2.JPG used on input line 83.
|
||||||
Output written on lecture12.pdf (1 page, 13581 bytes).
|
(pdftex.def) Requested size: 77.99881pt x 73.51778pt.
|
||||||
PDF statistics:
|
|
||||||
47 PDF objects out of 1000 (max. 8388607)
|
Underfull \hbox (badness 10000) in paragraph at lines 80--88
|
||||||
0 named destinations out of 1000 (max. 500000)
|
|
||||||
1 words of extra memory for PDF output out of 10000 (max. 10000000)
|
[]
|
||||||
|
|
||||||
|
<../img/lez12-img3.JPG, id=25, 106.14656pt x 110.66344pt>
|
||||||
|
File: ../img/lez12-img3.JPG Graphic file (type jpg)
|
||||||
|
<use ../img/lez12-img3.JPG>
|
||||||
|
Package pdftex.def Info: ../img/lez12-img3.JPG used on input line 108.
|
||||||
|
(pdftex.def) Requested size: 77.99881pt x 81.32059pt.
|
||||||
|
|
||||||
|
LaTeX Warning: `h' float specifier changed to `ht'.
|
||||||
|
|
||||||
|
<../img/lez12-img4.JPG, id=26, 137.7647pt x 115.18031pt>
|
||||||
|
File: ../img/lez12-img4.JPG Graphic file (type jpg)
|
||||||
|
<use ../img/lez12-img4.JPG>
|
||||||
|
Package pdftex.def Info: ../img/lez12-img4.JPG used on input line 124.
|
||||||
|
(pdftex.def) Requested size: 155.99762pt x 130.43692pt.
|
||||||
|
[3 <../img/lez12-img2.JPG>]
|
||||||
|
<../img/lez12-img5.JPG, id=31, 310.91156pt x 131.74219pt>
|
||||||
|
File: ../img/lez12-img5.JPG Graphic file (type jpg)
|
||||||
|
<use ../img/lez12-img5.JPG>
|
||||||
|
Package pdftex.def Info: ../img/lez12-img5.JPG used on input line 147.
|
||||||
|
(pdftex.def) Requested size: 234.00238pt x 99.15414pt.
|
||||||
|
|
||||||
|
|
||||||
|
LaTeX Warning: `h' float specifier changed to `ht'.
|
||||||
|
|
||||||
|
[4 <../img/lez12-img3.JPG> <../img/lez12-img4.JPG>]
|
||||||
|
<../img/lez12-img6.JPG, id=35, 116.68594pt x 99.37125pt>
|
||||||
|
File: ../img/lez12-img6.JPG Graphic file (type jpg)
|
||||||
|
<use ../img/lez12-img6.JPG>
|
||||||
|
Package pdftex.def Info: ../img/lez12-img6.JPG used on input line 156.
|
||||||
|
(pdftex.def) Requested size: 117.00119pt x 99.64392pt.
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 173--176
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
[5 <../img/lez12-img5.JPG> <../img/lez12-img6.JPG>]
|
||||||
|
<../img/lez12-img7.JPG, id=39, 199.49532pt x 143.78719pt>
|
||||||
|
File: ../img/lez12-img7.JPG Graphic file (type jpg)
|
||||||
|
<use ../img/lez12-img7.JPG>
|
||||||
|
Package pdftex.def Info: ../img/lez12-img7.JPG used on input line 180.
|
||||||
|
(pdftex.def) Requested size: 234.00238pt x 168.66693pt.
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 177--187
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 189--194
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 199--210
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 199--210
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
[6 <../img/lez12-img7.JPG>] [7] (lecture12.aux) )
|
||||||
|
Here is how much of TeX's memory you used:
|
||||||
|
5147 strings out of 480934
|
||||||
|
69792 string characters out of 2909670
|
||||||
|
333085 words of memory out of 3000000
|
||||||
|
20891 multiletter control sequences out of 15000+200000
|
||||||
|
546454 words of font info for 57 fonts, out of 3000000 for 9000
|
||||||
|
1141 hyphenation exceptions out of 8191
|
||||||
|
42i,7n,50p,333b,239s stack positions out of 5000i,500n,10000p,200000b,50000s
|
||||||
|
<C:\Users\AndreDany\AppData\L
|
||||||
|
ocal\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\tcrm1200.pk> <C:\Users\Andre
|
||||||
|
Dany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx1440.pk> <
|
||||||
|
C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\
|
||||||
|
ecbx1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknap
|
||||||
|
pen/ec/dpi600\ecrm1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/p
|
||||||
|
k/ljfour/jknappen/ec/dpi600\ecbx1728.pk> <C:\Users\AndreDany\AppData\Local\MiKT
|
||||||
|
eX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx2488.pk><C:/Program Files/MiKTeX
|
||||||
|
2.9/fonts/type1/public/amsfonts/cm/cmex10.pfb><C:/Program Files/MiKTeX 2.9/font
|
||||||
|
s/type1/public/amsfonts/cm/cmmi12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/
|
||||||
|
public/amsfonts/cm/cmmi6.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/am
|
||||||
|
sfonts/cm/cmmi8.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm
|
||||||
|
/cmr12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr6.pfb
|
||||||
|
><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.pfb><C:/Progr
|
||||||
|
am Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><C:/Program Files
|
||||||
|
/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy8.pfb><C:/Program Files/MiKTeX 2
|
||||||
|
.9/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
|
||||||
|
Output written on lecture12.pdf (7 pages, 267916 bytes).
|
||||||
|
PDF statistics:
|
||||||
|
263 PDF objects out of 1000 (max. 8388607)
|
||||||
|
0 named destinations out of 1000 (max. 500000)
|
||||||
|
36 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||||
|
|
||||||
|
@ -5,6 +5,213 @@
|
|||||||
|
|
||||||
\section{Non parametrics algorithms}
|
\section{Non parametrics algorithms}
|
||||||
|
|
||||||
|
We talk about \bred{consistency}: as the training size grows unbounded the expected risk of the algorithm converges to the Bayes Risk.
|
||||||
|
\\\\
|
||||||
|
Now we talk about \bred{non parametric algorithm}: the structure of the model is determined by the data.\\
|
||||||
|
In a parametric algorithm the structure of the model is fixed, like the structure of a Neural Network, but a non parametric algorithm will change the structure of the model as the data grows ($\knn$ and tree predictors).\\
|
||||||
|
If we let the tree grow unboundedly then we get a non parametric tree, but if we bound the growth then we get a parametric one.
|
||||||
|
\\\\
|
||||||
|
The convergence rate to the Bayes Risk (in this case doubled) was slow.
|
||||||
|
The convergence of $1$-$NN$ to $2 \, \ell_D(f^*)$ is $m^{-\frac{1}{d+1}}$, so we need a number of samples exponential in the dimension. And this holds under the Lipschitz assumption on $\eta$.
|
||||||
|
\\ It is possible to converge to the Bayes Risk, but with no guaranteed rate; this is formalized by the \bred{No free lunch} theorem.
|
||||||
|
\subsection{Theorem: No free lunch}
|
||||||
|
Let a sequence of numbers
|
||||||
|
$a_1, a_2$ ... $\in \barra{R} $
|
||||||
|
such that they converge to 0.
|
||||||
|
\\Also $\frac{1}{16} \geq a_1 \geq a_2 \geq \dots$ $\forall A$ % NOTE(review): constant garbled in the original ("1/22222222"); $1/16$ is the usual bound --- confirm
|
||||||
|
for binary classification $\exists D$ s. t.
|
||||||
|
\\$\ell_D(f^*) = 0 $ (zero-one loss) so Bayes risk is zero and
|
||||||
|
\expt{\ell_D\left(A(S_m)\right)} $\geq a_m \quad \forall m \geq 1$
|
||||||
|
\\
|
||||||
|
To converge to the Bayes optimal predictor you should be prepared for it to take a long period of time. This means that:
|
||||||
|
\begin{itemize}
|
||||||
|
\item For a specific data distribution $D$, $A$ may converge fast to the Bayes Risk.
|
||||||
|
\item If $\eta$ is Lipschitz then it is continuous. This means that if we perturb the input then the output doesn't change too much.
|
||||||
|
\item If the Bayes Risk is 0 ($\ell_D(f^*) = 0$) the function will be discontinuous
|
||||||
|
\end{itemize}
|
||||||
|
Due to this result, people typically think twice before using a consistent algorithm because
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.6\linewidth]{../img/lez12-img1.JPG}
|
||||||
|
\caption{Tree building}
|
||||||
|
%\label{fig:}
|
||||||
|
\end{figure}\\
|
||||||
|
\\
|
||||||
|
I have the Bayes risk and some non consistent algorithm that will converge to some value ($\ell_D(\hat{h}^*)$).
|
||||||
|
Maybe I have the Bayes risk and the convergence takes a long time as data points increase. Before converging, the non parametric one was better (?..)
|
||||||
|
\\\\
|
||||||
|
Picture for binary classification, (similar for other losses)
|
||||||
|
\begin{itemize}
|
||||||
|
\item Under no assumption on $\eta$, the typical ``parametric'' convergence rate to the risk of the best model in $H$ (including ERM) is $m^{-\frac{1}{2}}$. (Bias error may be high)
|
||||||
|
\item Under no assumption on $\eta$ there is no guaranteed convergence to Bayes Risk (in general) and this is \bred{no-free-lunch} that guaranteed me no convergence rate.
|
||||||
|
\item Under the Lipschitz assumption on $\eta$ the typical non parametric convergence rate to the Bayes Risk is $m^{-\frac{1}{d+1}}$. This is exponentially worse than the parametric convergence rate.
|
||||||
|
\end{itemize}
|
||||||
|
The exponential dependence on $d$ is called \bred{Curse of dimensionality}.
|
||||||
|
\\ But if I assume small number of dimension $\longrightarrow$ $\knn$ is ok if $d$ is small (and $\eta$ is "easy")
|
||||||
|
\\
|
||||||
|
If you have a non parametric algorithm (no Bayes error but may have exponentially infinity training error).
|
||||||
|
I want them to be balanced and avoid bias and variance. We need to introduce a bit of bias in controlled way.
|
||||||
|
\\
|
||||||
|
Inserting bias to reduce the variance error. So we sacrifice a bit of bias to get a better variance error.
|
||||||
|
\\\\
|
||||||
|
It could be good to inject bias in order to reduce the variance error. In practice, instead of having 0 training error I want to have a larger training error and hope to reduce overfitting, sacrificing a bit in the training error.
|
||||||
|
\\
|
||||||
|
I can increase bias with different techniques: one is the ensemble methods.
|
||||||
|
|
||||||
|
|
||||||
|
\section{Highly Parametric Learning Algorithm}
|
||||||
|
|
||||||
|
\subsection{Linear Predictors}
|
||||||
|
Our domain is Euclidean space (so we have points of numbers).
|
||||||
|
\\
|
||||||
|
$$
|
||||||
|
X \ is \ \barra{R}^d \qquad x = (x_1,..,x_d)
|
||||||
|
$$
|
||||||
|
A linear predictor will be a linear function of the data points.
|
||||||
|
$$
|
||||||
|
h: \barra{R}^d \longrightarrow Y \qquad h\left(x\right) = f(w^T \, x) \quad w \in \barra{R}^d
|
||||||
|
$$
|
||||||
|
$$
|
||||||
|
f: \barra{R} \longrightarrow Y
|
||||||
|
$$
|
||||||
|
And this is the dot product that is
|
||||||
|
$$
|
||||||
|
w^T \, x = \sum_{i = 1}^{d} w_i x_i = \| w \| \, \| x \| \cos \Theta
|
||||||
|
$$
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.2\linewidth]{../img/lez12-img2.JPG}
|
||||||
|
\caption{Dot product}
|
||||||
|
%\label{fig:}
|
||||||
|
\end{figure}\\
|
||||||
|
Suppose we look a regression with square loss.\\
|
||||||
|
$$ Y = \barra{R} \qquad h(x) = w^T\, x \quad w \in \barra{R}^d
|
||||||
|
$$
|
||||||
|
$
|
||||||
|
f^*(x) = $\expt{ Y| X=x }
|
||||||
|
\\
|
||||||
|
Binary classification with zero-one loss
|
||||||
|
$ Y = \{ -1,1\}$
|
||||||
|
We cannot use this since it is not a real number, but I can do:
|
||||||
|
$$
|
||||||
|
h(x) = sgn\left(w^T\, x\right) \qquad sgn(z) = \begin{cases}
|
||||||
|
+1 \ if \ z > 0\\
|
||||||
|
-1 \ if \ z \leq 0
|
||||||
|
\end{cases}
|
||||||
|
$$
|
||||||
|
where sgn is a sign function.
|
||||||
|
Linear classifier.
|
||||||
|
\\
|
||||||
|
$\| x \| \cos \Theta $ is the length of the projection of $x$ onto $w$
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.2\linewidth]{../img/lez12-img3.JPG}
|
||||||
|
\caption{Dot product}
|
||||||
|
%\label{fig:}
|
||||||
|
\end{figure}\\
|
||||||
|
Now let's look at this set:
|
||||||
|
$$
|
||||||
|
\{ x \in \barra{R}^d : w^Tx = c\}
|
||||||
|
$$
|
||||||
|
This is a hyperplane.
|
||||||
|
$$
|
||||||
|
\|w \| \| x \| \cos \Theta = c
|
||||||
|
\qquad
|
||||||
|
\|x \| \cos \Theta = \frac{c}{\| w\|}
|
||||||
|
$$
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.4\linewidth]{../img/lez12-img4.JPG}
|
||||||
|
\caption{Hyperplane}
|
||||||
|
%\label{fig:}
|
||||||
|
\end{figure}\\
|
||||||
|
So $(w,c)$ describes a hyperplane.
|
||||||
|
\\
|
||||||
|
We can do binary classification using the hyperplane. Any point lives either in the positive half space or in the negative one. So the hyperplane is splitting the space in halves.
|
||||||
|
$ H \equiv \{ x \in \barra{R}^d : w^T x = c \}$\
|
||||||
|
$$
|
||||||
|
H^+ \equiv \{x \in \barra{R}^d : w^Tx > c \} \qquad \textbf{positive $h_s$}
|
||||||
|
$$
|
||||||
|
$$
|
||||||
|
H^- \equiv \{x \in \barra{R}^d : w^Tx \leq c \} \qquad \textbf{negative $h_s$}
|
||||||
|
$$\
|
||||||
|
$$ h(x) =
|
||||||
|
\begin{cases}
|
||||||
|
+1 \ if \ x \in H^+\\
|
||||||
|
-1 \ if \ x \in H^-
|
||||||
|
\end{cases} \qquad
|
||||||
|
h(x) = sgn (w^T x - c)
|
||||||
|
$$
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.6\linewidth]{../img/lez12-img5.JPG}
|
||||||
|
\caption{Hyperplane}
|
||||||
|
%\label{fig:}
|
||||||
|
\end{figure}
|
||||||
|
\newpage
|
||||||
|
$h_1$ is a non-homogeneous linear classifier.\\
|
||||||
|
$h_2$ is a homogeneous linear classifier.
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.3\linewidth]{../img/lez12-img6.JPG}
|
||||||
|
\caption{Hyperplane}
|
||||||
|
%\label{fig:}
|
||||||
|
\end{figure}
|
||||||
|
Any non-homogeneous classifier is equivalent to a homogeneous one:
|
||||||
|
$$
|
||||||
|
\{x \in \barra{R}^d : w^T x = c \} \textbf{ is equivalent to \quad} \{x' \in \barra{R}^{d+1} : \nu^T x' = 0 \}
|
||||||
|
$$
|
||||||
|
$$
|
||||||
|
\nu = (w_1,..,w_d, -c) \qquad x' = (x_1,..., x_d, 1)
|
||||||
|
$$
|
||||||
|
So we added a dimension.
|
||||||
|
$$
|
||||||
|
w^T x = c \ \Leftrightarrow \ \nu^T x' = 0
|
||||||
|
$$
|
||||||
|
$$
|
||||||
|
\sum_{i} w_i x_i = c \ \Leftrightarrow \ \sum_{i} w_i x_i - c = 0
|
||||||
|
$$\\\\
|
||||||
|
\bred{Rule}:\\
|
||||||
|
\textbf{When you learn a predictor just add an extra feature to your data points, set it to 1, and forget about non-homogeneous stuff.}
|
||||||
|
\newpage
|
||||||
|
One dimensional example
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.6\linewidth]{../img/lez12-img7.JPG}
|
||||||
|
\caption{Example of one dimensional hyperplane}
|
||||||
|
%\label{fig:}
|
||||||
|
\end{figure}\\
|
||||||
|
I have a negative point (left of $(x,1)$) and a positive point (left of $(z,1)$) classified
|
||||||
|
\\\\
|
||||||
|
Now I want to learn a linear classifier. How can I do it?
|
||||||
|
$$
|
||||||
|
H_d = \{ \ h : \exists w \in \barra{R}^d \ h(x) = sgn(w^T x) \ \}
|
||||||
|
$$
|
||||||
|
Parametric!
|
||||||
|
\\
|
||||||
|
We expect high bias and low variance.
|
||||||
|
\\
|
||||||
|
$$
|
||||||
|
ERM \qquad \hat{h}_S = arg \min_{h \in H_d} \ \frac{1}{m} \cdot \sum_{t = 1}^{m} I \{h(x_t) \neq y_t \} =
|
||||||
|
$$
|
||||||
|
$$
|
||||||
|
= \ arg \min_{w \in \barra{R}^d} \ \frac{1}{m} \cdot \sum_{t = 1}^{m} I \, \{ \, y_t \, w^T x_t \leq 0 \, \}
|
||||||
|
$$
|
||||||
|
A bad optimisation problem!
|
||||||
|
\\\\
|
||||||
|
\bred{FACT}:\\
|
||||||
|
It is unlikely to find an algorithm that solves ERM for $H_d$ and zero-one loss efficiently.
|
||||||
|
\\
|
||||||
|
\bred{NP completeness problems!}
|
||||||
|
\\ It's very unlikely to solve this problem.
|
||||||
|
\\
|
||||||
|
This problem is called \textbf{MinDisagreement}
|
||||||
|
\\
|
||||||
|
\subsection{MinDisagreement}
|
||||||
|
Instance: $(x_1, y_1) \dots (x_m, y_m) \in \{ 0,1 \}^d \times \{-1, 1 \}, \quad k \in \barra{N}$\\
|
||||||
|
Question: Is there $w \in \barra{R}^d$ \\ s.t.\ $y_t \, w^T x_t \leq 0$ for at most $k$ indices $t \in \{1,\dots,m\}$ % NOTE(review): domain of $w$ was garbled in the original ("x \, D^d") --- confirm
|
||||||
|
\\
|
||||||
|
This is NP-complete!
|
||||||
%\expt{\ell(\hat{\ell}) + 2}
|
%\expt{\ell(\hat{\ell}) + 2}
|
||||||
|
|
||||||
|
|
||||||
|
@ -125,5 +125,8 @@
|
|||||||
\@writefile{lof}{\addvspace {10\p@ }}
|
\@writefile{lof}{\addvspace {10\p@ }}
|
||||||
\@writefile{lot}{\addvspace {10\p@ }}
|
\@writefile{lot}{\addvspace {10\p@ }}
|
||||||
\@writefile{toc}{\contentsline {section}{\numberline {12.1}Non parametrics algorithms}{75}\protected@file@percent }
|
\@writefile{toc}{\contentsline {section}{\numberline {12.1}Non parametrics algorithms}{75}\protected@file@percent }
|
||||||
|
\@writefile{toc}{\contentsline {subsection}{\numberline {12.1.1}Theorem: No free lunch}{75}\protected@file@percent }
|
||||||
|
\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces Tree building}}{76}\protected@file@percent }
|
||||||
|
\@writefile{toc}{\contentsline {section}{\numberline {12.2}Highly Parametric Learning Algorithm}{77}\protected@file@percent }
|
||||||
\bibstyle{abbrv}
|
\bibstyle{abbrv}
|
||||||
\bibdata{main}
|
\bibdata{main}
|
||||||
|
@ -58,3 +58,4 @@
|
|||||||
\contentsline {figure}{\numberline {11.3}{\ignorespaces Shape of the function}}{70}%
|
\contentsline {figure}{\numberline {11.3}{\ignorespaces Shape of the function}}{70}%
|
||||||
\contentsline {figure}{\numberline {11.4}{\ignorespaces Parametric and non parametric growing as training set getting larger}}{74}%
|
\contentsline {figure}{\numberline {11.4}{\ignorespaces Parametric and non parametric growing as training set getting larger}}{74}%
|
||||||
\addvspace {10\p@ }
|
\addvspace {10\p@ }
|
||||||
|
\contentsline {figure}{\numberline {12.1}{\ignorespaces Tree building}}{76}%
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 20 APR 2020 15:23
|
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 21 APR 2020 09:36
|
||||||
entering extended mode
|
entering extended mode
|
||||||
**./main.tex
|
**./main.tex
|
||||||
(main.tex
|
(main.tex
|
||||||
@ -2050,20 +2050,54 @@ Underfull \hbox (badness 10000) in paragraph at lines 296--333
|
|||||||
|
|
||||||
[73]) [74 <./lectures/../img/lez11-img4.JPG>] (lectures/lecture12.tex
|
[73]) [74 <./lectures/../img/lez11-img4.JPG>] (lectures/lecture12.tex
|
||||||
Chapter 12.
|
Chapter 12.
|
||||||
) [75
|
|
||||||
|
|
||||||
] (main.bbl
|
Underfull \hbox (badness 10000) in paragraph at lines 8--17
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 8--17
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
<lectures/../img/lez12-img1.JPG, id=317, 314.67563pt x 160.34906pt>
|
||||||
|
File: lectures/../img/lez12-img1.JPG Graphic file (type jpg)
|
||||||
|
<use lectures/../img/lez12-img1.JPG>
|
||||||
|
Package pdftex.def Info: lectures/../img/lez12-img1.JPG used on input line 35.
|
||||||
|
|
||||||
|
(pdftex.def) Requested size: 234.00238pt x 119.24121pt.
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 32--44
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 32--44
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
|
||||||
|
LaTeX Warning: `h' float specifier changed to `ht'.
|
||||||
|
|
||||||
|
[75
|
||||||
|
|
||||||
|
]
|
||||||
|
Underfull \hbox (badness 10000) in paragraph at lines 49--60
|
||||||
|
|
||||||
|
[]
|
||||||
|
|
||||||
|
) [76 <./lectures/../img/lez12-img1.JPG>] [77] (main.bbl
|
||||||
|
|
||||||
LaTeX Warning: Empty `thebibliography' environment on input line 3.
|
LaTeX Warning: Empty `thebibliography' environment on input line 3.
|
||||||
|
|
||||||
) [76
|
) [78
|
||||||
|
|
||||||
] (main.aux) )
|
] (main.aux) )
|
||||||
Here is how much of TeX's memory you used:
|
Here is how much of TeX's memory you used:
|
||||||
5490 strings out of 480934
|
5496 strings out of 480934
|
||||||
80318 string characters out of 2909670
|
80492 string characters out of 2909670
|
||||||
336254 words of memory out of 3000000
|
336249 words of memory out of 3000000
|
||||||
21133 multiletter control sequences out of 15000+200000
|
21138 multiletter control sequences out of 15000+200000
|
||||||
561784 words of font info for 96 fonts, out of 3000000 for 9000
|
561784 words of font info for 96 fonts, out of 3000000 for 9000
|
||||||
1141 hyphenation exceptions out of 8191
|
1141 hyphenation exceptions out of 8191
|
||||||
34i,13n,42p,311b,358s stack positions out of 5000i,500n,10000p,200000b,50000s
|
34i,13n,42p,311b,358s stack positions out of 5000i,500n,10000p,200000b,50000s
|
||||||
@ -2095,9 +2129,9 @@ s/cm/cmr8.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy1
|
|||||||
/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy8.pfb><C:/Program
|
/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy8.pfb><C:/Program
|
||||||
Files/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msam10.pfb><C:/Program Fil
|
Files/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msam10.pfb><C:/Program Fil
|
||||||
es/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
|
es/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
|
||||||
Output written on main.pdf (77 pages, 2082324 bytes).
|
Output written on main.pdf (79 pages, 2109982 bytes).
|
||||||
PDF statistics:
|
PDF statistics:
|
||||||
806 PDF objects out of 1000 (max. 8388607)
|
816 PDF objects out of 1000 (max. 8388607)
|
||||||
0 named destinations out of 1000 (max. 500000)
|
0 named destinations out of 1000 (max. 500000)
|
||||||
246 words of extra memory for PDF output out of 10000 (max. 10000000)
|
251 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@
|
|||||||
\titlespacing*{\chapter}{0pt}{-80pt}{40pt}
|
\titlespacing*{\chapter}{0pt}{-80pt}{40pt}
|
||||||
\chapterfont{\color{Blue}}
|
\chapterfont{\color{Blue}}
|
||||||
\sectionfont{\color{DarkGreen}}
|
\sectionfont{\color{DarkGreen}}
|
||||||
\subsectionfont{\color{BrickRed}}
|
\subsectionfont{\color{red}}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,3 +52,5 @@
|
|||||||
\contentsline {subsection}{\numberline {11.2.1}Example of parametric algorithms}{74}%
|
\contentsline {subsection}{\numberline {11.2.1}Example of parametric algorithms}{74}%
|
||||||
\contentsline {chapter}{\numberline {12}Lecture 12 - 21-04-2020}{75}%
|
\contentsline {chapter}{\numberline {12}Lecture 12 - 21-04-2020}{75}%
|
||||||
\contentsline {section}{\numberline {12.1}Non parametrics algorithms}{75}%
|
\contentsline {section}{\numberline {12.1}Non parametrics algorithms}{75}%
|
||||||
|
\contentsline {subsection}{\numberline {12.1.1}Theorem: No free lunch}{75}%
|
||||||
|
\contentsline {section}{\numberline {12.2}Highly Parametric Learning Algorithm}{77}%
|
||||||
|