mirror of
https://github.com/Andreaierardi/Master-DataScience-Notes.git
synced 2025-01-05 17:15:56 +01:00
continue lecture 7
This commit is contained in:
parent
616738422d
commit
9188535fe9
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
Binary file not shown.
After Width: | Height: | Size: 9.8 KiB |
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
@ -4,3 +4,10 @@
|
||||
\@writefile{toc}{\contentsline {chapter}{\numberline {1}Lecture 7 - 07-04-2020}{1}\protected@file@percent }
|
||||
\@writefile{lof}{\addvspace {10\p@ }}
|
||||
\@writefile{lot}{\addvspace {10\p@ }}
|
||||
\@writefile{toc}{\contentsline {subsection}{\numberline {1.0.1}Chernoff-Hoffding bound}{1}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {subsubsection}{Union Bound}{2}\protected@file@percent }
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Example}}{2}\protected@file@percent }
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces Example}}{3}\protected@file@percent }
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1.3}{\ignorespaces Example}}{3}\protected@file@percent }
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1.4}{\ignorespaces Example}}{4}\protected@file@percent }
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1.5}{\ignorespaces Example}}{4}\protected@file@percent }
|
||||
|
@ -1,4 +1,4 @@
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 13 APR 2020 20:57
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 14 APR 2020 21:52
|
||||
entering extended mode
|
||||
**./lecture7.tex
|
||||
(lecture7.tex
|
||||
@ -336,32 +336,139 @@ Underfull \hbox (badness 10000) in paragraph at lines 29--33
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 46--49
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (0.51405pt too wide) detected at line 54
|
||||
\U/msb/m/n/12 P [] \OMS/cmsy/m/n/12 \OML/cmm/m/it/12 e[] or \U/msb/m/n/12 P [
|
||||
] \OMS/cmsy/m/n/12 \OML/cmm/m/it/12 e[]
|
||||
[]
|
||||
|
||||
[1
|
||||
|
||||
|
||||
{C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
|
||||
(lecture7.aux) )
|
||||
Here is how much of TeX's memory you used:
|
||||
5070 strings out of 480934
|
||||
68436 string characters out of 2909670
|
||||
331037 words of memory out of 3000000
|
||||
20834 multiletter control sequences out of 15000+200000
|
||||
539297 words of font info for 45 fonts, out of 3000000 for 9000
|
||||
1141 hyphenation exceptions out of 8191
|
||||
42i,5n,50p,332b,124s stack positions out of 5000i,500n,10000p,200000b,50000s
|
||||
<C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/j
|
||||
knappen/ec/dpi600\ecrm1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fon
|
||||
ts/pk/ljfour/jknappen/ec/dpi600\ecbx2488.pk><C:/Program Files/MiKTeX 2.9/fonts/
|
||||
type1/public/amsfonts/cm/cmex10.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/pu
|
||||
blic/amsfonts/cm/cmmi12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/ams
|
||||
fonts/cm/cmmi8.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/
|
||||
cmr12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.pfb>
|
||||
<C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><C:/Prog
|
||||
ram Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy8.pfb><C:/Program Files
|
||||
/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
|
||||
Output written on lecture7.pdf (1 page, 85469 bytes).
|
||||
PDF statistics:
|
||||
98 PDF objects out of 1000 (max. 8388607)
|
||||
0 named destinations out of 1000 (max. 500000)
|
||||
1 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||
<../img/lez7-img1.JPG, id=17, 188.95593pt x 88.83188pt>
|
||||
File: ../img/lez7-img1.JPG Graphic file (type jpg)
|
||||
<use ../img/lez7-img1.JPG>
|
||||
Package pdftex.def Info: ../img/lez7-img1.JPG used on input line 73.
|
||||
(pdftex.def) Requested size: 117.00119pt x 55.0047pt.
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 70--79
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 70--79
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 81--87
|
||||
|
||||
[]
|
||||
|
||||
<../img/lez7-img2.JPG, id=18, 79.79813pt x 73.02281pt>
|
||||
File: ../img/lez7-img2.JPG Graphic file (type jpg)
|
||||
<use ../img/lez7-img2.JPG>
|
||||
Package pdftex.def Info: ../img/lez7-img2.JPG used on input line 98.
|
||||
(pdftex.def) Requested size: 77.99881pt x 71.37688pt.
|
||||
|
||||
LaTeX Warning: `h' float specifier changed to `ht'.
|
||||
|
||||
|
||||
Overfull \hbox (27.96684pt too wide) detected at line 111
|
||||
\U/msb/m/n/12 P [] \OMS/cmsy/m/n/12 \U/msb/m/n/12 P [] \OMS/cmsy/m/n/12 [ \
|
||||
U/msb/m/n/12 P [] \OMS/cmsy/m/n/12
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (4.4292pt too wide) detected at line 118
|
||||
\OMS/cmsy/m/n/12 \U/msb/m/n/12 P [] \OT1/cmr/m/n/12 + \U/msb/m/n/12 P [] \OM
|
||||
S/cmsy/m/n/12 \OT1/cmr/m/n/12 2 \OMS/cmsy/m/n/12 \OML/cmm/m/it/12 e[] \OM
|
||||
S/cmsy/m/n/12 ) [][]
|
||||
[]
|
||||
|
||||
[2 <../img/lez7-img1.JPG>]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 122--125
|
||||
|
||||
[]
|
||||
|
||||
<../img/lez7-img3.JPG, id=25, 388.45125pt x 124.96687pt>
|
||||
File: ../img/lez7-img3.JPG Graphic file (type jpg)
|
||||
<use ../img/lez7-img3.JPG>
|
||||
Package pdftex.def Info: ../img/lez7-img3.JPG used on input line 132.
|
||||
(pdftex.def) Requested size: 195.0pt x 62.73308pt.
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 127--145
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 147--159
|
||||
|
||||
[]
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 147--159
|
||||
|
||||
[]
|
||||
|
||||
[3 <../img/lez7-img2.JPG> <../img/lez7-img3.JPG>]
|
||||
Overfull \hbox (76.34352pt too wide) detected at line 161
|
||||
\OML/cmm/m/it/12 `[] [] \OT1/cmr/m/n/12 = \OML/cmm/m/it/12 min `[] [] [] [] []
|
||||
`[] [] [][]\OMS/cmsy/m/n/12 ! []
|
||||
[]
|
||||
|
||||
<../img/lez7-img4.JPG, id=29, 245.41687pt x 77.53969pt>
|
||||
File: ../img/lez7-img4.JPG Graphic file (type jpg)
|
||||
<use ../img/lez7-img4.JPG>
|
||||
Package pdftex.def Info: ../img/lez7-img4.JPG used on input line 165.
|
||||
(pdftex.def) Requested size: 117.00119pt x 36.9666pt.
|
||||
<../img/lez7-img5.JPG, id=30, 234.1247pt x 109.1578pt>
|
||||
File: ../img/lez7-img5.JPG Graphic file (type jpg)
|
||||
<use ../img/lez7-img5.JPG>
|
||||
Package pdftex.def Info: ../img/lez7-img5.JPG used on input line 172.
|
||||
(pdftex.def) Requested size: 117.00119pt x 54.55045pt.
|
||||
|
||||
LaTeX Warning: Command \textquoteright invalid in math mode on input line 176.
|
||||
|
||||
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 161--180
|
||||
|
||||
[]
|
||||
|
||||
[4 <../img/lez7-img4.JPG> <../img/lez7-img5.JPG>] (lecture7.aux) )
|
||||
Here is how much of TeX's memory you used:
|
||||
5130 strings out of 480934
|
||||
69441 string characters out of 2909670
|
||||
334037 words of memory out of 3000000
|
||||
20876 multiletter control sequences out of 15000+200000
|
||||
546629 words of font info for 57 fonts, out of 3000000 for 9000
|
||||
1141 hyphenation exceptions out of 8191
|
||||
42i,7n,50p,332b,236s stack positions out of 5000i,500n,10000p,200000b,50000s
|
||||
<C:\Users\An
|
||||
dreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx1200.pk
|
||||
> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi6
|
||||
00\ecti1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jk
|
||||
nappen/ec/dpi600\ecbx1440.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\font
|
||||
s/pk/ljfour/jknappen/ec/dpi600\ecrm1200.pk> <C:\Users\AndreDany\AppData\Local\M
|
||||
iKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx2488.pk><C:/Program Files/MiKT
|
||||
eX 2.9/fonts/type1/public/amsfonts/cm/cmex10.pfb><C:/Program Files/MiKTeX 2.9/f
|
||||
onts/type1/public/amsfonts/cm/cmmi12.pfb><C:/Program Files/MiKTeX 2.9/fonts/typ
|
||||
e1/public/amsfonts/cm/cmmi8.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public
|
||||
/amsfonts/cm/cmr12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts
|
||||
/cm/cmr6.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.p
|
||||
fb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><C:/P
|
||||
rogram Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy6.pfb><C:/Program Fi
|
||||
les/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy8.pfb><C:/Program Files/MiKTe
|
||||
X 2.9/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
|
||||
Output written on lecture7.pdf (4 pages, 213421 bytes).
|
||||
PDF statistics:
|
||||
219 PDF objects out of 1000 (max. 8388607)
|
||||
0 named destinations out of 1000 (max. 500000)
|
||||
26 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
@ -32,5 +32,149 @@ i can look at this as a random variable
|
||||
\\
|
||||
$$
|
||||
\barra{E} \left[ \, \ell (y'_t, h(x'_t)) \right] = \ell_D(h) \longrightarrow \red{risk}
|
||||
$$\\
|
||||
Using law of large number (LLN), i know that:
|
||||
$$
|
||||
\hat{\ell} \longrightarrow \ell_D(h) \qquad \textit{as} \quad n \rightarrow \infty
|
||||
$$
|
||||
We cannot have a sample of $n = \infty$ so we will introduce another assumption:
|
||||
the \red{Chernoff-Hoeffding bound}
|
||||
|
||||
\subsection{Chernoff-Hoeffding bound}
|
||||
$$
|
||||
Z_1,...,Z_n \quad \textit{iid random variable} \qquad \barra{E}\left[Z_t \right] = u
|
||||
$$
|
||||
all drawn from the same distribution
|
||||
\\
|
||||
$$
|
||||
t = 1, ..., n \qquad \textit{and} \qquad 0 \leq Z_t \leq 1 \qquad t = 1,...,n \quad \textit{then} \quad \forall \varepsilon > 0
|
||||
$$\
|
||||
$$
|
||||
\barra{P} \left( \frac{1}{n} \cdot \sum_{t=1}^{n} Z_t > u + \varepsilon \right) \leq e^{-2 \, \varepsilon^2 \, n} \qquad \textit{or} \qquad \barra{P} \left( \frac{1}{n} \cdot \sum_{t=1}^{n} Z_t < u - \varepsilon \right) \leq e^{-2 \, \varepsilon^2 \, n}
|
||||
$$
|
||||
as the sample size $n$ grows, the bound goes $\downarrow$
|
||||
$$
|
||||
Z_t = \ell(Y'_t, h(X'_t)) \in \left[0,1\right]
|
||||
$$
|
||||
$
|
||||
(X'_1, Y'_1)...(X'_n, Y'_n)$ are $iid$ therefore, \\ $\ell\left(Y'_t, h\left(X'_t\right)\right)$ \quad $t = 1,...,n $ \quad are also $iid$
|
||||
\\
|
||||
We use the Chernoff-Hoeffding bound to bound the deviation of the test error from the risk.
|
||||
|
||||
\subsubsection{Union Bound}
|
||||
Union bound: given a collection of events, not necessarily disjoint, I know
|
||||
that the probability of the union of these events is at most the sum of the
|
||||
probabilities of individual events
|
||||
$$
|
||||
A_1, ..., A_n \qquad \barra{P}\left( A_1 \cup ... \cup A_n \right) \leq \sum_{t=1}^{n} \barra{P} \left(A_t\right)
|
||||
$$
|
||||
\begin{figure}[h]
|
||||
\centering
|
||||
\includegraphics[width=0.3\linewidth]{../img/lez7-img1.JPG}
|
||||
\caption{Example}
|
||||
%\label{fig:}
|
||||
\end{figure}\\
|
||||
\red{that's why $ \leq$}
|
||||
\\\\
|
||||
$$
|
||||
\barra{P} \left(|\,\hat{\ell}_{s'} \left( h \right) - \ell_D\left( h \right) \, | \, > \varepsilon \right)
|
||||
$$
|
||||
This is the probability according to the random draw of the test set.\\
|
||||
\\
|
||||
I want to bound the probability that the test error differs from the risk by
|
||||
more than a number $\varepsilon > 0$. How can I use the
|
||||
Chernoff bound?
|
||||
$$
|
||||
|\,\hat{\ell}_{s'} \left( h \right) - \ell_D\left( h \right) \, | \, > \varepsilon \quad \Rightarrow \quad
|
||||
\hat{\ell}_{s'}\left(h\right)-\ell_D\left(h\right) > \varepsilon \quad \vee \quad
|
||||
\ell_D \left(h\right)-\hat{\ell}_{s'}\left(h\right) > \varepsilon
|
||||
$$
|
||||
|
||||
$$
|
||||
A, B \qquad A \Rightarrow B \qquad \barra{P} \left( A \right) \leq \barra{P} \left( B \right)
|
||||
$$
|
||||
\begin{figure}[h]
|
||||
\centering
|
||||
\includegraphics[width=0.2\linewidth]{../img/lez7-img2.JPG}
|
||||
\caption{Example}
|
||||
%\label{fig:}
|
||||
\end{figure}
|
||||
$$
|
||||
\barra{P} \left(|\,\hat{\ell}_{s'} \left( h \right) - \ell_D\left( h \right) \, | \, > \varepsilon \right)
|
||||
\leq
|
||||
\barra{P} \left( \hat{\ell}_{s'}\left(h\right)-\ell_D\left(h\right) > \varepsilon \quad
|
||||
\cup \quad
|
||||
\ell_D \left(h\right)-
|
||||
\hat{\ell}_{s'}\left(h\right)
|
||||
> \varepsilon \right)
|
||||
\leq
|
||||
$$\
|
||||
$$
|
||||
\leq
|
||||
\barra{P} \left( \hat{\ell}_{s'} > \ell_D\left(h\right) + \varepsilon \right) + \barra{P} \left( \hat{\ell}_{s'} < \ell_D\left(h\right) - \varepsilon \right)
|
||||
\quad
|
||||
\leq \quad
|
||||
2 \cdot e^{-2 \, \varepsilon^2 \, n} \quad \Rightarrow \red{ \textit{we call it } \delta }
|
||||
$$
|
||||
$$
|
||||
\varepsilon = \sqrt[]{\frac{1}{2\cdot n}\ln \frac{2}{\delta
|
||||
}}
|
||||
$$
|
||||
\col{The two events are disjoint}{Blue}\\\\
|
||||
This means that the probability of this deviation is at most $\delta$!
|
||||
$$
|
||||
|\, \hat{\ell}_{s'}\left(h\right)-\ell_D\left(h\right) \, | \leq \sqrt[]{\frac{1}{2\cdot n} \ln \frac{2}{\delta}} \qquad \textit{with probability at least $1- \delta$}
|
||||
$$
|
||||
\red{Test error of true estimate is going to be good for this value ($\delta$)}
|
||||
\\
|
||||
\begin{figure}[h]
|
||||
\centering
|
||||
\includegraphics[width=0.5\linewidth]{../img/lez7-img3.JPG}
|
||||
\caption{Example}
|
||||
%\label{fig:}
|
||||
\end{figure}Confidence interval for the risk at confidence level $1-\delta$.\\
|
||||
I want to take $\delta = 0.05$ so that $1 - \delta$ is $95\%$. So the test error is going to be
|
||||
an estimate of the true risk whose precision depends on the size of the test
|
||||
set ($n$).\\
|
||||
As n grows I can pin down the position of the true risk.\\\
|
||||
This is how we can use probability to make sense of what we do in practice.
|
||||
If we take a predictor h we can compute the risk error estimate.\\
|
||||
We can measure how accurate is our risk error estimate.\\
|
||||
\textbf{Test error is an estimate of risk for a given predictor (h).}
|
||||
\\
|
||||
$$
|
||||
\barra{E} \left[ \, \ell\left( Y'_t, h\left(X'_t\right)\right) \, \right] = \ell_D \left( h\right)
|
||||
$$
|
||||
\textbf{h is fixed with respect to S’} $\longrightarrow$ $h$ does not depend on the test set.
|
||||
So the learning algorithm which produces $h$ must not have access to the test set.\\
|
||||
If we use test set we break down this equation.
|
||||
\\\\
|
||||
Now, how to \textbf{build a good algorithm?}\\
|
||||
Training set $S = \{ \left(x_1,y_1\right)...\left(x_m,y_m\right) \}$ random sample
|
||||
\\$ A $ \qquad $A\left(S\right) = h $ predictor output by $A$ given $S$
|
||||
where $A$ is the \red{learning algorithm as a function of the training set $S$.}
|
||||
\\
|
||||
$\forall \, S$ \qquad $A\left(S\right) \in H \qquad h^* \in H $
|
||||
\\
|
||||
$$
|
||||
\ell_D\left(h^*\right) = \min_{h \in H} \ell_D \left(h\right) \qquad \hat{\ell}_s\left(h^*\right) \textit{ is close to } \ell_D\left(h^*\right) \longrightarrow \textbf{it is going to have small error }
|
||||
$$
|
||||
where $\hat{\ell}_s\left(h^*\right)$ is the \red{training error of $h^*$}
|
||||
\begin{figure}[h]
|
||||
\centering
|
||||
\includegraphics[width=0.3\linewidth]{../img/lez7-img4.JPG}
|
||||
\caption{Example}
|
||||
%\label{fig:}
|
||||
\end{figure}\\
|
||||
This guy $\ell_D\left(h^*\right)$ is closest to $0$ since optimum\\
|
||||
\begin{figure}[h]
|
||||
\centering
|
||||
\includegraphics[width=0.3\linewidth]{../img/lez7-img5.JPG}
|
||||
\caption{Example}
|
||||
%\label{fig:}
|
||||
\end{figure}\\
|
||||
In the risk we get the optimum in $h^*$, but in the empirical one we could get another $h'$ better than $h^*$
|
||||
\\\\
|
||||
In order to fix on a concrete algorithm we are going to take the empirical risk
|
||||
minimiser (ERM) algorithm.
|
||||
\end{document}
|
Loading…
Reference in New Issue
Block a user