continue lecture 7

2025-01-05 17:15:56 +01:00 · 2020-04-14 21:52:54 +02:00 · 2020-04-14 21:52:54 +02:00 · 9188535fe9
commit 9188535fe9
parent 616738422d
10 changed files with 283 additions and 25 deletions
--- a/Learning/img/lez7-img1.JPG
+++ b/Learning/img/lez7-img1.JPG
--- a/Learning/img/lez7-img2.JPG
+++ b/Learning/img/lez7-img2.JPG
--- a/Learning/img/lez7-img3.JPG
+++ b/Learning/img/lez7-img3.JPG
--- a/Learning/img/lez7-img4.JPG
+++ b/Learning/img/lez7-img4.JPG
--- a/Learning/img/lez7-img5.JPG
+++ b/Learning/img/lez7-img5.JPG
--- a/Learning/lectures/lecture7.aux
+++ b/Learning/lectures/lecture7.aux
@ -4,3 +4,10 @@
 \@writefile{toc}{\contentsline {chapter}{\numberline {1}Lecture 7 - 07-04-2020}{1}\protected@file@percent }
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.0.1}Chernoff-Hoffding bound}{1}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsubsection}{Union Bound}{2}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Example}}{2}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces Example}}{3}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {1.3}{\ignorespaces Example}}{3}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {1.4}{\ignorespaces Example}}{4}\protected@file@percent }
+\@writefile{lof}{\contentsline {figure}{\numberline {1.5}{\ignorespaces Example}}{4}\protected@file@percent }
--- a/Learning/lectures/lecture7.log
+++ b/Learning/lectures/lecture7.log
@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13)  13 APR 2020 20:57
+This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13)  14 APR 2020 21:52
 entering extended mode
 **./lecture7.tex
 (lecture7.tex
@ -336,32 +336,139 @@ Underfull \hbox (badness 10000) in paragraph at lines 29--33

 []

+
+Underfull \hbox (badness 10000) in paragraph at lines 46--49
+
+ []
+
+
+Overfull \hbox (0.51405pt too wide) detected at line 54
+\U/msb/m/n/12 P [] \OMS/cmsy/m/n/12  \OML/cmm/m/it/12 e[] or \U/msb/m/n/12 P [
+] \OMS/cmsy/m/n/12  \OML/cmm/m/it/12 e[]
+ []
+
 [1


 {C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
-(lecture7.aux) ) 
-Here is how much of TeX's memory you used:
- 5070 strings out of 480934
- 68436 string characters out of 2909670
- 331037 words of memory out of 3000000
- 20834 multiletter control sequences out of 15000+200000
- 539297 words of font info for 45 fonts, out of 3000000 for 9000
- 1141 hyphenation exceptions out of 8191
- 42i,5n,50p,332b,124s stack positions out of 5000i,500n,10000p,200000b,50000s
- <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/j
-knappen/ec/dpi600\ecrm1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fon
-ts/pk/ljfour/jknappen/ec/dpi600\ecbx2488.pk><C:/Program Files/MiKTeX 2.9/fonts/
-type1/public/amsfonts/cm/cmex10.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/pu
-blic/amsfonts/cm/cmmi12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/ams
-fonts/cm/cmmi8.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/
-cmr12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.pfb>
-<C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><C:/Prog
-ram Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy8.pfb><C:/Program Files
-/MiKTeX 2.9/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
-Output written on lecture7.pdf (1 page, 85469 bytes).
-PDF statistics:
- 98 PDF objects out of 1000 (max. 8388607)
- 0 named destinations out of 1000 (max. 500000)
- 1 words of extra memory for PDF output out of 10000 (max. 10000000)
+<../img/lez7-img1.JPG, id=17, 188.95593pt x 88.83188pt>
+File: ../img/lez7-img1.JPG Graphic file (type jpg)
+<use ../img/lez7-img1.JPG>
+Package pdftex.def Info: ../img/lez7-img1.JPG  used on input line 73.
+(pdftex.def)             Requested size: 117.00119pt x 55.0047pt.
+
+Underfull \hbox (badness 10000) in paragraph at lines 70--79
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 70--79
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 81--87
+
+ []
+
+<../img/lez7-img2.JPG, id=18, 79.79813pt x 73.02281pt>
+File: ../img/lez7-img2.JPG Graphic file (type jpg)
+<use ../img/lez7-img2.JPG>
+Package pdftex.def Info: ../img/lez7-img2.JPG  used on input line 98.
+(pdftex.def)             Requested size: 77.99881pt x 71.37688pt.
+
+LaTeX Warning: `h' float specifier changed to `ht'.
+
+
+Overfull \hbox (27.96684pt too wide) detected at line 111
+\U/msb/m/n/12 P [] \OMS/cmsy/m/n/12  \U/msb/m/n/12 P []  \OMS/cmsy/m/n/12 [  \
+U/msb/m/n/12 P [] \OMS/cmsy/m/n/12 
+ []
+
+
+Overfull \hbox (4.4292pt too wide) detected at line 118
+\OMS/cmsy/m/n/12  \U/msb/m/n/12 P [] \OT1/cmr/m/n/12 + \U/msb/m/n/12 P []  \OM
+S/cmsy/m/n/12   \OT1/cmr/m/n/12 2 \OMS/cmsy/m/n/12  \OML/cmm/m/it/12 e[]  \OM
+S/cmsy/m/n/12 ) [][]
+ []
+
+[2 <../img/lez7-img1.JPG>]
+Underfull \hbox (badness 10000) in paragraph at lines 122--125
+
+ []
+
+<../img/lez7-img3.JPG, id=25, 388.45125pt x 124.96687pt>
+File: ../img/lez7-img3.JPG Graphic file (type jpg)
+<use ../img/lez7-img3.JPG>
+Package pdftex.def Info: ../img/lez7-img3.JPG  used on input line 132.
+(pdftex.def)             Requested size: 195.0pt x 62.73308pt.
+
+Underfull \hbox (badness 10000) in paragraph at lines 127--145
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 147--159
+
+ []
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 147--159
+
+ []
+
+[3 <../img/lez7-img2.JPG> <../img/lez7-img3.JPG>]
+Overfull \hbox (76.34352pt too wide) detected at line 161
+\OML/cmm/m/it/12 `[] [] \OT1/cmr/m/n/12 = \OML/cmm/m/it/12 min `[] []  [] [] []
+`[] [] [][]\OMS/cmsy/m/n/12 ! []
+ []
+
+<../img/lez7-img4.JPG, id=29, 245.41687pt x 77.53969pt>
+File: ../img/lez7-img4.JPG Graphic file (type jpg)
+<use ../img/lez7-img4.JPG>
+Package pdftex.def Info: ../img/lez7-img4.JPG  used on input line 165.
+(pdftex.def)             Requested size: 117.00119pt x 36.9666pt.
+<../img/lez7-img5.JPG, id=30, 234.1247pt x 109.1578pt>
+File: ../img/lez7-img5.JPG Graphic file (type jpg)
+<use ../img/lez7-img5.JPG>
+Package pdftex.def Info: ../img/lez7-img5.JPG  used on input line 172.
+(pdftex.def)             Requested size: 117.00119pt x 54.55045pt.
+
+LaTeX Warning: Command \textquoteright invalid in math mode on input line 176.
+
+
+Underfull \hbox (badness 10000) in paragraph at lines 161--180
+
+ []
+
+[4 <../img/lez7-img4.JPG> <../img/lez7-img5.JPG>] (lecture7.aux) ) 
+Here is how much of TeX's memory you used:
+ 5130 strings out of 480934
+ 69441 string characters out of 2909670
+ 334037 words of memory out of 3000000
+ 20876 multiletter control sequences out of 15000+200000
+ 546629 words of font info for 57 fonts, out of 3000000 for 9000
+ 1141 hyphenation exceptions out of 8191
+ 42i,7n,50p,332b,236s stack positions out of 5000i,500n,10000p,200000b,50000s
+ <C:\Users\An
+dreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx1200.pk
+> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi6
+00\ecti1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jk
+nappen/ec/dpi600\ecbx1440.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\font
+s/pk/ljfour/jknappen/ec/dpi600\ecrm1200.pk> <C:\Users\AndreDany\AppData\Local\M
+iKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx2488.pk><C:/Program Files/MiKT
+eX 2.9/fonts/type1/public/amsfonts/cm/cmex10.pfb><C:/Program Files/MiKTeX 2.9/f
+onts/type1/public/amsfonts/cm/cmmi12.pfb><C:/Program Files/MiKTeX 2.9/fonts/typ
+e1/public/amsfonts/cm/cmmi8.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public
+/amsfonts/cm/cmr12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts
+/cm/cmr6.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.p
+fb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><C:/P
+rogram Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy6.pfb><C:/Program Fi
+les/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy8.pfb><C:/Program Files/MiKTe
+X 2.9/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
+Output written on lecture7.pdf (4 pages, 213421 bytes).
+PDF statistics:
+ 219 PDF objects out of 1000 (max. 8388607)
+ 0 named destinations out of 1000 (max. 500000)
+ 26 words of extra memory for PDF output out of 10000 (max. 10000000)

--- a/Learning/lectures/lecture7.pdf
+++ b/Learning/lectures/lecture7.pdf
--- a/Learning/lectures/lecture7.synctex.gz
+++ b/Learning/lectures/lecture7.synctex.gz
--- a/Learning/lectures/lecture7.tex
+++ b/Learning/lectures/lecture7.tex
@ -32,5 +32,149 @@ i can look at this as a random variable
 \\
 $$
 \barra{E} \left[ \, \ell (y'_t, h(x'_t)) \right] = \ell_D(h) \longrightarrow \red{risk}
+$$\\
+Using the law of large numbers (LLN), I know that:
 $$
+\hat{\ell} \longrightarrow \ell_D(h) \qquad as \quad n \rightarrow \infty
+$$
+We cannot have a sample of $n = \infty$ so we will introduce another assumption:
+the \red{Chernoff-Hoeffding bound}
+
+\subsection{Chernoff-Hoeffding bound}
+$$
+Z_1,...,Z_n \quad \textit{iid random variable} \qquad \barra{E}\left[Z_t \right] = u
+$$
+all drawn from the same distribution 
+\\
+$$
+t = 1, ..., n \qquad and \qquad 0 \leq Z_t \leq 1 \qquad t = 1,...,n \quad then \quad \forall \varepsilon > 0
+$$\
+$$
+\barra{P} \left( \frac{1}{n} \cdot \sum_{t=1}^{n} Z_t > u + \varepsilon \right) \leq e^{-2 \, \varepsilon^2 \, n} \qquad  or \qquad \barra{P} \left( \frac{1}{n} \cdot \sum_{t=1}^{n} Z_t < u - \varepsilon \right) \leq e^{-2 \, \varepsilon^2 \, n}
+$$
+as the sample size $n$ grows, this probability goes down $\downarrow$
+$$
+Z_t = \ell(Y'_t, h(X'_t)) \in \left[0,1\right]
+$$
+$
+(X'_1, Y'_1)...(X'_n, Y'_n)$ are $iid$ therefore, \\ $\ell\left(Y'_t, h\left(X'_t\right)\right)$ \quad $t = 1,...,n $ \quad are also $iid$
+\\
+We are using the bound of e to bound the deviation of this.
+
+\subsubsection{Union Bound}
+Union bound: given a collection of events, not necessarily disjoint, I know
+that the probability of the union of these events is at most the sum of the
+probabilities of the individual events
+$$
+A_1, ..., A_n \qquad \barra{P}\left( A_1 \cup ... \cup A_n \right) \leq \sum_{t=1}^{n} \barra{P} \left(A_t\right)
+$$
+\begin{figure}[h]
+    \centering
+    \includegraphics[width=0.3\linewidth]{../img/lez7-img1.JPG}
+    \caption{Example}
+    %\label{fig:}
+\end{figure}\\
+\red{that's why $ \leq$}
+\\\\
+$$
+\barra{P} \left(|\,\hat{\ell}_{s'} \left( h \right) - \ell_D\left( h \right) \, | \, > \varepsilon \right)
+$$
+This is the probability according to the random draw of the test set.\\
+\\
+Suppose the test error differs from the risk by more than a number $\varepsilon > 0$. I want to bound the
+probability that these two quantities differ by more than $\varepsilon$. How can I use the
+Chernoff bound?
+$$
+|\,\hat{\ell}_{s'} \left( h \right) - \ell_D\left( h \right) \, | \, > \varepsilon  \quad \Rightarrow \quad 
+\hat{\ell}_{s'}\left(h\right)-\ell_D\left(h\right) > \varepsilon \quad \vee \quad
+\ell_D \left(h\right)-\hat{\ell}_{s'}\left(h\right) > \varepsilon
+$$
+
+$$
+A, B \qquad A \Rightarrow B \qquad \barra{P} \left( A \right) \leq \barra{P} \left( B \right)
+$$
+\begin{figure}[h]
+    \centering
+    \includegraphics[width=0.2\linewidth]{../img/lez7-img2.JPG}
+    \caption{Example}
+    %\label{fig:}
+\end{figure}
+$$
+\barra{P} \left(|\,\hat{\ell}_{s'} \left( h \right) - \ell_D\left( h \right) \, | \, > \varepsilon \right) 
+\leq
+\barra{P} \left( \hat{\ell}_{s'}\left(h\right)-\ell_D\left(h\right) > \varepsilon \quad
+\cup \quad
+\ell_D \left(h\right)-\hat{\ell}_{s'}\left(h\right) > \varepsilon
+\right) 
+\leq
+$$\
+$$
+\leq
+\barra{P} \left( \hat{\ell}_{s'} > \ell_D\left(h\right) + \varepsilon \right) + \barra{P} \left( \hat{\ell}_{s'} < \ell_D\left(h\right) - \varepsilon \right) 
+\quad
+\leq \quad
+2 \cdot e^{-2 \, \varepsilon^2 \, n} \quad \Rightarrow \red{ \textit{we call it } \delta }
+$$
+$$
+\varepsilon = \sqrt[]{\frac{1}{2\cdot n}\ln \frac{2}{\delta
+}}
+$$
+\col{The two events are disjoint}{Blue}\\\\
+This means that the probability of this deviation is at most $\delta$!
+$$
+|\, \hat{\ell}_{s'}\left(h\right)-\ell_D\left(h\right) \, | \leq \sqrt[]{\frac{1}{2\cdot n} \ln \frac{2}{\delta}} \qquad \textit{with probability at least $1- \delta$}
+$$
+\red{Test error of true estimate is going to be good for this value ($\delta$)}
+\\
+\begin{figure}[h]
+    \centering
+    \includegraphics[width=0.5\linewidth]{../img/lez7-img3.JPG}
+    \caption{Example}
+    %\label{fig:}
+\end{figure}Confidence interval for risk at confidence level 1-delta.\\
+I want to take $\delta = 0.05$ so that $1 - \delta$ is $95\%$. So the test error is going to be
+an estimate of the true risk whose precision depends on how big the test
+set ($n$) is.\\
+As n grows I can pin down the position of the true risk.\\\
+This is how we can use probability to make sense of what we do in practice.
+If we take a predictor h we can compute the risk error estimate.\\
+We can measure how accurate is our risk error estimate.\\
+\textbf{Test error is an estimate of risk for a given predictor (h).}
+\\ 
+$$
+\barra{E} \left[ \, \ell\left( Y'_t, h\left(X'_t\right)\right) \, \right] = \ell_D \left( h\right) 
+$$
+\textbf{h is fixed with respect to $S'$} $\longrightarrow$ $h$ does not depend on the test set.
+So the learning algorithm which produces $h$ must not have access to the test set.\\
+If we use test set we break down this equation.
+\\\\
+Now, how to \textbf{build a good algorithm?}\\
+Training set $S = \{ \left(x_1,y_1\right)...\left(x_m,y_m\right) \}$ random sample
+\\$ A $ \qquad $A\left(S\right) = h $ predictor output by $A$ given $S$
+where A is \red{learning algorithm as function of training set $S$.}
+\\
+$\forall \, S$ \qquad $A\left(S\right) \in H \qquad h^* \in H $
+\\
+$$
+\ell_D\left(h^*\right) = \min_{h \in H} \, \ell_D \left(h\right) \qquad \hat{\ell}_s\left(h^*\right) \textit{ is close to } \ell_D\left(h^*\right) \longrightarrow \textbf{it is going to have small error }
+$$
+where $\hat{\ell}_s\left(h^*\right)$ is the \red{training error of $h^*$}
+\begin{figure}[h]
+    \centering
+    \includegraphics[width=0.3\linewidth]{../img/lez7-img4.JPG}
+    \caption{Example}
+    %\label{fig:}
+\end{figure}\\
+This guy $\ell_D\left(h^*\right)$ is closest to $0$ since optimum\\
+\begin{figure}[h]
+    \centering
+    \includegraphics[width=0.3\linewidth]{../img/lez7-img5.JPG}
+    \caption{Example}
+    %\label{fig:}
+\end{figure}\\
+For the risk the optimum is $h^*$, but for the empirical error another $h'$ could do better than $h^*$
+\\\\
+In order to fix on a concrete algorithm we are going to take the empirical risk
+minimiser (ERM) algorithm.
 \end{document}