mirror of https://github.com/Andreaierardi/Master-DataScience-Notes.git
synced 2025-02-01 14:17:42 +01:00

first lessons

parent 34fdb7e72b
commit 9f5a6ac477
lecture1.aux
@@ -4,4 +4,6 @@
 \@writefile{toc}{\contentsline {chapter}{\numberline {1}Lecture 1 - 09-03-2020}{1}\protected@file@percent }
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {section}{\numberline {1.1}Introduction}{1}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {1.1}Introduction of the course}{1}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {1.2}Examples}{1}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.1}Spam filtering}{4}\protected@file@percent }
lecture1.log
@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 13 APR 2020 12:41
+This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 15 APR 2020 21:00
 entering extended mode
 **./lecture1.tex
 (lecture1.tex
@@ -232,16 +232,6 @@ Package: titlesec 2019/10/16 v2.13 Sectioning titles
 )
 ("C:\Program Files\MiKTeX 2.9\tex/latex/base\fontenc.sty"
 Package: fontenc 2020/02/11 v2.0o Standard LaTeX package
-)
-("C:\Program Files\MiKTeX 2.9\tex/latex/fncychap\fncychap.sty"
-Package: fncychap 2007/07/30 v1.34 LaTeX package (Revised chapters)
-\RW=\skip55
-\mylen=\skip56
-\myhi=\skip57
-\px=\skip58
-\py=\skip59
-\pyy=\skip60
-\pxx=\skip61
 )))
 ("C:\Program Files\MiKTeX 2.9\tex/latex/l3backend\l3backend-pdfmode.def"
 File: l3backend-pdfmode.def 2020-03-12 L3 backend support: PDF mode
@@ -318,29 +308,68 @@ G,.JBIG2,.JB2,.eps]
 (grfext) \AppendGraphicsExtensions on input line 504.
 )
 Chapter 1.
-LaTeX Font Info: Trying to load font information for OT1+pzc on input line 3
-.
-("C:\Program Files\MiKTeX 2.9\tex/latex/psnfss\ot1pzc.fd"
-File: ot1pzc.fd 2020/03/25 font definitions for OT1/pzc.
-)
-LaTeX Font Info: Font shape `OT1/pzc/m/n' in size <76> not available
-(Font) Font shape `OT1/pzc/m/it' tried instead on input line 3.
-
-Overfull \hbox (10.0pt too wide) in paragraph at lines 3--3
-[][][][][][][][][]
- []
-
-LaTeX Font Info: Trying to load font information for T1+cmss on input line 3
-.
-("C:\Program Files\MiKTeX 2.9\tex/latex/base\t1cmss.fd"
-File: t1cmss.fd 2019/12/16 v2.5j Standard LaTeX font definitions
-)
-Underfull \hbox (badness 10000) in paragraph at lines 8--95
+Underfull \hbox (badness 10000) in paragraph at lines 8--14

 []

-Underfull \hbox (badness 10000) in paragraph at lines 8--95
+Underfull \hbox (badness 10000) in paragraph at lines 8--14
+
+ []
+
+LaTeX Font Info: Trying to load font information for U+msa on input line 15.
+("C:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsa.fd"
+File: umsa.fd 2013/01/14 v3.01 AMS symbols A
+)
+LaTeX Font Info: Trying to load font information for U+msb on input line 15.
+("C:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsb.fd"
+File: umsb.fd 2013/01/14 v3.01 AMS symbols B
+)
+Underfull \hbox (badness 10000) in paragraph at lines 22--23
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 32--53
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 32--53
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 32--53
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 32--53
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 32--53
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 32--53
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 32--53
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 32--53

 []
@@ -348,55 +377,85 @@ Underfull \hbox (badness 10000) in paragraph at lines 8--95


 {C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
-LaTeX Font Info: Trying to load font information for U+msa on input line 96.
-
-("C:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsa.fd"
-File: umsa.fd 2013/01/14 v3.01 AMS symbols A
-)
-LaTeX Font Info: Trying to load font information for U+msb on input line 96.
-
-("C:\Program Files\MiKTeX 2.9\tex/latex/amsfonts\umsb.fd"
-File: umsb.fd 2013/01/14 v3.01 AMS symbols B
-)
-Underfull \hbox (badness 10000) in paragraph at lines 98--138
+Underfull \hbox (badness 10000) in paragraph at lines 61--89

 []

-Underfull \hbox (badness 10000) in paragraph at lines 98--138
+Underfull \hbox (badness 10000) in paragraph at lines 61--89

 []

-Underfull \hbox (badness 10000) in paragraph at lines 98--138
+Underfull \hbox (badness 10000) in paragraph at lines 61--89

 []

-[2] [3] (lecture1.aux) )
+Underfull \hbox (badness 10000) in paragraph at lines 61--89
+
+ []
+
+[2]
+Underfull \hbox (badness 10000) in paragraph at lines 93--104
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 93--104
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 93--104
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 107--123
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 107--123
+
+ []
+
+Underfull \hbox (badness 10000) in paragraph at lines 107--123
+
+ []
+
+[3]
+Underfull \hbox (badness 10000) in paragraph at lines 132--135
+
+ []
+
+[4] (lecture1.aux) )
 Here is how much of TeX's memory you used:
-5192 strings out of 480934
-70233 string characters out of 2909670
-340605 words of memory out of 3000000
-20940 multiletter control sequences out of 15000+200000
-542533 words of font info for 50 fonts, out of 3000000 for 9000
+5099 strings out of 480934
+68776 string characters out of 2909670
+333073 words of memory out of 3000000
+20853 multiletter control sequences out of 15000+200000
+546309 words of font info for 55 fonts, out of 3000000 for 9000
 1141 hyphenation exceptions out of 8191
-42i,9n,50p,332b,220s stack positions out of 5000i,500n,10000p,200000b,50000s
-<C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/
-ljfour/jknappen/ec/dpi600\tcrm1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX
-\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecrm1200.pk> <C:\Users\AndreDany\AppDat
-a\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx1728.pk> <C:\Users\An
-dreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecsx1728.pk
->{C:/Program Files/MiKTeX 2.9/fonts/enc/dvips/base/8r.enc}<C:/Program Files/MiK
-TeX 2.9/fonts/type1/public/amsfonts/cm/cmex10.pfb><C:/Program Files/MiKTeX 2.9/
-fonts/type1/public/amsfonts/cm/cmmi12.pfb><C:/Program Files/MiKTeX 2.9/fonts/ty
-pe1/public/amsfonts/cm/cmr12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/publi
-c/amsfonts/cm/cmsy10.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/urw/zapfchan/
-uzcmi8a.pfb>
-Output written on lecture1.pdf (3 pages, 76413 bytes).
+42i,9n,50p,332b,152s stack positions out of 5000i,500n,10000p,200000b,50000s
+<C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfo
+ur/jknappen/ec/dpi600\ecti1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9
+\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx1440.pk> <C:\Users\AndreDany\AppData\Lo
+cal\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\tcrm1200.pk> <C:\Users\AndreD
+any\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\ecbx1200.pk> <C
+:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknappen/ec/dpi600\e
+crm1200.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfour/jknapp
+en/ec/dpi600\ecbx1728.pk> <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk
+/ljfour/jknappen/ec/dpi600\ecbx2488.pk><C:/Program Files/MiKTeX 2.9/fonts/type1
+/public/amsfonts/cm/cmex10.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/
+amsfonts/cm/cmmi12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts
+/cm/cmr12.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy1
+0.pfb>
+Output written on lecture1.pdf (4 pages, 100195 bytes).
 PDF statistics:
-143 PDF objects out of 1000 (max. 8388607)
+205 PDF objects out of 1000 (max. 8388607)
 0 named destinations out of 1000 (max. 500000)
 1 words of extra memory for PDF output out of 10000 (max. 10000000)
Binary file not shown.
Binary file not shown.
lecture1.tex
@@ -2,90 +2,99 @@
 \begin{document}
 \chapter{Lecture 1 - 09-03-2020}

-\section{Introduction}
-This is time for all good men to come to the aid of their party!
-
-MACHINE LEARNING
+\section{Introduction of the course}
 In this course we look at the principles behind the design of machine learning.
-Not just coding but have an idea of algorithm that can work with the data.
-We have to fix a mathematical framework: some statistic and mathematics.
-Work on ML on a higher level
+Not just coding, but having an idea of the algorithms that can work with the data.\\\\
+We have to fix a mathematical framework: some statistics and mathematics.\\
+\textbf{Work on ML at a higher level}\\
 ML is data inference: make predictions about the future using data about the
-past
-Clustering —> grouping according to similarity
-Planning —> (robot to learn to interact in a certain environment)
-Classification —> (assign meaning to data) example: Spam filtering
+past.\\
+\begin{itemize}
+\item Clustering $\rightarrow$ grouping according to similarity
+\item Planning $\rightarrow$ (robot learning to interact in a certain environment)
+\item Classification $\rightarrow$ (assign meaning to data), example: spam filtering\\
 I want to predict the outcome for this individual, or I want to predict whether a
 person clicks or not on a certain advertisement.
-Examples
-Classify data into categories:
-Medical diagnosis: data are medical records and • categories are diseases
-• Document analysis: data are texts and categories are topics
-• Image analysts: data are digital images and for categories name of objects
+\end{itemize}
+\section{Examples}
+Classify data into categories:\\
+\begin{itemize}
+\item Medical diagnosis: data are medical records and categories are diseases
+\item Document analysis: data are texts and categories are topics
+\item Image analysis: data are digital images and categories are names of objects
 in the image (but could be different).
-• Spam filtering: data are emails, categories are spam vs non spam.
-• Advertising prediction: data are features of web site visitors and categories
+\item Spam filtering: data are emails, categories are spam vs non-spam.
+\item Advertising prediction: data are features of web site visitors and categories
 could be click/non-click on banners.
-Classification : Different from clustering since we do not have semantically
-classification (spam or not spam) —> like meaning of the image.
+\end{itemize}
+Classification: \textbf{different from clustering} since we do not have a semantic
+classification (spam or not spam) $\rightarrow$ like the meaning of the image.\\
 I have a semantic label.
-Clustering: i want to group data with similarity function.
-Planning: Learning what to do next
-Clustering: Learn similarity function
-Classification: Learn semantic labels meaning of data
-Planning: Learn actions given state
+\\\\
+Clustering: I want to group data with a similarity function. \\\\
+Planning: Learning what to do next \\\\
+Clustering: Learn a similarity function \\\\
+Classification: Learn the semantic labels (meaning) of data\\\\
+Planning: Learn actions given a state\\\\
 Classification is an easier task than planning, since I am able to make
-prediction telling what is the semantic label that goes with data points.
-If i can do classification i can clustering.
+predictions telling what is the semantic label that goes with data points.\\
+If I can do classification, I can do clustering.\\
 If you do planning you can probably classify (since you understand the meaning in
-your position) and then you can also do clustering probably.
-We will focus on classification because many tasks are about classification.
-Classify data in categories we can image a set of categories.
-For instance the tasks:
-‘predict income of a person’
-‘Predict tomorrow price for a stock’
-The label is a number and not an abstract thing.
+your position) and then you can probably also do clustering.\\
+We will focus on classification because many tasks are about classification.\\\\
+To classify data into categories we can imagine a set of categories.\\
+For instance the tasks:\\
+‘predict the income of a person’\\
+‘predict tomorrow’s price for a stock’\\
+The label is a number and not an abstract thing.\\\\
 We can distinguish two cases:
-The label set —> set of possible categories for each data • point. For each of
+\begin{itemize}
+\item The label set $\rightarrow$ the set of possible categories for each data point. Each of
 these could be a finite set of abstract symbols (the case of document classification,
 medical diagnosis). So the task is classification.
-• Real number (no bound on how many of them). My prediction will be a real
+\item Real number (no bound on how many of them). My prediction will be a real
 number and not a category. In this case we talk about a task of
 regression.
+\end{itemize}
 Classification: a task where we want to give data points a label from predefined abstract
 categories (like YES or NO)
+\\
 Regression: a task where we want to give labels to data points, but these labels are
-numbers.
+numbers.\\\\
 When we say prediction task: this is used both for classification and regression
-tasks.
-Supervised learning: Label attached to data (classification, regression)
-Unsupervised learning: No labels attached to data (clustering)
+tasks.\\
+Supervised learning: labels attached to data (classification, regression)\\
+Unsupervised learning: no labels attached to data (clustering)\\
 In unsupervised learning the mathematical modelling, and the way algorithms are scored and
 can learn from mistakes, is a little bit harder. The problem of clustering is harder to
-model mathematically.
+model mathematically.\\
 You can cast planning as supervised learning: I can show the robot which is
 the right action to take in that state. But that depends on how the planning task is
-formalised.
+formalised.\\
 Planning is a higher level of learning since it includes tasks of supervised and
-unsupervised learning.
-Why is this important ?
-Algorithm has to know how to given the label.
+unsupervised learning.\\\\
+Why is this important?\\
+The algorithm has to know how to give the label.\\
 In ML we want to teach the algorithm to perform predictions correctly. Initially
 the algorithm will make mistakes in classifying data. We want to tell the algorithm that
 the classification was wrong, and just assign a score. Like giving a grade
 to the algorithm to understand if it did bad or really bad.
-So we have mistakes!
-Algorithm predicts and something makes a mistake —> we can correct it.
+So we have mistakes!\\\\
+The algorithm predicts and sometimes makes a mistake $\rightarrow$ we can correct it.\\
 Then the algorithm can be more precise.
-We have to define this mistake.
-Mistakes in case of classification:
-If category is the wrong one (in the simple case). We • have a binary signal
+We have to define this mistake.\\
+Mistakes in the case of classification:\\
+\begin{itemize}
+\item If the category is the wrong one (in the simple case). We have a binary signal
 where we know that the category is wrong.
-How to communicate it?
+\end{itemize}
+How to communicate it?\\
 We can use the loss function: we can tell the algorithm whether it is wrong or
-not.
-Loss function: measure discrepancy between ‘true’ label and predicted
-label.
+not.\\\\
+\bred{Loss function}: measures the discrepancy between the ‘true’ label and the predicted
+label.\\
 So we may assume that every datapoint has a true label.
 If we have a set of topics, this is the true topic that the document is talking about.
 This is typical in supervised learning.
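The classification/regression and supervised/unsupervised distinctions introduced in this hunk are easy to make concrete. A minimal sketch in Python; the data and variable names are illustrative only, not from the repository:

    # Supervised data: (point, label) pairs. The label set decides the task.
    # Classification: labels come from a finite set of abstract symbols.
    emails = [("cheap pills now", "spam"), ("meeting at 10", "non-spam")]

    # Regression: labels are real numbers, with no bound on how many values exist.
    incomes = [({"age": 41, "degree": True}, 58300.0),
               ({"age": 23, "degree": False}, 21450.0)]

    # Unsupervised data (clustering): the same points with no labels attached.
    unlabeled_emails = [text for text, _ in emails]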
@@ -97,7 +106,7 @@ How good the algorithm did?

 where $y$ is the true label and $\hat{y}$ is the predicted label
 \\\\
-We want to build a spam filter were $0$ is not spam and $1$ is spam and that
+We want to build a spam filter where $0$ is not spam and $1$ is spam, and that's a
 Classification task:
 \\\\
 $
@@ -107,33 +116,30 @@ $
 \end{cases}
 $
 \\\\
-The loss function is the “interface” between algorithm and data.
-So algorithm know about the data through the loss function.
+\textbf{The loss function is the “interface” between the algorithm and the data.}\\
+So the algorithm knows about the data through the loss function.\\
 If we give a useless loss function the algorithm will not perform well: it is
 important to have a good loss function.
-Spam filtering
-We have two main mistakes:
+\subsection{Spam filtering}
+$Y = \{ spam, no \, spam\}$
+\\
+Binary classification $|Y| = 2$ \\
+We have two main mistakes:
+\begin{itemize}
+\item False positive: $y = $ non-spam, $\hat{y} = $ spam
+\item False negative: $y = $ spam, $\hat{y} = $ non-spam
+\end{itemize}
 Is it the same mistake? No: if I have an important email and you classify it as spam
-that’s bad and if you show me a spam than it’s ok.
-So we have to assign a different weight.
+that’s bad, and if you show me a spam email then it’s OK.\\
+So we have to assign different weights.\\
+$$
+\ell\left(y,\hat{y}\right) = \begin{cases}
+2 \quad \textit{if FP}\\
+1 \quad \textit{if FN}\\
+0 \quad \textit{otherwise}
+\end{cases}
+$$
+\bred{We have to pay more attention to false positive mistakes}\\
 Even in binary classification, mistakes are not equal.
-e Iotf.TFprIuos.uos
-True came
-razee
-Cussler aircN TASK spam ACG FIRM
-ftp.y GO
-IF F Y n is soon
-IF FEY 0 Nor spam
-ZERO CNE Cass
-n n
-Span No Seamy Binary Classification
-I 2
-FALSE PEENE Mistake Y NON SPAM J Spam
-FN Mistake i f SPAM y NO spam
-2 IF Fp Meter Airenita
-f Y F on positive
-y ye en MISTAKE
-0 otherwise
-0 otherwise

 \end{document}
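The weighted loss added in this hunk maps directly to code. A minimal sketch in Python, following the notes' convention that a false positive (real mail flagged as spam) costs 2 and a false negative costs 1; the function names are illustrative, not from the repository:

    def zero_one_loss(y, y_hat):
        # 1 for any wrong prediction, 0 for a correct one
        return int(y != y_hat)

    def weighted_spam_loss(y, y_hat):
        # Asymmetric loss from the notes: FP costs 2, FN costs 1.
        if y == "non-spam" and y_hat == "spam":
            return 2  # false positive: an important email is lost
        if y == "spam" and y_hat == "non-spam":
            return 1  # false negative: a spam email gets through
        return 0      # correct prediction

    print(weighted_spam_loss("non-spam", "spam"))  # 2 -> the costlier mistake
    print(weighted_spam_loss("spam", "non-spam"))  # 1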
lecture2.aux
@@ -6,8 +6,8 @@
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{toc}{\contentsline {section}{\numberline {1.1}Argomento}{1}\protected@file@percent }
 \@writefile{toc}{\contentsline {section}{\numberline {1.2}Loss}{1}\protected@file@percent }
-\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.1}Absolute Loss}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.1}Absolute Loss}{1}\protected@file@percent }
 \@writefile{toc}{\contentsline {subsection}{\numberline {1.2.2}Square Loss}{2}\protected@file@percent }
 \@writefile{toc}{\contentsline {subsection}{\numberline {1.2.3}Example of information of square loss}{2}\protected@file@percent }
-\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.4}labels and losses}{4}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.4}labels and losses}{3}\protected@file@percent }
 \@writefile{toc}{\contentsline {subsection}{\numberline {1.2.5}Example TF(idf) documents encoding}{5}\protected@file@percent }
lecture2.log
@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 13 APR 2020 12:31
+This is pdfTeX, Version 3.14159265-2.6-1.40.21 (MiKTeX 2.9.7300 64-bit) (preloaded format=pdflatex 2020.4.13) 15 APR 2020 21:01
 entering extended mode
 **./lecture2.tex
 (lecture2.tex
@@ -238,14 +238,7 @@ File: l3backend-pdfmode.def 2020-03-12 L3 backend support: PDF mode
 \l__kernel_color_stack_int=\count193
 \l__pdf_internal_box=\box48
 )
-(lecture2.aux
-
-Package babel Warning: Unknown language `ngerman'. Very likely you
-(babel) requested it in a previous run. Expect some
-(babel) wrong results in this run, which should vanish
-(babel) in the next one. Reported on input line 21.
-
-)
+(lecture2.aux)
 \openout1 = `lecture2.aux'.

 LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 3.
@@ -340,15 +333,15 @@ Underfull \hbox (badness 10000) in paragraph at lines 17--30

 []

-[1
-
-
-{C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
 Underfull \hbox (badness 10000) in paragraph at lines 32--35

 []

+[1
+
+
+{C:/Users/AndreDany/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]
 Underfull \hbox (badness 10000) in paragraph at lines 49--52

 []
@@ -383,12 +376,12 @@ Underfull \hbox (badness 10000) in paragraph at lines 89--109

 []

-[3]
 Underfull \hbox (badness 10000) in paragraph at lines 110--115

 []

+[3]
 Underfull \hbox (badness 10000) in paragraph at lines 118--156

 []
@@ -423,23 +416,23 @@ Underfull \hbox (badness 10000) in paragraph at lines 164--171

 []

-[5]
 Underfull \hbox (badness 10000) in paragraph at lines 172--182

 []

+[5]
 Underfull \hbox (badness 10000) in paragraph at lines 189--199

 []

 [6] (lecture2.aux) )
 Here is how much of TeX's memory you used:
-5108 strings out of 480934
-68939 string characters out of 2909670
-334197 words of memory out of 3000000
-20861 multiletter control sequences out of 15000+200000
-547081 words of font info for 56 fonts, out of 3000000 for 9000
+5099 strings out of 480934
+68776 string characters out of 2909670
+333073 words of memory out of 3000000
+20853 multiletter control sequences out of 15000+200000
+546309 words of font info for 55 fonts, out of 3000000 for 9000
 1141 hyphenation exceptions out of 8191
 42i,10n,50p,316b,142s stack positions out of 5000i,500n,10000p,200000b,50000s
 <C:\Users\AndreDany\AppData\Local\MiKTeX\2.9\fonts/pk/ljfo
@@ -460,9 +453,9 @@ es/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsy10.pfb><C:/Program Files/MiKTe
 X 2.9/fonts/type1/public/amsfonts/cm/cmsy6.pfb><C:/Program Files/MiKTeX 2.9/fon
 ts/type1/public/amsfonts/cm/cmsy8.pfb><C:/Program Files/MiKTeX 2.9/fonts/type1/
 public/amsfonts/symbols/msbm10.pfb>
-Output written on lecture2.pdf (6 pages, 167391 bytes).
+Output written on lecture2.pdf (6 pages, 165099 bytes).
 PDF statistics:
-281 PDF objects out of 1000 (max. 8388607)
+276 PDF objects out of 1000 (max. 8388607)
 0 named destinations out of 1000 (max. 500000)
 1 words of extra memory for PDF output out of 10000 (max. 10000000)
Binary file not shown.