class: center, middle, inverse, title-slide # Modelo AdaBoost ## A sabedoria ponderada das multidões ### ⚔
Mateus Maia
em conjunto com Anderson Ara ### 2019-06-27 --- #Aprendizado Estatístico Supervisionado <br/> #.large[$$g:\mathcal{X}\rightarrow\mathcal{Y}$$] <br/> #.large[$$\hat{g}(\mathbf{X})=\mathbf{Y}$$] --- # Aprendizado Estatístico Supervisionado .center[] --- # Aprendizado Estatístico Supervisionado .center[] --- class: inverse, center, middle # Modelos de Ensemble --- # Modelos de Ensemble <br/> <br/> .center[] --- # Modelos de Ensemble <br/> <br/> .center[] --- # Modelos de Boosting -**Boosting Adaptativo** <br/> <br/> -- -Boosting Gradiente <br/> <br/> -- -Boosting Gradiente Estocástico <br/> <br/> -- -eXtreme Gradient Boosting <br/> <br/> -- -LightBoosting --- class: inverse, center, middle # Boosting Adaptativo ## (AdaBoosting) --- # AdaBoosting Dado `$$\mathbf{y}\in\{-1,1\}$$` <br/> <br/> `$$G(\mathbf{x})=\operatorname{sign}\left(\sum_{m=1}^{M} \alpha_{m}g_{m}(\mathbf{x}) \right)$$` -- <br/> <br/> ##.center[A sabedoria ponderada das multidões] --- # AdaBoosting <br/> <br/> .center[] --- background-image: url(D:/my_computer/Est_ML_2019/presentation_meet_data_bahia/macaco_fazendo_conta.gif) background-size: cover class: center, middle, inverse # Hora das contas... 
--- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td 
style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> </tbody> </table> --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> Weights </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;"> 0.125 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;"> 0.125 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;"> 0.125 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;"> 0.125 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;"> 0.125 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td 
style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;"> 0.125 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;"> 0.125 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;"> 0.125 </td> </tr> </tbody> </table> --- # AdaBoost ```r library(rpart) library(rpart.plot) g1<-rpart(Heart_Disease~Chest_Pain+Blocked_Arteries+Patient_Weight, data=medical_care, control = rpart.control(maxdepth=1,minsplit = 2)) rpart.plot(g1) ``` <img src="xaringan_presentation_files/figure-html/unnamed-chunk-3-1.png" style="display: block; margin: auto;" /> --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> g1 </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td 
style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">No</span> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> </tr> <tr> <td 
style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> </tr> </tbody> </table> -- `$$\alpha_{1}=\frac{1}{2}log\left(\frac{1-\epsilon_{1}}{\epsilon_{1}} \right)$$` -- `$$\alpha_{1}=\frac{1}{2}log\left(\frac{1-\epsilon_{1}}{\epsilon_{1}} \right)=\frac{1}{2}log\left(\frac{1-0.125}{0.125} \right)=0.973$$` --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> g1 </th> <th style="text-align:center;"> New_Weights </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td 
style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">No</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: 
bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> </td> </tr> </tbody> </table> -- `$$New\;weights_{correct}=weights_{correct}\times e^{-\alpha_{1}}$$` -- `$$New\;weights_{correct}=0.125\times e^{-0.973}=0.047$$` --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> g1 </th> <th style="text-align:center;"> New_Weights </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td 
style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">No</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td 
style="text-align:center;"> 0.047 </td> </tr> </tbody> </table> `$$New\;weights_{correct}=weights_{correct}\times e^{-\alpha_{1}}$$` `$$New\;weights_{correct}=0.125\times e^{-0.973}=0.047$$` --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> g1 </th> <th style="text-align:center;"> New_Weights </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td 
style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">No</span> </td> <td style="text-align:center;"> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> </tbody> </table> -- `$$New\;weights_{wrong}=weights_{wrong}\times e^{\alpha_{1}}$$` -- `$$New\;weights_{wrong}=0.125\times e^{0.973}=0.331$$` --- 
#AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> g1 </th> <th style="text-align:center;"> New_Weights </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">No</span> 
</td> <td style="text-align:center;"> 0.331 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> </tbody> </table> `$$New\;weights_{wrong}=weights_{wrong}\times e^{\alpha_{1}}$$` `$$New\;weights_{wrong}=0.125\times e^{0.973}=0.331$$` --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> 
Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> g1 </th> <th style="text-align:center;"> New_Weights </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">No</span> </td> <td style="text-align:center;"> 0.331 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td 
style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.047 </td> </tr> </tbody> </table> --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> g1 </th> <th style="text-align:center;"> Norm_Weights </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td 
style="text-align:center;"> Yes </td> <td style="text-align:center;"> 205 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.071 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.071 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 210 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> <td style="text-align:center;"> 0.071 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">No</span> </td> <td style="text-align:center;"> 0.502 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 156 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.071 </td> </tr> <tr> <td style="text-align:center;"> No </td> <td 
style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.071 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.071 </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> <td style="text-align:center;"> 0.071 </td> </tr> </tbody> </table> --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> <tr> <td 
style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> </tbody> </table> --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;font-weight: bold;color: white 
!important;background-color: #3454D1 !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> </tr> <tr> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> </tr> <tr> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 
!important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;font-weight: bold;color: white !important;background-color: #3454D1 !important;"> Yes </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> </tr> </tbody> </table> --- # AdaBoosting ```r g2<-rpart(Heart_Disease~Chest_Pain+Blocked_Arteries+Patient_Weight, data=reweighted_data, control = rpart.control(maxdepth=1,minsplit = 2)) rpart.plot(g2) ``` <img src="xaringan_presentation_files/figure-html/unnamed-chunk-13-1.png" style="display: block; margin: auto;" /> --- #AdaBoosting <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> Chest_Pain </th> <th style="text-align:center;"> Blocked_Arteries </th> <th style="text-align:center;"> Patient_Weight </th> <th style="text-align:center;"> Heart_Disease </th> <th style="text-align:center;"> g2 </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 172 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">Yes</span> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td 
style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> No </td> <td style="text-align:center;"> 168 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">No</span> </td> </tr> <tr> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 167 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> </tr> <tr> <td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 125 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> No </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: red !important;">Yes</span> </td> </tr> <tr> 
<td style="text-align:center;"> No </td> <td style="text-align:center;"> Yes </td> <td style="text-align:center;"> 180 </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> Yes </td> <td style="text-align:center;font-weight: bold;color: #243B4A !important;"> <span style=" color: green !important;">Yes</span> </td> </tr> </tbody> </table> -- `$$\alpha_{2}=\frac{1}{2}log\left(\frac{1-\epsilon_{2}}{\epsilon_{2}} \right)$$` -- `$$\alpha_{2}=\frac{1}{2}log\left(\frac{1-\epsilon_{2}}{\epsilon_{2}} \right)=\frac{1}{2}log\left(\frac{1-0.25}{0.25} \right)=0.549$$` --- #AdaBoosting <br/> <br/> `$$G(\mathbf{x})=sign \left(\sum_{m=1}^{M} \alpha_{m}g_{m}(\mathbf{x}) \right)$$` -- <br/> <br/> `$$G(\mathbf{x}_{i})=sign \left(\alpha_{1}g_{1}(\mathbf{x}_{i})+\alpha_{2}g_{2}(\mathbf{x}_{i})+\dots+ \alpha_{M}g_{M}(\mathbf{x}_{i}) \right)$$` -- <br/> <br/> `$$G(\mathbf{x}_{i})=sign \left(1\times(\alpha_{1,Sim}+\dots+\alpha_{k,Sim})+(-1)\times(\alpha_{2,Não}+\dots+\alpha_{p,Não}) \right)$$` --- class: inverse, center, middle # Dados Simulados --- # Dados Simulados .center[] --- # Dados Simulados .center[] --- # Dados Simulados .center[] --- background-image: url(D:/my_computer/Est_ML_2019/presentation_meet_data_bahia/frog_write.gif) background-size: cover class: center, bottom, inverse # Mineração de Texto ## (e AdaBoosting) --- # Text Mining e algumas aplicações - __Classificação de Textos__ -- - Clusterização de Textos -- - Análise de Sentimentos -- - *Concept Extraction* -- - Sumarização de Documentos --- ##Classificação de Textos .pull-left[] .pull-right[] --- # Bag of Words <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:center;"> X1 </th> <th style="text-align:center;"> timewarn </th> <th style="text-align:center;"> profit </th> <th style="text-align:center;"> warner </th> <th style="text-align:center;"> quarter </th> <th style="text-align:center;"> fourth
</th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> b001.txt </td> <td style="text-align:center;"> 0.3804104 </td> <td style="text-align:center;"> 0.3130291 </td> <td style="text-align:center;"> 0.1902052 </td> <td style="text-align:center;"> 0.1511458 </td> <td style="text-align:center;"> 0.1283632 </td> </tr> <tr> <td style="text-align:center;"> b002.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> <tr> <td style="text-align:center;"> b003.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> <tr> <td style="text-align:center;"> b004.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0283345 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0273626 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> <tr> <td style="text-align:center;"> b005.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> <tr> <td style="text-align:center;"> b006.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.1400322 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> <tr> <td style="text-align:center;"> b007.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 
0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> <tr> <td style="text-align:center;"> b008.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> <tr> <td style="text-align:center;"> b009.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> <tr> <td style="text-align:center;"> b010.txt </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> <td style="text-align:center;"> 0.0000000 </td> </tr> </tbody> </table> --- class: center, middle, inverse # Pré-processamento de texto --- # Stemming <br/> <br/> .center[] --- # Stop Words .center[] --- # Ponderação dos Termos Para um __termo i__ no __documento j__ .center[] --- class: inverse, center, middle # Modelagem ## AdaBoosting --- # Base de dados e parâmetros do modelo - 400 observações com 5231 covariáveis -- - 200 notícias de __Esportes__ e 200 notícias de __Negócios__ -- - 10-Folds com 10 repetições -- --- class: center, middle, inverse # Quantos Stumps (tocos) utilizar? --- #Tuning <br/> <br/> .center[] --- --- class: center, middle, inverse # Obrigado! mateusmaia11@gmail.com <br/> <br/> Quem quiser explorar um pouco mais o AdaBoosting mateusmaia.shinyapps.io/adaboosting/