\relax \citation{Hart67} \citation{Gates72,Hart68a,Dasarathy94,lot2010,\\Aha91,Bezdek2001,Brighton2002,CHL03,Carrasco05,Chien06,García2008,Lumini06,Narayan06,OCM2007,\\OCM2008,Raicharoen05,Riquelme03,Ritter75,Srisawat06,Tomek76,Veenman2005,Wilson72,wilson00,Olvera2010} \citation{Frank+Asuncion:2010} \citation{jalali08} \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} \newlabel{section:introduction}{{1}{1}} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Illustration of brittleness in the iris data-set. Before using the instance selector CLIFF (decribed later in this paper) the classes versicolor and virginica show intermingling (brittleness). After using CLIFF, brittleness is reduced.}}{1}} \newlabel{fig:intropic}{{1}{1}} \citation{Frank+Asuncion:2010} \citation{09NAS} \citation{knn} \citation{Devi2002,Bezdek2001,Dasarathy94,Li2009,Bezdek98,Cano2005,Garain2008,Veenman2005,García2008} \citation{Dasarathy94} \citation{Hart68a} \citation{Gates72} \citation{lot2010} \citation{Hart68a} \@writefile{toc}{\contentsline {section}{\numberline {2}Background}{2}} \newlabel{section:bkg}{{2}{2}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Classification with kNN}{2}} \newlabel{subsection:knn}{{2.1}{2}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}The Brittleness Measure}{2}} \newlabel{subsection:bm}{{2.2}{2}} \@writefile{toc}{\contentsline {section}{\numberline {3}Related Work}{2}} \newlabel{section:related}{{3}{2}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Condensed Nearest Neighbor (CNN)}{2}} \citation{Gates72} \citation{Hart68a} \citation{Dasarathy94} \citation{lot2010} \citation{lot2010} \citation{Olvera2010} \citation{Olvera2010} \citation{Olvera2010} \citation{Olvera2010} \citation{FCNN07} \citation{Hart68a} \citation{wilson00} \citation{Ritter75} \citation{Wilson72} \citation{Raicharoen05} \citation{Tomek76} \citation{Kriegel05} \citation{Brighton2002} \citation{Garain2008} \citation{OCM2007,Carrasco05} \citation{Riquelme03} \citation{Raicharoen05} \citation{Narayan06} \citation{Olvera2010} \citation{Olvera2010} \citation{jalali08} \citation{Fayyad1992} \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Reduced Nearest Neighbor (RNN)}{3}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Minimal Consistent Set (MCS)}{3}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Prototype Selection by Clustering (PSC)}{3}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.5}More IS}{3}} \@writefile{toc}{\contentsline {section}{\numberline {4}CLIFF Design and Operation}{3}} \newlabel{section:cliff}{{4}{3}} \citation{jalali08} \citation{jalali08} \citation{jalali08} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Characteristics of some IS (extracted from Table 1 of \cite {Olvera2010}). Also included are some of the published time complexities of the instance selectors.}}{4}} \newlabel{fig:lot-tab}{{2}{4}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}BORE}{4}} \newlabel{section:bore}{{4.1}{4}} \newlabel{eq:one1}{{1}{4}} \newlabel{eq:one}{{2}{4}} \newlabel{fig:sbbra}{{4.1}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Using Criteria for Instance Selection}{5}} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Instance Selection Method used in CLIFF}}{5}} \newlabel{fig:isc}{{3}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.3}CLIFF: A Simple Example}{5}} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces A log of some golf-playing behavior}}{5}} \newlabel{fig:golf}{{4}{5}} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Finding the rank of $sunny$}}{5}} \newlabel{fig:rank1}{{5}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.4}CLIFF: Time Complexity}{5}} \newlabel{section:time}{{4.4}{5}} \citation{Hart68a} \citation{Dasarathy94} \citation{lot2010} \citation{Frank+Asuncion:2010} \citation{Fayyad1992} \citation{burak} \citation{wilson00} \citation{wilson00} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Data Set Characteristics}}{6}} \newlabel{fig:info}{{6}{6}} \@writefile{toc}{\contentsline {section}{\numberline {5}CLIFF Assessment}{6}} \newlabel{section:assess}{{5}{6}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Data Sets}{6}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Experimental Method}{6}} \newlabel{section:brit}{{5.2}{6}} \newlabel{fig:knnexp1}{{5.2}{6}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Experimental Results}{6}} \citation{wilson00} \newlabel{}{{5.2}{7}} \newlabel{}{{5.2}{7}} \newlabel{}{{5.2}{7}} \newlabel{}{{5.2}{7}} \newlabel{}{{5.2}{7}} \@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Effect of noise on reduction percent. As noise increases from 0 - 40\%, inferior instance selectors generally increase in reduction \%, i.e. the number of instances selected from each training set increases as noise increases. }}{7}} \newlabel{fig:red-results}{{7}{7}} \@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Visual illustration of brittleness reduction in the breast cancer data-set for KNN and the IS. Here, the greater the position of each test instance with a predicted target class from an instance with a different target class the better.}}{7}} \newlabel{fig:brit-results}{{8}{7}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.1}Reduction Percent}{7}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.2}Visualization of Brittleness}{7}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.3}The Effects of Noise}{7}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.4}Is CLIFF significantly better than other IS?}{7}} \citation{Walsh94} \citation{09NAS} \citation{09NAS} \@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces PDS: other - (knn+cliff). Note: {\em negative} values means CLIFF is performing {\em better}.}}{8}} \newlabel{fig:better1}{{10}{8}} \@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces PRECISION: other - (knn+cliff). Note: {\em negative} values means CLIFF is performing {\em better}.}}{8}} \newlabel{fig:better2}{{11}{8}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.4}Summary}{8}} \@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces PFs: other - (knn+cliff). Note: {\em positive} values means CLIFF is performing {\em better}.}}{8}} \newlabel{fig:better3}{{12}{8}} \@writefile{toc}{\contentsline {section}{\numberline {6}Case Study: Reducing Brittleness in Forensic Models}{8}} \newlabel{section:forensics}{{6}{8}} \citation{Walsh94} \citation{Seheult78} \citation{Grove80} \citation{Evett94} \citation{Karslake09} \citation{fastmap} \@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Effects of noise: Performance measures (pd, prec and pf) of CLIFF vs. KNN in the presence of noise.}}{9}} \newlabel{fig:noise}{{9}{9}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.1}The CLIFF Avoidance Model (CAM)}{9}} \citation{fastmap} \citation{fastmap} \citation{Du2008} \citation{fastmap} \citation{fastmap} \citation{fastmap} \citation{fastmap} \@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Proposed procedure for the forensic evaluation of data}}{10}} \newlabel{fig:process}{{13}{10}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {6.1.1}Dimensionality Reduction}{10}} \@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Example of using the cosine law to find the position of $Oi$ in the dimension $k$. Extracted from \cite {fastmap}.}}{10}} \newlabel{fig:fm1}{{14}{10}} \@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces Projects of points $O_i$ and $O_j$ onto the hyper-plane perpendicular to the line $O_a$$O_b$. Extracted from \cite {fastmap}.}}{10}} \newlabel{fig:fm2}{{15}{10}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Clustering}{10}} \citation{Karslake09} \citation{09Zadora,09aZadora,06Aitken,04Aitken,02Koons,99Koons} \@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces Effects of noise: Performance measures (pd, prec and pf) of CLIFF vs. KNN in the presence of noise.}}{11}} \newlabel{fig:snoise}{{16}{11}} \newlabel{fig:kmeans}{{6.2}{11}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Data Set and Experimental Method}{11}} \newlabel{section:brit1}{{6.3}{11}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {6.3.1}Is CAM a viable forensic model?}{11}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {6.3.2}Does CAM reduce brittleness?}{11}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.4}Summary}{11}} \@writefile{lof}{\contentsline {figure}{\numberline {20}{\ignorespaces Position of values in 1NN and CAM population with data set at 3, 5, 10 and 20 clusters.}}{12}} \newlabel{fig:dist3}{{20}{12}} \@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces PDS: other - (knn+cliff). Note: {\em negative} values means CLIFF is performing {\em better}.}}{12}} \newlabel{fig:sbetter1}{{17}{12}} \@writefile{lof}{\contentsline {figure}{\numberline {18}{\ignorespaces PRECISION: other - (knn+cliff). Note: {\em negative} values means CLIFF is performing {\em better}.}}{12}} \newlabel{fig:sbetter2}{{18}{12}} \@writefile{lof}{\contentsline {figure}{\numberline {19}{\ignorespaces PFs: other - (knn+cliff). Note: {\em positive} values means CLIFF is performing {\em better}.}}{12}} \newlabel{fig:sbetter3}{{19}{12}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.5}Threats to Validity}{12}} \newlabel{section:ttv}{{6.5}{12}} \@writefile{toc}{\contentsline {section}{\numberline {7}Conclusions and Future Work}{12}} \newlabel{section:conclusion}{{7}{12}} \@writefile{toc}{\contentsline {subsection}{\numberline {7.1}Conclusions}{12}} \citation{09NAS} \citation{me07} \citation{fastmap} \bibstyle{IEEEtran} \bibdata{newcliff-v2} \bibcite{Hart67}{1} \bibcite{Gates72}{2} \bibcite{Hart68a}{3} \bibcite{Dasarathy94}{4} \bibcite{lot2010}{5} \bibcite{Bezdek2001}{6} \bibcite{Brighton2002}{7} \bibcite{CHL03}{8} \bibcite{Carrasco05}{9} \bibcite{Chien06}{10} \bibcite{García2008}{11} \bibcite{Lumini06}{12} \bibcite{Narayan06}{13} \bibcite{OCM2007}{14} \bibcite{Raicharoen05}{15} \bibcite{Riquelme03}{16} \bibcite{Ritter75}{17} \bibcite{Srisawat06}{18} \bibcite{Tomek76}{19} \bibcite{Veenman2005}{20} \@writefile{toc}{\contentsline {subsection}{\numberline {7.2}Future Work}{13}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.1}Using CLIFF with Other Classifiers}{13}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.2}Using CLIFF to Optimized Feature Subset Selection}{13}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {7.2.3}Comparing CAM to Other Forensic Models and Forensic data-sets}{13}} \@writefile{toc}{\contentsline {section}{References}{13}} \bibcite{Wilson72}{21} \bibcite{wilson00}{22} \bibcite{Olvera2010}{23} \bibcite{Frank+Asuncion:2010}{24} \bibcite{jalali08}{25} \bibcite{09NAS}{26} \bibcite{knn}{27} \bibcite{Devi2002}{28} \bibcite{Li2009}{29} \bibcite{Bezdek98}{30} \bibcite{Cano2005}{31} \bibcite{Garain2008}{32} \bibcite{FCNN07}{33} \bibcite{Kriegel05}{34} \bibcite{Fayyad1992}{35} \bibcite{burak}{36} \bibcite{Walsh94}{37} \bibcite{Seheult78}{38} \bibcite{Grove80}{39} \bibcite{Evett94}{40} \bibcite{Karslake09}{41} \bibcite{fastmap}{42} \bibcite{Du2008}{43} \bibcite{09Zadora}{44} \bibcite{09aZadora}{45} \bibcite{06Aitken}{46} \bibcite{04Aitken}{47} \bibcite{02Koons}{48} \bibcite{99Koons}{49} \bibcite{me07}{50}