\relax \citation{me05a} \citation{boehm00a} \citation{andrews07} \citation{holzmann97} \citation{port08} \citation{endres03} \citation{endres03} \citation{fenton07} \citation{witten05} \@writefile{toc}{\contentsline {chapter}{\numberline {1}Introduction}{1}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{introduction}{{1}{1}} \@writefile{toc}{\contentsline {section}{\numberline {1.1}Motivation}{1}} \citation{norvig03,She02,leake96,kol93,leake05} \citation{me00f,me07g,me07f,orrego09,me09a,me09e,me09f,me09i,me10f} \citation{ruhe09,zhang07a} \citation{ruhe09} \citation{shepperd97,Mendes2003,Kirsopp2002,Walkerden1999} \citation{me00f,me07g,me07f,orrego09,me09a,me09e,me09f,me09i,me10f} \citation{brady10a} \citation{brady10b} \@writefile{toc}{\contentsline {section}{\numberline {1.2}Statement of Thesis}{4}} \@writefile{toc}{\contentsline {section}{\numberline {1.3}Contribution of This Work}{4}} \@writefile{toc}{\contentsline {section}{\numberline {1.4}Document Structure}{5}} \citation{shepperd97,Mendes2003,Kirsopp2002,Walkerden1999} \citation{pendharkar05,Li2009,Lipowezky1998,Walkerden1999,Kirsopp2002,Mendes2003,jorgensen05,shepperd07,shepperd97} \citation{pendharkar05} \citation{fenton99} \citation{harman04} \citation{harman04} \citation{hall03} \citation{miller02} \citation{Dil84} \citation{hall03} \@writefile{toc}{\contentsline {chapter}{\numberline {2}Related Work}{7}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{related}{{2}{7}} \@writefile{toc}{\contentsline {section}{\numberline {2.1}Software Estimation Research}{7}} \citation{jalali08} \citation{andrews07} \citation{me07f} \citation{gof95} \citation{pfahl05} \citation{me02f} \citation{boehm00b} \@writefile{toc}{\contentsline {section}{\numberline {2.2}Search-Based Software Engineering (SBSE)}{8}} \@writefile{toc}{\contentsline {section}{\numberline {2.3}Model: Benefits}{8}} \citation{fenton07} \citation{chulani99} \citation{schulz10} \citation{me07f} \@writefile{toc}{\contentsline {section}{\numberline {2.4}Model: Drawbacks}{9}} \newlabel{sect:draw}{{2.4}{9}} \@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces Features of the COCOMO model ontology.}}{10}} \newlabel{fig:cocattrs}{{2.1}{10}} \citation{lowry10} \citation{baker07} \citation{boehm00b} \citation{baker07} \citation{boehm81} \newlabel{eq:simpcoc}{{2.1}{11}} \newlabel{eq:ab}{{2.2}{11}} \citation{me05a} \citation{me05a} \citation{me05a} \citation{me05a} \@writefile{lof}{\contentsline {figure}{\numberline {2.2}{\ignorespaces COCOMO 1 effort multipliers, and the sorted coefficients found by linear regression from twenty 66\% sub-samples (selected at random) from the NASA93 PROMISE data set; from\nobreakspace {}\cite {me05a}. }}{13}} \newlabel{fig:coc1}{{2.2}{13}} \citation{bartlett32} \citation{loftus03} \@writefile{toc}{\contentsline {chapter}{\numberline {3}The {$\mathcal {W}$}2 Algorithm}{14}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{sect:inst}{{3}{14}} \@writefile{toc}{\contentsline {section}{\numberline {3.1}Case-Based Reasoning}{14}} \citation{kolodner83} \citation{aamod94} \@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces Four steps of case-based reasoning, from \url {http://www.peerscience.com/intro_cbr.htm}.}}{15}} \newlabel{fig:cbr}{{3.1}{15}} \citation{schank83} \@writefile{toc}{\contentsline {section}{\numberline {3.2}Contrast Set Learning (CSL)}{16}} \newlabel{sect:csl-description}{{3.2}{16}} \citation{me09f} \citation{mozina04} \citation{me10c} \newlabel{eq:br}{{3.1}{18}} \@writefile{toc}{\contentsline {section}{\numberline {3.3}The {$\mathcal {W}$}2 Algorithm}{18}} \newlabel{sect:w-desc}{{3.3}{18}} \citation{brooks75} \@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces The Brooks' Law Query for the NASA93 dataset in COCOMO II format.}}{19}} \newlabel{fig:brooks}{{3.2}{19}} \@writefile{lof}{\contentsline {figure}{\numberline {3.3}{\ignorespaces RELEVANT= cases nearest to $context_1$.}}{20}} \newlabel{fig:train-filter}{{3.3}{20}} \@writefile{lof}{\contentsline {figure}{\numberline {3.4}{\ignorespaces $Best$ (top) \& $rest$ (bottom).}}{21}} \newlabel{fig:train-contrast}{{3.4}{21}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3.1}Relevancy Filtering}{21}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3.2}Utility Separation}{21}} \@writefile{lof}{\contentsline {figure}{\numberline {3.5}{\ignorespaces Rank with Equation\nobreakspace {}3.1\hbox {}.}}{22}} \newlabel{fig:train-bestrest}{{3.5}{22}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3.3}Contrast Set Generation}{22}} \@writefile{lof}{\contentsline {figure}{\numberline {3.6}{\ignorespaces A $K_1=20$ neighborhood of $context_1$ (NASA93ii train set).}}{23}} \newlabel{fig:testing}{{3.6}{23}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3.4}Estimating Impact}{23}} \@writefile{lof}{\contentsline {figure}{\numberline {3.7}{\ignorespaces All rows of Figure\nobreakspace {}3.6\hbox {} satisfying $R_1 : pmat=3$. }}{24}} \newlabel{fig:testing-constrained}{{3.7}{24}} \@writefile{lof}{\contentsline {figure}{\numberline {3.8}{\ignorespaces The testing set with all cases not containing $pmat = 3$ removed. }}{24}} \newlabel{fig:testing-applied}{{3.8}{24}} \@writefile{lof}{\contentsline {figure}{\numberline {3.9}{\ignorespaces Result of applying the learned constraint $pmat = 3$ to the Brooks' Law query $q$ during testing. The median estimate reduction from 235 to 81 represents a 66\% reduction is software effort by applying $pmat = 3$.}}{24}} \newlabel{fig:testing-result}{{3.9}{24}} \citation{shepperd97} \citation{watson98} \@writefile{lof}{\contentsline {figure}{\numberline {3.10}{\ignorespaces Revising $q$ to learn $q'$.}}{25}} \newlabel{fig:w}{{3.10}{25}} \@writefile{lof}{\contentsline {figure}{\numberline {3.11}{\ignorespaces {$\mathcal {W}$}2's syntax for describing the input query $q$. Here, all the values run 1 to 6. $4\le cplx \le 6$ denotes projects with above average complexity. Question marks denote what can be controlled- in this case, $rely,time$ (required reliability and development time)}}{26}} \newlabel{fig:nasaproj}{{3.11}{26}} \@writefile{toc}{\contentsline {section}{\numberline {3.4}Measuring Performance}{26}} \@writefile{toc}{\contentsline {chapter}{\numberline {4}Experiments with {$\mathcal {W}$}2}{28}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{experiments}{{4}{28}} \@writefile{toc}{\contentsline {section}{\numberline {4.1}Datasets and Project Descriptions}{28}} \@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Seven data sets from \url {promisedata.org/?cat=14}: {\em effort} is total staff person-months; {\em time} is calendar time (start to stop); {\em defects} represents the number of delivered defects. }}{28}} \newlabel{fig:data}{{4.1}{28}} \citation{miyazaki94} \citation{kemerer87} \citation{shepperd97} \citation{brady10b} \@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces Example project $controllable$ file for Chinese software projects after discretization. Ranges were assigned randomly for this project. A ``?'' represents a controllable feature. If an attribute range isn't specified in the project, it is ignored.}}{30}} \newlabel{fig:china-p2}{{4.2}{30}} \@writefile{toc}{\contentsline {section}{\numberline {4.2}Experiment: {$\mathcal {W}$}2 vs {$\mathcal {W}$}}{30}} \newlabel{sect:runtimes}{{4.2}{30}} \@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces Average execution times for the W and W2 algorithms. By removing the $O(n^2)$ kth nearest neighbor calculation from W we drastically improve performance, especially on larger datasets such as China (499 cases).}}{31}} \newlabel{fig:w2-runtimes}{{4.3}{31}} \@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces Performance of W2's Overlap relevancy filtering vs W's kth nearest-neighbor filtering for 5 unique datasets.}}{31}} \newlabel{fig:bestoverlap-vs-knn}{{4.4}{31}} \@writefile{toc}{\contentsline {section}{\numberline {4.3}Experiment: {$\mathcal {W}$}2's Performance Across Multiple Datasets}{32}} \newlabel{sect:multi-comp}{{4.3}{32}} \@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Effort estimation improvements ($100*\frac {initial-final}{intial}$) for five unique datasets. Sorted by median improvement. Gray cells represent no improvement in effort estimates.}}{33}} \newlabel{fig:other-reduce-values}{{4.5}{33}} \@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Effort results for five non-COCOMO datasets. }}{34}} \newlabel{fig:other-reduce}{{4.6}{34}} \@writefile{toc}{\contentsline {section}{\numberline {4.4}Experiment: Intra- and Inter-Project Stability}{34}} \newlabel{sect:stable}{{4.4}{34}} \@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces Range of changes in median and spread generated by applying the recommendations of {$\mathcal {W}$}2. The median observed changes were (20.5, 20.5)\% for (medians, spreads), respectively. }}{35}} \newlabel{fig:changes}{{4.7}{35}} \citation{me10b} \citation{zimmermann09} \@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces Recommendation frequency across 20 runs of {$\mathcal {W}$}2 for reducing individual goals ($defects$, $effort$, or $months$) as well as all goals at once ($all$).}}{36}} \newlabel{fig:stability}{{4.8}{36}} \citation{me09a} \citation{me09a} \@writefile{toc}{\contentsline {section}{\numberline {4.5}Experiment: Comparing Drastic Changes to {$\mathcal {W}$}2}{37}} \newlabel{sect:drastic-comp}{{4.5}{37}} \@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces Examples of drastic changes to software projects.}}{38}} \newlabel{fig:drastic-overview}{{4.9}{38}} \@writefile{lof}{\contentsline {figure}{\numberline {4.10}{\ignorespaces Comparing defect, effort, and month estimation reduction percentages ($100*\frac {initial-final}{intial}$ of drastic business decisions vs {$\mathcal {W}$}'s recommendations for the Ground case study.}}{38}} \newlabel{fig:drastic-ground}{{4.10}{38}} \@writefile{lof}{\contentsline {figure}{\numberline {4.11}{\ignorespaces Comparing defect, effort, and month estimation reduction percentages ($100*\frac {initial-final}{intial}$ of drastic business decisions vs {$\mathcal {W}$}'s recommendations for the Flight case study.}}{39}} \newlabel{fig:drastic-flight}{{4.11}{39}} \@writefile{lof}{\contentsline {figure}{\numberline {4.12}{\ignorespaces Comparing defect, effort, and month estimation reduction percentages ($100*\frac {initial-final}{intial}$ of drastic business decisions vs {$\mathcal {W}$}'s recommendations for the OSP case study.}}{40}} \newlabel{fig:drastic-osp}{{4.12}{40}} \@writefile{lof}{\contentsline {figure}{\numberline {4.13}{\ignorespaces Comparing defect, effort, and month estimation reduction percentages ($100*\frac {initial-final}{intial}$ of drastic business decisions vs {$\mathcal {W}$}'s recommendations for the OSP2 case study.}}{41}} \newlabel{fig:drastic-osp2}{{4.13}{41}} \@writefile{toc}{\contentsline {chapter}{\numberline {5}Model-Based vs. Case-Based Algorithms}{42}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{sect:modelvinstance}{{5}{42}} \@writefile{toc}{\contentsline {section}{\numberline {5.1}Model-based Case Studies}{42}} \newlabel{sect:data}{{5.1}{42}} \@writefile{lof}{\contentsline {figure}{\numberline {5.1}{\ignorespaces Contexts of 4 case studies. \textit {\{1, 2, 3, 4, 5, 6\}} map to \textit {\{very low, low, nominal, high, very high, extra high\}}.}}{43}} \newlabel{fig:cases}{{5.1}{43}} \citation{me07f} \@writefile{toc}{\contentsline {section}{\numberline {5.2}SEESAW}{44}} \newlabel{eq:pred}{{5.1}{44}} \newlabel{eq:simp}{{5.2}{44}} \citation{boehm00b} \newlabel{eq:argmin}{{5.4}{45}} \newlabel{eq:lineem}{{5.5}{45}} \citation{boehm00b} \citation{me07f,me08c,me09e} \citation{me09a} \citation{me09i} \newlabel{eq:linesf}{{5.6}{46}} \citation{me07f} \citation{kirkpatrick83} \@writefile{lof}{\contentsline {figure}{\numberline {5.2}{\ignorespaces Pseudocode for SEESAW}}{48}} \newlabel{fig:seesaw}{{5.2}{48}} \citation{selman96} \@writefile{toc}{\contentsline {section}{\numberline {5.3}Five Additional AI Model-Based Algorithms}{49}} \newlabel{eq:edt}{{5.7}{49}} \citation{craw94} \@writefile{lof}{\contentsline {figure}{\numberline {5.3}{\ignorespaces Example of SA's forward and back select.}}{50}} \newlabel{fig:run}{{5.3}{50}} \@writefile{toc}{\contentsline {section}{\numberline {5.4}Comparisons of AI Model-based Methods}{51}} \newlabel{sect:aicomp}{{5.4}{51}} \citation{boehm81} \@writefile{lof}{\contentsline {figure}{\numberline {5.4}{\ignorespaces Number of times algorithms were top-ranked (largest is 4: i.e. one for each Figure\nobreakspace {}5.1\hbox {} case study). }}{52}} \newlabel{fig:xper}{{5.4}{52}} \@writefile{toc}{\contentsline {section}{\numberline {5.5}Model vs. Case-Based Methods}{52}} \newlabel{sect:modelins}{{5.5}{52}} \@writefile{lof}{\contentsline {figure}{\numberline {5.5}{\ignorespaces Changes in median and spread for the NASA93 dataset. }}{54}} \newlabel{fig:nasa93results}{{5.5}{54}} \@writefile{lof}{\contentsline {figure}{\numberline {5.6}{\ignorespaces Changes in median and spread for the COC81 dataset. }}{55}} \newlabel{fig:cocomo81results}{{5.6}{55}} \citation{boehm00b} \citation{boehm00a} \@writefile{toc}{\contentsline {chapter}{\numberline {6}Discussion}{57}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{discussion}{{6}{57}} \@writefile{toc}{\contentsline {section}{\numberline {6.1}When Not to Use {$\mathcal {W}$}2}{57}} \newlabel{sect:delta}{{6.1}{57}} \citation{boehm00a} \citation{Boehm96softwarecost} \citation{shank77} \citation{shepperd07} \@writefile{lof}{\contentsline {figure}{\numberline {6.1}{\ignorespaces Relative effects on development effort. From\nobreakspace {}\cite {boehm00a}. }}{59}} \newlabel{fig:coceffects}{{6.1}{59}} \@writefile{toc}{\contentsline {section}{\numberline {6.2}Model-lite}{59}} \newlabel{sect:modellite}{{6.2}{59}} \citation{wolpert94} \@writefile{toc}{\contentsline {section}{\numberline {6.3}Scope of the Study}{60}} \citation{Li2009,Lipowezky1998,Walkerden1999,Kirsopp2002,Mendes2003} \citation{ostrand04,journals/ese/KhoshgoftaarS03,tosun10,me10a} \citation{bartlett32} \citation{kolodner83} \citation{shank77} \citation{azzeh08} \@writefile{toc}{\contentsline {chapter}{\numberline {7}Conclusion}{62}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{conclusion}{{7}{62}} \citation{me10c} \citation{me10c} \citation{me01c} \@writefile{toc}{\contentsline {chapter}{\numberline {A}{$\mathcal {W}$}2 Source Code}{65}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{source}{{A}{65}} \@writefile{toc}{\contentsline {section}{\numberline {A.1}w.sh}{65}} \@writefile{lol}{\contentsline {lstlisting}{code/w.sh}{65}} \@writefile{toc}{\contentsline {section}{\numberline {A.2}w.awk}{66}} \@writefile{lol}{\contentsline {lstlisting}{code/w.awk}{66}} \@writefile{toc}{\contentsline {section}{\numberline {A.3}apply.awk}{71}} \@writefile{lol}{\contentsline {lstlisting}{code/apply.awk}{71}} \@writefile{toc}{\contentsline {section}{\numberline {A.4}contrast.awk}{79}} \@writefile{lol}{\contentsline {lstlisting}{code/contrast.awk}{79}} \@writefile{toc}{\contentsline {section}{\numberline {A.5}discretize.awk}{81}} \@writefile{lol}{\contentsline {lstlisting}{code/scripts/discretize.awk}{81}} \@writefile{toc}{\contentsline {section}{\numberline {A.6}neighbors.awk}{84}} \@writefile{lol}{\contentsline {lstlisting}{code/neighbors.awk}{84}} \@writefile{toc}{\contentsline {section}{\numberline {A.7}projects.awk}{85}} \@writefile{lol}{\contentsline {lstlisting}{code/projects.awk}{85}} \@writefile{toc}{\contentsline {section}{\numberline {A.8}util.awk}{88}} \@writefile{lol}{\contentsline {lstlisting}{code/util.awk}{88}} \@writefile{toc}{\contentsline {chapter}{\numberline {B}Example Dataset and Project Descriptions}{96}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{sect:dataset}{{B}{96}} \@writefile{toc}{\contentsline {section}{\numberline {B.1}NASA93 Project Descriptions}{96}} \@writefile{toc}{\contentsline {subsection}{\numberline {B.1.1}NASA Ground Software}{96}} \@writefile{lol}{\contentsline {lstlisting}{code/projects/ground}{96}} \@writefile{toc}{\contentsline {subsection}{\numberline {B.1.2}NASA Flight Software}{96}} \@writefile{lol}{\contentsline {lstlisting}{code/projects/flight}{96}} \@writefile{toc}{\contentsline {subsection}{\numberline {B.1.3}NASA Orbital Space Plane (OSP)}{97}} \@writefile{lol}{\contentsline {lstlisting}{code/projects/osp}{97}} \@writefile{toc}{\contentsline {subsection}{\numberline {B.1.4}NASA Orbital Space Plane 2 (More Limited Scope)}{97}} \@writefile{lol}{\contentsline {lstlisting}{code/projects/osp2}{97}} \@writefile{toc}{\contentsline {section}{\numberline {B.2}NASA93 Historical Data for Defects, Effort, and Months}{98}} \@writefile{lol}{\contentsline {lstlisting}{code/datasets/nasa93.dat}{98}} \bibstyle{plain} \bibdata{refs,local_refs,myrefs,thesis} \bibcite{endres03}{1} \bibcite{aamod94}{2} \bibcite{andrews07}{3} \bibcite{azzeh08}{4} \bibcite{baker07}{5} \bibcite{bartlett32}{6} \bibcite{boehm81}{7} \bibcite{boehm00a}{8} \bibcite{Boehm96softwarecost}{9} \bibcite{boehm00b}{10} \bibcite{brady10b}{11} \bibcite{brady10a}{12} \bibcite{brooks75}{13} \bibcite{chulani99}{14} \bibcite{craw94}{15} \bibcite{Dil84}{16} \bibcite{me10f}{17} \bibcite{fenton99}{18} \bibcite{fenton07}{19} \bibcite{gof95}{20} \bibcite{me09i}{21} \bibcite{hall03}{22} \bibcite{harman04}{23} \bibcite{holzmann97}{24} \bibcite{jalali08}{25} \bibcite{jorgensen05}{26} \bibcite{kemerer87}{27} \bibcite{journals/ese/KhoshgoftaarS03}{28} \bibcite{kirkpatrick83}{29} \bibcite{Kirsopp2002}{30} \bibcite{me10c}{31} \bibcite{kol93}{32} \bibcite{kolodner83}{33} \bibcite{leake05}{34} \bibcite{leake96}{35} \bibcite{Li2009}{36} \bibcite{Lipowezky1998}{37} \bibcite{loftus03}{38} \bibcite{lowry10}{39} \bibcite{Mendes2003}{40} \bibcite{me09f}{41} \bibcite{me07g}{42} \bibcite{me08c}{43} \bibcite{me07f}{44} \bibcite{me01c}{45} \bibcite{me09a}{46} \bibcite{me09e}{47} \bibcite{me05a}{48} \bibcite{me02f}{49} \bibcite{me10b}{50} \bibcite{me00f}{51} \bibcite{me10a}{52} \bibcite{miller02}{53} \bibcite{miyazaki94}{54} \bibcite{mozina04}{55} \bibcite{ruhe09}{56} \bibcite{orrego09}{57} \bibcite{ostrand04}{58} \bibcite{pendharkar05}{59} \bibcite{pfahl05}{60} \bibcite{port08}{61} \bibcite{norvig03}{62} \bibcite{schank83}{63} \bibcite{shank77}{64} \bibcite{schulz10}{65} \bibcite{selman96}{66} \bibcite{shepperd07}{67} \bibcite{shepperd97}{68} \bibcite{She02}{69} \bibcite{zimmermann09}{70} \bibcite{tosun10}{71} \bibcite{Walkerden1999}{72} \bibcite{watson98}{73} \bibcite{witten05}{74} \bibcite{wolpert94}{75} \bibcite{zhang07a}{76}