\relax \citation{Boehm1981} \citation{Shepperd1996} \citation{Auer2006} \citation{Pendharkar2005} \citation{Mendes2008a} \citation{Li2009} \citation{Menzies2010xx,koc2011} \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} \newlabel{sec:introduction}{{1}{1}} \citation{baker07,Menzies2006} \citation{Mendes2003,Auer2006,baker07,Li2009} \citation{Boehm1981} \citation{kemerer87} \@writefile{toc}{\contentsline {section}{\numberline {2}Motivation}{2}} \newlabel{sec:motivation}{{2}{2}} \citation{Auer2006,Walkerden1999,Kirsopp2003,Shepperd1997,Shepperd1996,Kadoda2000,Li2008,Li2006,Li2007,Li2009,keung2008a,keung2008b,keung2008c} \citation{Myrtveit} \citation{Menzies2010xx} \citation{Menzies2010xx} \citation{Menzies2006} \citation{koc2011} \citation{Boehm1981} \citation{Menzies2010xx} \citation{koc2011jj} \citation{Mendes2002,Mendes2003} \citation{Briand1999,jeffery2001,Mendes2002,Mendes2003} \citation{Palpanas2003,John1995,Frank03} \citation{Browman1999} \citation{Browman1999} \citation{Mendes2002,Mendes2003} \citation{Browman1999} \citation{Browman1999} \citation{Shepperd2007} \citation{Jorgensen2004} \citation{Boehm2000} \@writefile{toc}{\contentsline {section}{\numberline {3}On the Value of Negative Results}{4}} \newlabel{sec:negative-results}{{3}{4}} \citation{Boehm1981} \citation{Menzies2006,Kadoda2001} \citation{Briand1997,Klas2008} \citation{Trendowicz2006} \citation{Shepperd1996} \citation{Mendes2003} \citation{Li2009} \@writefile{toc}{\contentsline {section}{\numberline {4}Background}{5}} \newlabel{sec:background}{{4}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Software Effort Estimation}{5}} \newlabel{subsec:software-effort-estimation}{{4.1}{5}} \citation{Mendes2002a} \citation{Briand1999} \citation{Myrtveit1999} \citation{Angelis2000} \citation{Mendes2002a} \citation{Angelis2000} \citation{Mendes2003} \citation{Wand1994} \citation{Scheid2004} \citation{Palpanas2003} \citation{Frank03} \citation{John1995} \newlabel{equ:euclid}{{1}{6}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Kernel Density Estimation}{6}} \newlabel{subsec:kernel-estimation}{{4.2}{6}} \citation{Mendes2002,Mendes2003} \citation{Scott1992,Cressie1993} \citation{Scheid2004,Wand1994} \citation{John1995} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces A Gaussian kernel density estimate built on individual data points. Each point is at the center of a kernel and its effect is distributed to its neighborhood. The sum of all kernels makes up the final Gaussian kernel density estimate.}}{7}} \newlabel{fig:pointwise-kernel}{{1}{7}} \newlabel{equ:general-kernel}{{2}{7}} \newlabel{equ:general-kernel-b}{{3}{7}} \newlabel{equ:general-kernel-g}{{4}{7}} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces The formulas for the different kernels used in this study. In the formulas, $\rho = \frac{x - X_i}{h}$. 
Note that the IRWM kernel has different characteristics; its calculation details are provided in Section 4.1\hbox {}.}}{7}} \newlabel{fig:kernel-formulas}{{2}{7}} \citation{Boehm1981} \citation{Desharnais1989} \citation{Alpaydin2004} \citation{Kadoda2000} \@writefile{toc}{\contentsline {section}{\numberline {5}Methodology}{8}} \newlabel{sec:methodology}{{5}{8}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Weighting Method}{8}} \newlabel{subsec:weighting-method}{{5.1}{8}} \newlabel{equ:wkde}{{5}{8}} \newlabel{equ:weight}{{6}{8}} \newlabel{equ:effort-update}{{7}{8}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.1.1}Uniform vs. Non-Uniform Weighting}{8}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Data}{8}} \newlabel{fig:weighting-abe}{{5.1.1}{9}} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces In the case of ABE0, all instances are given equal probability values, hence equal weights. However, the uniform kernel prefers some instances over others: only a certain portion of the instances are given equal non-zero weights.}}{9}} \newlabel{fig:weight-vs-non-weight}{{3}{9}} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces We used 699 projects from 19 datasets. The datasets differ in the number of attributes as well as in the measures of these attributes.}}{9}} \newlabel{fig:datasets}{{4}{9}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Experiments}{9}} \newlabel{sect:expr}{{5.3}{9}} \citation{Molokken-Ostvold2004,Briand1999,Foss} \citation{Stensrud2002} \citation{Kitchenham2001} \citation{Mendes2008a} \citation{Shepperd1997} \citation{Foss} \@writefile{toc}{\contentsline {subsection}{\numberline {5.4}Performance Criteria}{10}} \newlabel{ar}{{8}{10}} \newlabel{EquationMRE}{{9}{10}} \newlabel{EquationMdRE}{{10}{10}} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Comparing algorithms (\textit {i},\textit {j}) on performance ($P_i$,$P_j$). The ``better'' predicate changes according to $P$. For error measures like MRE, ``better'' means lower medians. However, for PRED(25), ``better'' means higher medians.}}{11}} \newlabel{FigureWinTieLossPseudocode}{{5}{11}} \@writefile{toc}{\contentsline {section}{\numberline {6}Results}{11}} \newlabel{sec:results}{{6}{11}} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Desharnais dataset $win,tie,loss$ statistics for ABE0 and N-ABE under the Gaussian kernel. For each dataset we have $4$ of these tables (one for each kernel); in total this amounts to \textit {19 datasets $\times $ 4 tables = 76 tables}. In addition, we have another \textit {19 datasets $\times $ 1 kernel = 19 tables} from the IRWM kernel. It is infeasible to include all the tables in this paper; therefore, an executive summary of the $76+19=95$ tables is provided in Figure\nobreakspace {}7\hbox {}. Furthermore, we provide all $95$ tables in Excel format at http://goo.gl/qpQiD.}}{12}} \newlabel{fig:win-tie-loss-desharnais}{{6}{12}} \@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Nine data sets comparing ABE0 to N-ABE. For every row in each cell, there are three symbols indicating the effect of N-ABE w.r.t. 3 different error measures. From left to right, the first symbol stands for the N-ABE effect w.r.t. MdMRE, the second w.r.t. MAR, and the third w.r.t. Pred(25). A ``$+$'' indicates that for the majority of $k$ values (at least 3 out of 5 \textit {k} values), N-ABE improved ABE0 in terms of $win-loss$ values. 
A ``$-$'' indicates that N-ABE decreased the performance of ABE0 in the majority of cases. If neither of these conditions is satisfied, a ``$o$'' symbol is assigned. Note that the dataset order here is the same as in Figure\nobreakspace {}4\hbox {}, but the dataset names are abbreviated to 3 to 5 letters due to space constraints.}}{13}} \newlabel{fig:summary-all1}{{7}{13}} \@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Ten more data sets comparing ABE0 to N-ABE. Same format as Figure\nobreakspace {}7\hbox {}.}}{14}} \newlabel{fig:summary-all2}{{8}{14}} \@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces The comparison of ABE0 to N-ABE under the IRWM kernel. Similar to Figure\nobreakspace {}7\hbox {}, three symbols indicate the effect of N-ABE w.r.t. 3 different error measures, and a ``$+$'' indicates that for the majority of $k$ values N-ABE improved ABE0 in terms of $win-loss$ values. A ``$-$'' symbol indicates a decrease and a ``$o$'' symbol indicates neither a decrease nor an increase. Notice that, under the IRWM kernel, N-ABE fails to improve ABE0 w.r.t. any of the 3 performance measures.}}{15}} \newlabel{fig:summary-irwm}{{9}{15}} \@writefile{toc}{\contentsline {section}{\numberline {7}Discussion}{15}} \newlabel{sec:discussion}{{7}{15}} \@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces An intuitive example. In a 3-analogy case, there is a $75\%$ change for a hypothetical test project between uniform and non-uniform weighting.}}{15}} \newlabel{fig:intuitive-example}{{10}{15}} \citation{Palpanas2003,Frank03,John1995} \citation{Alpaydin2004} \citation{Milic2004} \citation{Robson2002} \citation{Kitchenham2001} \citation{Kitchenham2009} \@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {50 Sample Points: Note the poor fit due to the low sample size.}}}{16}} \@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {100 Sample Points: Note the better fit due to the increased sample size.}}}{16}} \@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {1000 Sample Points: Note the optimum fit due to the high sample size.}}}{16}} \@writefile{toc}{\contentsline {section}{\numberline {8}Threats to Validity}{16}} \newlabel{sec:threats-to-validity}{{8}{16}} \newlabel{fig:50-sample-points}{{11(a)}{17}} \newlabel{sub@fig:50-sample-points}{{(a)}{17}} \newlabel{fig:100-sample-points}{{11(b)}{17}} \newlabel{sub@fig:100-sample-points}{{(b)}{17}} \newlabel{fig:1000-sample-points}{{11(c)}{17}} \newlabel{sub@fig:1000-sample-points}{{(c)}{17}} \@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces The effect of sample size and bandwidth on kernel density estimation. The choice of the optimum bandwidth (\textit {h} value) is important. However, even with the optimum bandwidth, one still needs a sufficient number of samples for successful estimation. A sample size of $50$ appears to be too small, and when we increase it to $100$, we get a better fit. 
Yet, for a very close fit, we need to go up to $1000$ sample points.}}{17}} \newlabel{fig:sample-size-kernel-estimation}{{11}{17}} \citation{Palpanas2003,John1995} \@writefile{toc}{\contentsline {section}{\numberline {9}Conclusions}{18}} \newlabel{sec:conclusions}{{9}{18}} \@writefile{toc}{\contentsline {subsection}{\numberline {9.1}Answers To Research Questions}{18}} \@writefile{toc}{\contentsline {section}{\numberline {10}Future Work}{18}} \newlabel{sec:future-work}{{10}{18}} \bibstyle{abbrv} \bibdata{myref} \bibcite{Alpaydin2004}{1} \bibcite{Angelis2000}{2} \bibcite{Auer2006}{3} \bibcite{baker07}{4} \bibcite{Boehm2000}{5} \bibcite{Boehm1981}{6} \bibcite{Briand1997}{7} \bibcite{Briand1999}{8} \bibcite{Browman1999}{9} \bibcite{Cressie1993}{10} \bibcite{Desharnais1989}{11} \bibcite{Foss}{12} \bibcite{Frank03}{13} \bibcite{jeffery2001}{14} \bibcite{John1995}{15} \bibcite{Jorgensen2004}{16} \bibcite{Jorgensen2007}{17} \bibcite{Kadoda2000}{18} \bibcite{kemerer87}{19} \bibcite{keung2008a}{20} \bibcite{keung2008c}{21} \bibcite{koc2011jj}{22} \bibcite{keung2008b}{23} \bibcite{Kirsopp2003}{24} \bibcite{Kitchenham2009}{25} \bibcite{Kitchenham2001}{26} \bibcite{Klas2008}{27} \bibcite{koc2011}{28} \bibcite{Li2006}{29} \bibcite{Li2008}{30} \bibcite{Li2007}{31} \bibcite{Li2009}{32} \bibcite{Mendes2002a}{33} \bibcite{Mendes2008a}{34} \bibcite{Mendes2002}{35} \bibcite{Mendes2003}{36} \bibcite{Menzies2006}{37} \bibcite{Menzies2010xx}{38} \bibcite{Milic2004}{39} \bibcite{Molokken-Ostvold2004}{40} \bibcite{Myrtveit1999}{41} \bibcite{Myrtveit}{42} \bibcite{Palpanas2003}{43} \bibcite{Pendharkar2005}{44} \bibcite{Robson2002}{45} \bibcite{Scheid2004}{46} \bibcite{Scott1992}{47} \bibcite{Shepperd2007}{48} \bibcite{Kadoda2001}{49} \bibcite{Shepperd1997}{50} \bibcite{Shepperd1996}{51} \bibcite{Stensrud2002}{52} \bibcite{Trendowicz2006}{53} \bibcite{Walkerden1999}{54} \bibcite{Wand1994}{55} \citation{Jorgensen2007} \citation{Browman1999} \citation{Browman1999} \citation{Mendes2002,Mendes2003} \citation{Browman1999} \citation{Browman1999} \citation{Kitchenham2001}