\documentclass[journal]{IEEEtran/IEEEtran}

% smallitem: a compact itemize -- suppresses the vertical space around
% the list (topsep/partopsep/parskip) and between items (itemsep/parsep).
% NOTE(review): \leftmargin set *after* \begin{itemize} has no effect,
% since list geometry is frozen when the list starts -- confirm whether
% the .2in indent was actually intended to apply.
\newenvironment{smallitem}{
   \setlength{\topsep}{0pt}
   \setlength{\partopsep}{0pt}
   \setlength{\parskip}{0pt}
   \begin{itemize}
   \setlength{\leftmargin}{.2in}
   \setlength{\parsep}{0pt}
   \setlength{\parskip}{0pt}
   \setlength{\itemsep}{0pt}}{\end{itemize}}

% smallnums: enumerate analogue of smallitem -- a compact numbered list
% with all surrounding and inter-item vertical space removed.
% NOTE(review): as with smallitem, \leftmargin set after
% \begin{enumerate} has no effect -- confirm the intended indent.
\newenvironment{smallnums}{
   \setlength{\topsep}{0pt}
   \setlength{\partopsep}{0pt}
   \setlength{\parskip}{0pt}
   \begin{enumerate}
   \setlength{\leftmargin}{.2in}
   \setlength{\parsep}{0pt}
   \setlength{\parskip}{0pt} \setlength{\itemsep}{0pt}}{\end{enumerate}}

% Hypothesis wrapper environments. Each wraps the `hypo' environment
% with a status marker passed as hypo's optional argument:
%   hhh  -> ?          (hypothesis still open)
%   yup  -> \ding{51}  (pifont check mark: hypothesis supported)
%   nupe -> \ding{55}  (pifont cross mark: hypothesis rejected)
% NOTE(review): the `hypo' environment is not defined in this file --
% confirm it is provided elsewhere; \ding requires pifont (loaded below).
\newenvironment{hhh}{
  \begin{hypo}[?]}{\end{hypo}}
\newenvironment{yup}{
  \begin{hypo}[\ding{51}]}{\end{hypo}}
\newenvironment{nupe}{
  \begin{hypo}[\ding{55}]}{\end{hypo}}

% \boxplot{#1}{#2}{#3}{#4}{#5}: draw a one-line box-plot glyph on a
% 100 x 10 picture (units are \unitlength; x ranges 0..100).
%   #1 = start x of the first horizontal segment, #2 = its length
%   #3 = x of the filled circle (diameter 6), typically the median
%   #4 = start x of the second horizontal segment, #5 = its length
% Fixed vertical ticks are drawn at x = 0, 50, and 100.
\newcommand{\boxplot}[5]{\begin{picture}(100,10)
% left boundary tick
\put(0,0){\line(0,1){8}}
% right boundary tick
\put(100,0){\line(0,1){8}}
% first (left) horizontal segment at mid-height
\put(#1,4){\line(1,0){#2}}
% central filled dot
\put(#3,4){\circle*{6}}
% second (right) horizontal segment at mid-height
\put(#4,4){\line(1,0){#5}}
% mid-scale tick at x = 50
\put(50,0){\line(0,1){8}}
\end{picture}}

% Shorthand macros used throughout the body text.
% \aive: typesets "aive" with a diaeresis so that n\aive{} produces
% the word naive with its accent. The diaeresis belongs on a dotless i
% (a\"\i ve), not on the a as the previous definition (\"aive) had it.
\newcommand{\aive}{{a\"\i ve}}
% Begin/end shorthands for the compact list environments defined above.
\newcommand{\bi}{\begin{smallitem}}
\newcommand{\ei}{\end{smallitem}}
\newcommand{\be}{\begin{smallnums}}
\newcommand{\ee}{\end{smallnums}}
\newcommand{\bd}{\begin{description}}
\newcommand{\ed}{\end{description}}
% Cross-reference helpers: each takes a label suffix, e.g. \fig{eso}
% expands to Figure~\ref{fig:eso}.
\newcommand{\fig}[1]{Figure~\ref{fig:#1}}
\newcommand{\eq}[1]{Equation~\ref{eq:#1}}
\newcommand{\hyp}[1]{Hypothesis~\ref{hyp:#1}}


% some very useful LaTeX packages include:
\usepackage{alltt}
\usepackage{cite}      % Written by Donald Arseneau
                        % V1.6 and later of IEEEtran pre-defines the format
                        % of the cite.sty package \cite{} output to follow
                        % that of IEEE. Loading the cite package will
                        % result in citation numbers being automatically
                        % sorted and properly "ranged". i.e.,
                        % [1], [9], [2], [7], [5], [6]
                        % (without using cite.sty)
                        % will become:
                        % [1], [2], [5]--[7], [9] (using cite.sty)
                        % cite.sty's \cite will automatically add leading
                        % space, if needed. Use cite.sty's noadjust option
                        % (cite.sty V3.8 and later) if you want to turn this
                        % off. cite.sty is already installed on most LaTeX
                        % systems. The latest version can be obtained at:
                        % http://www.ctan.org/tex-archive/macros/latex/contrib/supported/cite/

\usepackage[pdftex]{graphicx}
%\fi
% However, be warned that pdflatex will require graphics to be in PDF
% (not EPS) format and will preclude the use of PostScript based LaTeX
% packages such as psfrag.sty and pstricks.sty. IEEE conferences typically
% allow PDF graphics (and hence pdfLaTeX). However, IEEE journals do not
% (yet) allow image formats other than EPS or TIFF. Therefore, authors of
% journal papers should use traditional LaTeX with EPS graphics.
%
% The path(s) to the graphics files can also be declared: e.g.,
% \graphicspath{{../eps/}{../ps/}}
% if the graphics files are not located in the same directory as the
% .tex file. This can be done in each branch of the conditional above
% (after graphicx is loaded) to handle the EPS and PDF cases separately.
% In this way, full path information will not have to be specified in
% each \includegraphics command.
%
% Note that, when switching from latex to pdflatex and vice-versa, the new
% compiler will have to be run twice to clear some warnings.

\usepackage{subfigure} % Written by Steven Douglas Cochran
                        % This package makes it easy to put subfigures
                        % in your figures. i.e., "figure 1a and 1b"
                        % Docs are in "Using Imported Graphics in LaTeX2e"
                        % by Keith Reckdahl which also documents the graphicx
                        % package (see above). subfigure.sty is already
                        % installed on most LaTeX systems. The latest version
                        % and documentation can be obtained at:
                        % http://www.ctan.org/tex-archive/macros/latex/contrib/supported/subfigure/

\usepackage{url}       % Written by Donald Arseneau
                        % Provides better support for handling and breaking
                        % URLs. url.sty is already installed on most LaTeX
                        % systems. The latest version can be obtained at:
                        % http://www.ctan.org/tex-archive/macros/latex/contrib/other/misc/
                        % Read the url.sty source comments for usage information.

\usepackage{amssymb}
\usepackage{amsmath}   % From the American Mathematical Society
                        % A popular package that provides many helpful commands
                        % for dealing with mathematics. Note that the AMSmath
                        % package sets \interdisplaylinepenalty to 10000 thus
                        % preventing page breaks from occurring within multiline
                        % equations. Use:
\interdisplaylinepenalty=2500
                        % after loading amsmath to restore such page breaks
                        % as IEEEtran.cls normally does. amsmath.sty is already
                        % installed on most LaTeX systems. The latest version
                        % and documentation can be obtained at:
                        % http://www.ctan.org/tex-archive/macros/latex/required/amslatex/math/


\usepackage{pifont}
%\usepackage[dvips]{color}
\begin{document}

\title{Feature Subset Selection Methods for\\COCOMO Based Software Effort Estimation}
\author{Daniel~Baker\thanks{Dan Baker is with the Lane Department of Computer Science,
 West Virginia University and can be reached at \protect\url{danielryanbaker@gmail.com}}, Tim~Menzies~\IEEEmembership{Member,~IEEE}% <-this % stops a space
\thanks{Dr. Menzies is with the Lane Department of Computer
Science, West Virginia University and can be reached at \protect\url{tim@menzies.us}}
\thanks{This research was conducted with funds from the NASA
Software Assurance Research Program led by the
NASA IV\&V Facility.
}
\thanks{Manuscript received January 1, 2006; revised XXX YY, 200Z.}}% <-this % stops a space
\markboth{IEEE Transactions on Software Engineering,~Vol.~W, No.~X,~YYY~200Z}{Baker, Menzies, Feature Subset Selection}
\maketitle

\begin{abstract}
This paper demonstrates the results of feature subset selection methods for the COCOMO model.
\end{abstract}

\section{Introduction}
\PARstart{I}{n} 2005...(START WITH QUOTE AND REFERENCE ABOUT MONEY LOST FROM INACCURATE SOFTWARE COST ESTIMATION/BUDGETING)
Currently cost estimation is accomplished using models such as COCOMO~\cite{Boehm81}, (LIST MANY OTHER METHODS WITH REFERENCES).
These models predict the development cost for a new software project based on past project data.
For an accurate prediction the training data needs quantity, quality, and relevance to the new project(ADD REFERENCES).
Unfortunately this is difficult in practice and estimates are often made using inadequate training data. 
Consequently, these models are plagued with problems including highly inaccurate predictions(FIND BEST REFERENCE) and the variance problem(INSERT REFERENCE).
It has been shown that the variance can be reduced by feature subset selection methods that discard irrelevant, redundant, noisy, and unreliable attributes(LOTS OF REF HERE).
This paper explores some of these attribute pruning techniques.

%\section{Background: COCOMO and COSEEKMO}
%\PARstart{P}{ull} text from COSEEKMO paper.  First explain COCOMO.  Then justify FSS.  Then explain COSEEKMO.  Basically say that
%COSEEKMO worked but was too slow, so we decided to try the minimal approach (attr - next section).  Given that the
%datasets were small, we decided extensive searches were ok and tried a complete approach instead of heuristics (next section on cocomost).


 \section{Background}

\subsection{COCOMO}

The case study material for this paper uses
 COCOMO-format data.
 COCOMO (the COnstructive COst MOdel)   was originally developed by Barry
Boehm in 1981~\cite{Boehm81} and was extensively revised in
2000~\cite{Boehm00b}.  
%% COCOMO helps software developers
%% reason about the cost and schedule implications of their software
%% decisions such as software investment decisions; setting project budgets
%% and schedules; negotiating cost, schedule, and performance
%% tradeoffs; making software risk management decisions, and making
%% software improvement decisions.
%% One advantage of COCOMO (and this is why we use it)
%% is that unlike  many other costing models such as SLIM or SEER,
%% COCOMO is an {\em
%% open model} with numerous published data~\cite{Boehm81, Boehm00b}.
%COCOMO
%measures effort in calendar months where one month is 152
% hours (and includes development and management hours). 
The core
intuition behind COCOMO-based estimation is that
as a program grows
in size, the development effort  grows exponentially. More specifically:



{\footnotesize
\begin{equation}\label{eq:one}
effort(person months)=a*\left(KLOC^b\right)*\left(\prod_jEM_j\right)
\end{equation}
}
%XXXwhat about repeatability (folks repeating other people's work), straw man.
%also, acceptable if >= cocomo1 stratified results
%competent if pred(30) cocomo stratified results:sp
%good if pred(25)> 83%
Here, $KLOC$
is thousands of  delivered source instructions. $KLOC$ can be
 estimated
directly or via a {\em function point estimation}. Function points
are a
 product of five defined data components (inputs, outputs, inquiries,
files, external interfaces) and 14 weighted environment characteristics
(data comm, performance, reusability, etc.)~\cite{Boehm00b,Capers98}.
A 1,000-line Cobol program would typically implement about 14
function points,
 while a 1,000-line C program would implement about
seven\footnote{\url{http://www.qsm.com/FPGearing.html}}.


% XXX
In \eq{one}, $EM_j$ is one of
{\em effort multipliers} such as {\em cplx} (complexity)
or {\em pcap} (programmer capability).
In order to model the effects of $EM_j$ on development effort,
Boehm
proposed reusing numeric values 
which he generated via
regression on historical data for each value of $EM_i$.

%\begin{figure}[!t] {\footnotesize
%\begin{center}
%\begin{tabular}{r|r|r|p{1.8in}}
%\noindent
%Mode & a & b & notes\\\hline
%Organic & 3.2 & 1.05 & projects from relatively small software teams develop software
%in a highly familiar, in-house environment.\\\hline
%Embedded & 2.8& 1.2 & projects operating within (is embedded in) a strongly
%coupled complex of hardware, software, regulations, and operational
%procedures.\\\hline
%Semi-Detached & 3.0 & 1.12 & An intermediary mode between organic and embedded.
%\end{tabular}
%\end{center}}
%\caption{Standard COCOMO 81 development modes.}\label{fig:eso}
%\end{figure}
%\begin{figure*}{\footnotesize
%\begin{center}
%\begin{tabular}{|p{0.35in}|p{6.5in}|}\hline
%~\newline
%~\newline
%Data sources&\bd
%\item[{\em Coc81:}] 63 records in the
%COCOMO 81 format. Source: \cite[p496-497]{Boehm81}. \newline Download from 
%\url{http://unbox.org/wisp/trunk/cocomo/data/coc81modeTypeLangType.csv}.
%\item[{\em Nasa93:}] ~~~ 93 NASA records in the COCOMO 81 format. \newline 
%Download from \url{http://unbox.org/wisp/trunk/cocomo/data/nasa93.csv}.
%\item[{\em CocII:}] 161  records in the COCOMO II format from the COCOMO consortium
%(co-ordinated by USC). This data is not in the public domain.
%\ed\\\hline
%~\newline
%~\newline
%Data \newline
%subsets&\bd
%\item[{\em All:}] selects all records from a particular source;
%e.g. "coc81\_all''.
%\item[{\em Category:}] ~~~~~is a NASA-specific
% designation selecting the type of project; e.g. avionics, data capture, etc.
%\item[{\em Dev:}] indicates the development methodology; e.g. div.waterfall.
%\item[{\em DevEnd:}] ~~~shows the last year of the software project.
%\item[{\em Fg:}] selects either ``$f$'' (flight) or ``$g$'' (ground) software.
%\item[{\em Kind:}] selects records relating
%to the development platform; max= mainframe and mic= microprocessor.
%\item[{\em Lang:}] selects records about different development languages.
%\item[{\em Project} and {\em center}:]~~~~~~~~~~~~~~~  $nasa93$
% designations selecting records relating to where the software was built and the name
%of the project.
%\item[{\em Mode=e:}]~\newline
%selects records relating to
%the {\em embedded}  COCOMO 81 development mode. The different COCOMO 81 development models were described
%in \fig{eso}.
%\item[{\em Mode=o: }]~\newline
%selects COCOMO 81 {\em organic} mode records.
%\item[{\em Mode=sd: }]~\newline
%selects COCOMO 81 {\em semi-detached} mode records.
%\item[{\em Org:}] is a $cocII$ designation showing  what organization provided the data.
%\item[{\em Size:}] is a $cocII$ specific designation grouping the records into (e.g.) those around 100KLOC.
%\item[{\em Type:}] selects different $coc81$  designations and include ``bus'' (for business application)
%or ``sys'' (for system software).
%\item[{\em Year:}] is a $nasa93$ term that
%selects the development years, grouped into units of five; e.g. 1970,1971,1972,1973,1974
%are  labeled ``1970''.
%\ed
%\\\hline
%\end{tabular}
%\end{center}}
%\caption{Data sets (top) and
%parts (bottom) of the data used in this study.}\label{fig:parts}
%\end{figure*}

In practice, effort data forms exponential distributions. Appendix B describes
methods for using such distributions in effort modeling.


Note that in COCOMO 81,
Boehm identified three common types of software:
{\em embedded, semi-detached},  and {\em organic}.
Each has their
own characteristic ``$a$'' and ``$b$'' (see  \fig{eso}).
COCOMO-II ignores these distinctions. 
This study used data sets in both the COCOMO 81 and COCOMO-II format.
For more on the differences between COCOMO 81
and COCOMO-II, see Appendix A. 

\subsection{Data}\label{sec:data}

The software project data we used in this study came from two sources (see \fig{parts}).
$Coc81$ is the original COCOMO data
used by Boehm to calibrate COCOMO 81.  $Nasa93$ comes from a NASA-wide database
recorded in the COCOMO 81 format. 
This data has been in
the public domain for several years but few have been aware of it.
It can now be found on-line in several places including
the PROMISE (Predictor Models in Software
Engineering) web site\footnote{\url{http://promise.site.uottawa.ca/SERepository/} and 
\url{http://unbox.org/wisp/trunk/cocomo/data}.}.
$Nasa93$
was
originally collected to create a NASA-tuned version of
COCOMO,
funded by the Space
Station Freedom Program.
$Nasa93$ contains data from six  NASA centers including the Jet Propulsion
Laboratory.
Hence,
it covers a very
wide range of software domains, development
processes, languages, and complexity as well as fundamental differences
in culture and business practices between each center. All of these
factors contribute to the large variances observed in this data
set.

When the $nasa93$ data was collected, it was
required that there be multiple interviewers with one person
leading the interview and one or two others recording and checking
documentation.  Each data point was cross-checked with either
official records or via independent subjective inputs from other
project personnel who fulfilled various roles on the project.  After
the data was translated into the COCOMO 81 format, the data was
reviewed with those who originally provided the data.  Once sufficient
data existed the data was analyzed to identify outliers and the
data values were re-verified with the development teams once again
if deemed necessary.  This typically required from two to four trips
to each NASA center.  All of the supporting information was placed
in binders, which we still on occasion reference even today. 

Using Boehm's COCOMO-I ``local calibration'' the $nasa93$ data has been shown to contain large deviations
due to the wide variety of projects in that data set,
and {\em not} poor data collection (ADD REFERENCE TO COSEEKMO PAPER). Our belief is that $nasa93$ was collected
using methods equal to, or better than, standard industrial practice. If so, then
industrial data would suffer from deviations equal to or larger than those seen in the $nasa93$ data.

\begin{figure}[!t] {\footnotesize
\begin{center}
\begin{tabular}{r|r|r|p{1.8in}}
\noindent
Mode & a & b & notes\\\hline
Organic & 3.2 & 1.05 & projects from relatively small software teams develop software
in a highly familiar, in-house environment.\\\hline
Embedded & 2.8& 1.2 & projects operating within (is embedded in) a strongly
coupled complex of hardware, software, regulations, and operational
procedures.\\\hline
Semi-Detached & 3.0 & 1.12 & An intermediary mode between organic and embedded.
\end{tabular}
\end{center}}
\caption{Standard COCOMO 81 development modes.}\label{fig:eso}
\end{figure}
\begin{figure*}{\footnotesize
\begin{center}
\begin{tabular}{|p{0.35in}|p{6.5in}|}\hline
~\newline
~\newline
Data sources&\bd
\item[{\em Coc81:}] 63 records in the
COCOMO 81 format. Source: \cite[p496-497]{Boehm81}. \newline Download from 
\url{http://unbox.org/wisp/trunk/cocomo/data/coc81modeTypeLangType.csv}.
\item[{\em Nasa93:}] ~~~ 93 NASA records in the COCOMO 81 format. \newline 
Download from \url{http://unbox.org/wisp/trunk/cocomo/data/nasa93.csv}.
\item[{\em CocII:}] 161  records in the COCOMO II format from the COCOMO consortium
(co-ordinated by USC). This data is not in the public domain.
\ed\\\hline
~\newline
~\newline
Data \newline
subsets&\bd
\item[{\em All:}] selects all records from a particular source;
e.g. ``coc81\_all''.
\item[{\em Category:}] ~~~~~is a NASA-specific
 designation selecting the type of project; e.g. avionics, data capture, etc.
\item[{\em Dev:}] indicates the development methodology; e.g. div.waterfall.
\item[{\em DevEnd:}] ~~~shows the last year of the software project.
\item[{\em Fg:}] selects either ``$f$'' (flight) or ``$g$'' (ground) software.
\item[{\em Kind:}] selects records relating
to the development platform; max= mainframe and mic= microprocessor.
\item[{\em Lang:}] selects records about different development languages.
\item[{\em Project} and {\em center}:]~~~~~~~~~~~~~~~  $nasa93$
 designations selecting records relating to where the software was built and the name
of the project.
\item[{\em Mode=e:}]~\newline
selects records relating to
the {\em embedded}  COCOMO 81 development mode. The different COCOMO 81 development models were described
in \fig{eso}.
\item[{\em Mode=o: }]~\newline
selects COCOMO 81 {\em organic} mode records.
\item[{\em Mode=sd: }]~\newline
selects COCOMO 81 {\em semi-detached} mode records.
\item[{\em Org:}] is a $cocII$ designation showing  what organization provided the data.
\item[{\em Size:}] is a $cocII$ specific designation grouping the records into (e.g.) those around 100KLOC.
\item[{\em Type:}] selects different $coc81$  designations and include ``bus'' (for business application)
or ``sys'' (for system software).
\item[{\em Year:}] is a $nasa93$ term that
selects the development years, grouped into units of five; e.g. 1970,1971,1972,1973,1974
are  labeled ``1970''.
\ed
\\\hline
\end{tabular}
\end{center}}
\caption{Data sets (top) and
parts (bottom) of the data used in this study.}\label{fig:parts}
\end{figure*}


\subsection{Performance Measures}

The performance of  models generating continuous output can be
assessed in many ways, including PRED(30), MMRE, correlation, etc.
PRED(30) is a measure calculated from
the relative error, or RE, which is the relative
size of the difference between the actual and estimated value.
One way to view these measures is to say that training data contains records
with variables $1,2,3,..,N$ and performance measures add additional new variables $N+1,N+2,...$.


The magnitude of the relative error, or MRE, is the absolute value of that relative error:

{\small
\[
MRE = |predicted - actual|/actual
\]}\noindent
The mean magnitude of the relative error, or MMRE, is the average percentage
of the absolute values of the relative errors over an entire data set.
MMRE results were shown in \fig{one} in the {\em mean\% average test error} column.
Given  $T$ tests, MMRE is calculated
as follows:

{\small
\[MMRE = \frac{100}{T}\sum_i^T\frac{|predicted_i-actual_i|}{actual_i}\]}\noindent


PRED(N) reports the average percentage of estimates that were within
N\% of the actual values. Given $T$ tests, then:

{\small
\[
PRED(N) = \frac{100}{T} \sum_i^T\left\{
\begin{array}{l}
1\;if\;MRE_i\;{\le}\;\frac{N}{100}\\
0\;otherwise
\end{array}\right.
\]}\noindent
For example, a PRED(30)=50\% means that half the estimates are within
30\% of the actual.

Another performance measure of a  model predicting numeric values is the correlation
between predicted and actual values.  Correlation
ranges from +1 to -1 and a correlation of +1 means that there is a
perfect positive linear relationship between variables. Appendix C shows how to
 calculate correlation.

All  these performance measures
(correlation, MMRE and PRED) address subtly different issues.
Overall, PRED measures how {\em well} an effort model performs while
MMRE measures  {\em poor} performance.
A single large mistake can skew the MMREs and not affect the PREDs.
Shepperd and Schofield comment that:
\begin{quote}
MMRE is fairly conservative with a bias
against overestimates while PRED(30) will identify those
prediction systems that are generally accurate but occasionally
wildly inaccurate~\cite[p736]{Shepperd97}.
\end{quote}

Since they measure different aspects of model
performance, COSEEKMO  
uses combinations of   PRED, MMRE, and correlation (using the methods
described later in this paper).


\section{Attribute Ranking Driven Greedy Search Feature Subset Selection}
\PARstart{F}{ollowing} the slow but steady success of COSEEKMO, it becomes necessary to find a similarly accurate
algorithm which is fast enough to be used by business users in the real world.  We decided to begin the search for
such an algorithm using a minimal approach, that is, a feature subset selection algorithm that used a greedy search
to minimize evaluations of the attribute space.  We decided to rank the attributes using correlation and to use the
ranked results as the order to grow the attribute set in the case of a forward select greedy search, or prune the set
in the case of a backward elimination greedy search.  At each step the attribute set is evaluated with
the MMRE, Pred(30), deviation, and correlation from using COCOMO regression with the attribute set in question.
If the change is considered an
improvement then it is kept and the search continues, otherwise it stops.  We introduced a horizon variable that could
be set to allow for the search to continue even without improvement.  An extensive experiment was run to benchmark the
many customizations of the greedy FSS approach against standard COCOMO least squares regression as described in the
pseudocode in \fig{attr}.  The results are shown in \fig{attrCOC81} and \fig{attrNASA}.  For the COC81 dataset,
standard COCOMO least squares regression was found to be far superior.  However, for the NASA93 dataset, many
customizations of the greedy feature subset selection had similar or slightly better MMRE and Pred(30) values, and lower deviations.
(refer to background on datasets to explain this).

Although quick, this algorithm is not accurate enough to be an acceptable replacement for even
avoiding feature subset selection entirely.  Although it had some slight success with the NASA93 dataset it
failed to provide similar results to COCOMO regression on the COC81 dataset.
Noticing that the results tended to improve with higher horizon values, we reasoned that
the attribute space needed more exploration.  Finally, with a dataset as small as the COC81 and NASA93 datasets,
the importance of heuristics to prune the state space explosion was examined.  Next we decided to build an efficient
algorithm that evaluated all 32,768 attribute combinations.

\begin{figure}[!b]
{\scriptsize\begin{alltt}
for data in dataSets
  for i in 1 to 30
    test = randomRecords(data,10)
    train = data - test
    attributes = (all COCOMO 81 attributes)
    results = LC(test, train, attributes)
    print variables and results
    for attribute in COCOMO_81_attributes
      rank correlation using LC(train, train, attribute)
      rankedList += sorted list of ranked attributes
    for search in "forward backward"
      for horizon in "0 1 2 4 8 16"
        for eval in "mmre sd_mre pred30 correlation"
          attr(test, train, search, horizon, eval, rankedList)

attr()
  for attribute in rankedList
    newSet = bestSet + attribute
    if (search==backward)
      tmpSet=inverse(newSet)
    else
      tmpSet = newSet
    oldScore = newScore
    results = LC(train, train, tmpSet)
    newScore = results.eval
    if (newScore better than oldScore)
      bestSet = newSet
      stale = horizon
    else
      newScore = oldScore
      stale--
    if (stale < 1)
      exit for
  next attribute
  if (search==backward)
    bestSet=inverse(bestSet)
  finalResults = LC(test, train, bestSet)
  print variables and results
\end{alltt}}
\caption{This pseudocode outlines an experiment that compared standard COCOMO 81 local calibration
with an approach that ranks the attributes based on correlation, and then builds a subset of
attributes using a greedy search guided by the attribute ranking.
}\label{fig:attr}
\end{figure}


\section{COCOMOST}

%\begin{figure}[!t]
\begin{figure}[!b]
{\scriptsize\begin{alltt}
{\bf for} data {\bf in} dataSets
  {\bf for} i {\bf in} 1 {\bf to} 30
      test       =  randomRecord(data)
      train      =  data - test
      attributes =  (all COCOMO 81 attributes)
      results    =  LC(test, train, attributes)
      print variables and results
      attributes =  cocomost(train)
      results    =  LC(test, train, attributes)
      print variables and results
\end{alltt}}
\caption{This experiment benchmarks standard COCOMO-based local calibration against
local calibration that uses cocomost to perform feature subset selection.
}\label{fig:study}
\end{figure}

%\begin{figure}[!t]
\begin{figure}[!b]
{\scriptsize\begin{alltt}
attributes = null
bestMMRE = LC(train, attributes)
bestAttributes = null
{\bf for} attributes {\bf in} 2^15
  newMMRE = LC(train, attributes)
  if newMMRE > bestMMRE
      bestMMRE = newMMRE
      bestAttributes = attributes

return bestAttributes
\end{alltt}}
\caption{Cocomost performs a complete search over the attribute space and evaluates the attribute
sets using the target learner: COCOMO-based local calibration.
}\label{fig:cocomost}
\end{figure}

\PARstart{T}{he} COCOMOST algorithm, as outlined in \fig{cocomost}, uses feature subset selection to prune irrelevant, redundant, noisy, and unreliable attributes from the COCOMO model.
It executes a complete search of the attribute space, evaluating attribute sets using local calibration.
Thus, it is a ``wrapper'' attribute selection technique instead of a ``filter'' because it evaluates using the target learner~\cite{Hall03}.
However, it shares several of the advantages of a filter.  Unlike most wrappers, COCOMOST is fast enough to search the entire attribute space instead of using heuristics to limit the state space explosion.
This introduces the vulnerability that every attribute beyond the 15 used in this study will double COCOMOST's execution time.

To compare the effectiveness of standard local calibration versus feature subset selection with COCOMOST followed by local calibration, we randomly pulled a test set from the training data 30 times and made estimates with each learner as described in \fig{study}.

%\section{Data}
%\PARstart{T}{he} training data used in the experiments are the coc81 and nasa93 datasets available at \protect\url{http://unbox.org/wisp/trunk/data}.
%The coc81 dataset comes from Boehm~\cite{Boehm81} and the nasa93 dataset contains project metrics from NASA software projects that have been sanitized for public use.
%In addition, the coc81 dataset contains 6 stratifications, and the nasa93 dataset contains 11.
%The learners were run on all 19 of these datasets to create the experimental results.


  
  
  
\section{Results}

\begin{figure}[!t]
\begin{center}
%[{\includegraphics[width=2.5in,clip,keepaspectratio]{stuff/mmre.jpg}}]
[{\includegraphics[width=2.5in,clip,keepaspectratio]{stuff/mmre.pdf}}]
\caption{Mean Magnitude of Relative Error}
\label{fig:mmre}
\end{center}
\end{figure}

\begin{figure}[!t]
\begin{center}
%[{\includegraphics[width=2.5in,clip,keepaspectratio]{stuff/sd.jpg}}]
[{\includegraphics[width=2.5in,clip,keepaspectratio]{stuff/sd.pdf}}]
\caption{Standard Deviation of Mean Magnitude of Relative Error}
\label{fig:sd}
\end{center}
\end{figure}

(PULL QUARTILE CHARTS TEXT FROM MENZIES PAPER)

\fig{mmre} shows the mean magnitude of relative error for each of the 19 subsets of data.
Standard local calibration is shown in red and COCOMOST is in green.
The results show similar errors between the learners.
\fig{sd} is similarly structured, except it displays the standard deviation of the error.
These results show that COCOMOST greatly reduces the variance for a large portion of the datasets.

\section{Conclusion}
\PARstart{T}{he} results show the limits of feature subset selection using local calibration as the evaluation criteria.
COCOMOST produced errors statistically equivalent to those of Boehm's COCOMO model.
Since the attribute space was searched completely, this indicates that more involved evaluation methods or models need to be explored to reduce the error.
However, COCOMOST was able to reduce the variance in the model which will make it easier to distinguish rival methods.
This will provide a stepping stone for further research.
In addition, for this evaluation method, heuristics were found to be less effective and slower than a finely tuned complete search of the attribute space.

\section{Future Work}
\PARstart{T}{here} is a lot of work to be done in improving software cost estimation models.  Some possibilities suggested by this research include:
\bi
\item
Feature Subset Selection using other evaluation methods and learners.
\item
Expanding upon COCOMOST with more data mining techniques such as bagging(ADD REFERENCE).
\item
More comparisons of learners using COCOMOST as a ``strawman'' FSS model.
\item
Due to COCOMOST's fairly quick runtime it could be used by WRAPPER algorithms (add ref?) such as COSEEKMO.
\ei


\bibliographystyle{IEEEtranBST/IEEEtran}
\bibliography{myrefs,../../../../../tex/refs}

\begin{biography}[{\includegraphics[width=1in,clip,keepaspectratio]
{stuff/timbeach}}]{Tim Menzies}
is an associate professor at West Virginia University and works with NASA on software quality.
His recent research concerns modeling and learning, with a particular focus on lightweight modeling methods.
He received his PhD in AI and knowledge engineering from the University of New South Wales.
Contact him at tim@menzies.us.
\end{biography}

\begin{biography}[{\includegraphics[width=1in,clip,keepaspectratio]
{stuff/dan}}]{Dan Baker}
is a graduate student at West Virginia University.  He received his Bachelor's degree in Computer Science
from West Virginia University in May of 2006.  For his senior project he developed the database used in the
West Virginia Crime Reduction and Information Management Effort.
He is currently working on his Master's degree in Computer Science at West Virginia University
while working as a research assistant for Dr.~Menzies.  His research focuses on data mining in data starved domains such as software cost estimation.
He is the president of the student chapter of Upsilon Pi Epsilon at West Virginia University.
UPE is the first and only international honor society for the Computing Sciences.
\end{biography}


\appendix

{\bf APPENDIX A - COCOMO-I vs COCOMO-II:}
In COCOMO II,
the exponential COCOMO 81
term $b$ was expanded into the following expression:

{\small
\begin{equation}\label{eq:coc2}
b + 0.01 \times \sum_j SF_j
\end{equation}}
\noindent
where $b$ is 0.91 in COCOMO II 2000, and $SF_j$ is one of five {\em
scale factors} that exponentially influence effort.  Other changes in
COCOMO II included dropping the development modes
of \fig{eso} as well as some modifications
to the  list of effort multipliers and
their associated numeric constants (see appendix E).  


{\bf APPENDIX B - Calculating Correlation:}
Given a test set of size $T$, correlation is calculated
as follows:
{
\[
\begin{array}{l@{~}c@{~}l@{~}l@{~}c@{~}l}
\bar{p}&=& \frac{\sum_i^T predicted_i}{T}&\bar{a}   &=& \frac{\sum_i^T actual_i}{T}\\
S_{p}          &=& \frac{\sum_i^T(predicted_i - \bar{p})^2}{T-1}&
S_{a}          &=& \frac{\sum_i^T(actual_i    - \bar{a})^2}{T-1}\\
S_{pa}         &=& \frac{\sum_i^T(predicted_i - \bar{p})(actual_i - \bar{a})}{T-1}\\
corr           &=& S_{pa}/\sqrt{S_p*S_a}\\
\end{array}
\]}


{\bf APPENDIX C - Local Calibration:}
% TODO: change to method without using logged numerics
This approach assumes that a  matrix $D_{i,j}$ 
holds:
\bi
\item
The natural log of the
$KLOC$ estimates;
\item
The natural log of  the actual efforts
for projects $1 \le j \le t$;
\item
The  natural logarithm of the cost drivers (the scale factors
and effort multipliers) at locations $1\le i \le 15$ (for COCOMO 81)
or $1 \le i \le 22$ (for COCOMO-II).
\ei
With those assumptions, Boehm~\cite{Boehm81} shows that
for COCOMO 81,
the following calculation yields estimates
for ``$a$'' and ``$b$'' that minimize the sum of the squares of residual
errors:

\begin{equation}\label{eq:ab}{\small
\left.
\begin{array}{rcl}
EAF_i& =& \sum_j^N D_{i,j}\\
a_0  & =& t \\
a_1  & =& \sum_i^t KLOC_i\\
a_2  & =& \sum_i^t (KLOC_i)^2\\
d_0  & =& \sum_i^t \left(actual_i - EAF_i\right)\\
d_1  & =& \sum_i^t \left((actual_i - EAF_i)*KLOC_i\right)\\
b  & =& (a_0d_1 - a_1*d_0)/(a_0a_2 - a_1^2)\\
a_3    & =& (a_2d_0 - a_1d_1)/(a_0a_2 - a_1^2)\\
a    & =& e^{a_3}
\end{array}\right\}}
\end{equation}



{\bf APPENDIX D -  COCOMO Numerics:}
\fig{em} shows the COCOMO 81 $EM_j$ (effort multipliers).
The effects of these multipliers on the effort are shown in
 \fig{effortmults}. 
Increasing the {\em upper} and {\em lower} groups of variables will {\em decrease} or {\em 
increase} the effort estimate, respectively.

\begin{figure}
\begin{center}
{\scriptsize
\begin{tabular}{l|r@{:~}l|}\cline{2-3}
upper:   &acap&analysts capability\\
increase &pcap&programmers capability\\
these to &aexp&application experience\\
decrease &modp&modern programming practices\\
effort   &tool&use of software tools\\
         &vexp&virtual machine experience\\
         &lexp&language experience\\\cline{2-3}
middle   &sced&schedule constraint\\\cline{2-3}
lower:   &data&data base size\\
decrease &turn&turnaround time\\
these to &virt&machine volatility\\
increase &stor&main memory constraint\\
effort   &time&time constraint for cpu\\
         &rely&required software reliability\\
         &cplx&process complexity\\\cline{2-3}
\end{tabular}}
\end{center}
\caption{{
COCOMO 81 effort multipliers.}}\label{fig:em}
\end{figure}


\begin{figure}
\begin{center}
{\scriptsize
\begin{tabular}{|l|c|r@{~}|r@{~}|r@{~}|r@{~}|r@{~}|r|}
    \hline
&    &very&&&&very&extra\\
    &&low&low&nominal&high&high&high\\
    \hline
upper&ACAP   &1.46   &1.19   &1.00   &0.86   &0.71   &\\
(increase&PCAP   &1.42 &1.17   &1.00   &0.86   &0.70 &\\
these to&AEXP   &1.29 &1.13   &1.00   &0.91   &0.82   &\\
decrease&MODP   &1.2  &1.10 &1.00 &0.91 &0.82 &\\
effort)&TOOL   &1.24 &1.10 &1.00 &0.91 &0.83 &\\
&VEXP   &1.21 &1.10 &1.00 &0.90 &  &\\
&LEXP   &1.14 &1.07 &1.00 &0.95 &  &\\\hline
middle&SCED   &1.23 &1.08 &1.00 &1.04 &1.10 &  \\\hline
lower&DATA   &    & 0.94 &1.00 &1.08 &1.16&\\
(increase&TURN   &       &0.87   &1.00   &1.07   &1.15   &\\
these to&VIRT   &       &0.87   &1.00   &1.15   &1.30   &\\
increase&STOR   &       &       &1.00   &1.06   &1.21   &1.56\\
effort)&TIME   &  &    &1.00   &1.11   &1.30   &1.66\\
&RELY   &0.75& 0.88& 1.00 & 1.15 & 1.40&\\
&CPLX   &0.70 &0.85 &1.00 &1.15 &1.30 &1.65\\
    \hline
\end{tabular}}
\end{center}
\caption{{ The precise COCOMO 81 effort multiplier
values.}}\label{fig:effortmults}
\end{figure}

\begin{figure}
\begin{center}
{\scriptsize
\begin{tabular}{|l|c|r@{~}|r@{~}|r@{~}|r@{~}|r@{~}|r|}
    \hline
&    &very&&&&very&extra\\
    &&low&low&nominal&high&high&high\\
    \hline
upper &ACAP           & 1.2    & 1.1& 1.00   & 0.9& 0.8     & \\
(increase &PCAP           & 1.2    & 1.1& 1.00   & 0.9& 0.8     & \\
these to&AEXP           & 1.2    & 1.1& 1.00   & 0.9& 0.8     & \\
decrease&MODP           & 1.2    & 1.1& 1.00   & 0.9& 0.8     & \\
effort)&TOOL           & 1.2    & 1.1& 1.00   & 0.9& 0.8     & \\
&VEXP           & 1.2    & 1.1& 1.00   & 0.9&          & \\
&LEXP           & 1.2    & 1.1& 1.00   & 0.9&          &\\\hline
middle&SCED     & 1.2    & 1.1& 1.00   & 1.1& 1.2     & \\\hline

lower&DATA           &        & 0.9  & 1.00   & 1.1& 1.2     & \\
(increase&TURN           &         & 0.9 & 1.00   & 1.1& 1.2     & \\
these to&VIRT           &         & 0.9 & 1.00   & 1.1& 1.2    & \\
increase&STOR           &         &     & 1.00   & 1.1& 1.2     & 1.3\\
effort)&TIME           &         &     & 1.00   & 1.1& 1.2     & 1.3\\
&RELY      & 0.8    & 0.9  & 1.00   & 1.1& 1.2     & \\
&CPLX           & 0.8    & 0.9  & 1.00   & 1.1& 1.2     & 1.3\\    \hline
\end{tabular}}
\end{center}
\caption{{ Rounded COCOMO 81 effort multiplier
values.}}\label{fig:roundedem}
\end{figure}

\fig{roundedem} shows the COCOMO 81 effort multipliers of \fig{effortmults}, rounded
and simplified to two significant figures.

\fig{emsf2},
\fig{effortmults2} and
\fig{effortmultsrd2} show the COCOMO-II values analogous to
\fig{em},
 \fig{effortmults}
and \fig{roundedem} (respectively).


\begin{figure}
\begin{center}
{\scriptsize
\begin{tabular}{l|r@{:~}l|}\cline{2-3}
scale   &prec & have we done this before?\\
factors &flex & development flexibility \\
(exponentially        &resl & any risk resolution activities?\\
 decrease       &team &  team cohesion\\
 effort)       &pmat & process maturity\\\hline
upper  &acap & analyst capability\\
(linearly       &pcap & programmer capability\\
 decrease      &pcon & programmer continuity\\
effort)       &aexp &  analyst experience\\
       &pexp &  programmer experience\\
       &ltex &  language and tool experience\\
       &tool &  tool use\\
       &site &  multiple site development\\
       &sced & length of schedule   \\\hline
lower &rely &    required reliability  \\
(linearly      &data &   secondary memory  storage requirements\\
increase      &cplx &  program complexity\\
effort)      &ruse &  software reuse\\
      &docu &   documentation requirements\\
      &time &   runtime pressure\\
      &stor &   main memory requirements\\
     &pvol &    platform volatility  \\\cline{2-3}
\end{tabular}}
\end{center}
\caption{{
The COCOMO~II scale factors and effort multipliers.}}\label{fig:emsf2}
\end{figure}



\begin{figure}
\begin{center}
{\scriptsize
\begin{tabular}{|l|c|r@{~}|r@{~}|r@{~}|r@{~}|r@{~}|r@{~}|r|}
    \hline &    &extra &very&   &       &    &very &extra\\
           &    &low &low &low&nominal&high&high &high\\
    \hline
scale   &prec &   &6.20  &4.96  &3.72  &2.48  &1.24  &0.00\\
factors &flex &   &5.07  &4.05  &3.04  &2.03  &1.01  &0.00\\
(exponentially        &resl &   &7.07  &5.65  &4.24  &2.83  &1.41  &0.00\\
decreases         &team &   &5.48  &4.38  &3.29  &2.19  &1.10  &0.00\\
effort)        &pmat &   &7.80  &6.24  &4.68  &3.12  &1.56  &0.00\\
    \hline
upper  &acap &   &1.42  &1.19  &1.00  &0.85  &0.71  &  \\
(linearly       &pcap &   &1.34  &1.15  &1.00  &0.88  &0.76  &  \\
 decreases      &pcon &   &1.29  &1.12  &1.00  &0.90  &0.81  &  \\
effort)       &aexp &   &1.22  &1.10  &1.00  &0.88  &0.81  &  \\
       &pexp &   &1.19  &1.09  &1.00  &0.91  &0.85  &  \\
       &ltex &   &1.20  &1.09  &1.00  &0.91  &0.84  &  \\
       &tool &   &1.17  &1.09  &1.00  &0.90  &0.78  &  \\
       &site &   &1.22  &1.09  &1.00  &0.93  &0.86  &0.80\\
       &sced &   &1.43  &1.14  &1.00  &1.00  &1.00  &  \\\hline
lower &rely &    &0.82   &0.92  &1.00  &1.10  &1.26  &  \\
(linearly      &data &    &       &0.90  &1.00  &1.14  &1.28  &  \\
 increases    &cplx &    &0.73   &0.87  &1.00  &1.17  &1.34  &1.74\\
 effort)     &ruse &    &       &0.95  &1.00  &1.07  &1.15  &1.24\\
      &docu &    &0.81   &0.91  &1.00  &1.11  &1.23  &  \\
      &time &    &       &      &1.00  &1.11  &1.29  &1.63\\
      &stor &    &       &      &1.00  &1.05  &1.17  &1.46\\
     &pvol &    &       &0.87  &1.00  &1.15  &1.30  &  \\
    \hline
\end{tabular}}
\end{center}
\caption{{ The precise COCOMO II numerics.}}\label{fig:effortmults2}
\end{figure}


\begin{figure}
\begin{center}
{\scriptsize
\begin{tabular}{|l|c|r@{~}|r@{~}|r@{~}|r@{~}|r@{~}|r@{~}|r|}
    \hline &    &extra &very&   &       &    &very &extra\\
           &    &low &low &low&nominal&high&high &high\\
    \hline
Scale   &PREC &   &6.3  &5.1  &3.8  &2.5  &1.3  &0\\
Factors &FLEX &   &6.3  &5.1  &3.8  &2.5  &1.3  &0\\
        &RESL &   &6.3  &5.1  &3.8  &2.5  &1.3  &0\\
        &TEAM &   &6.3  &5.1  &3.8  &2.5  &1.3  &0\\
        &PMAT &   &6.3  &5.1  &3.8  &2.5  &1.3  &0\\
    \hline
upper  &ACAP &   &1.3  &1.1  &1.0  &0.9  &0.8  &  \\
       &PCAP &   &1.3  &1.1  &1.0  &0.9  &0.8  &  \\
       &PCON &   &1.3  &1.1  &1.0  &0.9  &0.8  &  \\
       &AEXP &   &1.3  &1.1  &1.0  &0.9  &0.8  &  \\
       &PEXP &   &1.3  &1.1  &1.0  &0.9  &0.8  &  \\

       &LTEX &   &1.3  &1.1  &1.0  &0.9  &0.8  &  \\
       &TOOL &   &1.3  &1.1  &1.0  &0.9  &0.8  &  \\
       &SITE &   &1.3  &1.1  &1.0  &0.9  &0.8  &0.8\\
       &SCED &   &1.3  &1.1  &1.0  &0.9  &0.8  &  \\
    \hline
lower &RELY &    &0.8   &0.9  &1.0  &1.1  &1.3  &  \\
      &DATA &    &       &0.9  &1.0  &1.1  &1.3  &  \\
      &CPLX &    &0.8   &0.9  &1.0  &1.1  &1.3  &1.5\\
      &RUSE &    &       &0.9  &1.0  &1.1  &1.3  &1.5\\
      &DOCU &    &0.8   &0.9  &1.0  &1.1  &1.3  &  \\
      &TIME &    &       &      &1.0  &1.1  &1.3  &1.5\\
      &STOR &    &       &      &1.0  &1.1  &1.3  &1.5\\
      &PVOL &    &       &0.9  &1.0  &1.1  &1.3  &  \\
    \hline
\end{tabular}}
\end{center}
\caption{{ The rounded COCOMO II numerics.}}\label{fig:effortmultsrd2}
\end{figure}


%\newpage
{\bf APPENDIX E:}
%\begin{table}[!t]
%\begin{table}
\begin{figure}
\begin{tabular}{|l|l|l|l|l|l|l|l|l|l|}
\hline
 & & & & &Avg&Avg&Avg&Avg&Avg\\
Data&Learn&Search&Hrzn&Eval&MMRE&SD&Pred30&Corr&Attr\\
\hline
coc81 &LC&N/A&N/A&N/A&44.72&37.28&38.75&0.94&15.00\\
\hline
coc81 &attr&forward&0&mmre&93.63&101.68&27.92&0.80&1.00\\
\hline
coc81 &attr&forward&0&sd\_mre&93.63&101.68&27.92&0.80&1.00\\
\hline
coc81 &attr&forward&0&pred30&93.63&101.68&27.92&0.80&1.00\\
\hline
coc81 &attr&forward&0&corr&93.63&101.68&27.92&0.80&1.00\\
\hline
coc81 &attr&forward&1&mmre&49.42&42.23&36.25&0.92&13.60\\
\hline
coc81 &attr&forward&1&sd\_mre&65.11&59.69&32.50&0.88&8.97\\
\hline
coc81 &attr&forward&1&pred30&83.87&81.42&26.67&0.84&2.90\\
\hline
coc81 &attr&forward&1&corr&64.39&58.99&30.00&0.88&9.63\\
\hline
coc81 &attr&forward&2&mmre&45.73&37.66&38.75&0.94&14.80\\
\hline
coc81 &attr&forward&2&sd\_mre&51.27&45.17&37.08&0.92&13.30\\
\hline
coc81 &attr&forward&2&pred30&76.11&70.23&26.67&0.86&5.27\\
\hline
coc81 &attr&forward&2&corr&57.77&50.99&32.50&0.90&11.67\\
\hline
coc81 &attr&forward&4&mmre&45.73&37.66&38.75&0.94&14.80\\
\hline
coc81 &attr&forward&4&sd\_mre&49.08&42.47&37.92&0.93&13.67\\
\hline
coc81 &attr&forward&4&pred30&72.18&67.43&28.75&0.87&6.73\\
\hline
coc81 &attr&forward&4&corr&53.95&46.20&33.75&0.90&12.37\\
\hline
coc81 &attr&forward&8&mmre&45.73&37.66&38.75&0.94&14.80\\
\hline
coc81 &attr&forward&8&sd\_mre&49.08&42.47&37.92&0.93&13.67\\
\hline
coc81 &attr&forward&8&pred30&69.70&63.81&27.50&0.88&7.37\\
\hline
coc81 &attr&forward&8&corr&53.95&46.20&33.75&0.90&12.37\\
\hline
coc81 &attr&forward&16&mmre&45.73&37.66&38.75&0.94&14.80\\
\hline
coc81 &attr&forward&16&sd\_mre&49.08&42.47&37.92&0.93&13.67\\
\hline
coc81 &attr&forward&16&pred30&69.53&63.22&27.08&0.88&7.43\\
\hline
coc81 &attr&forward&16&corr&53.95&46.20&33.75&0.90&12.37\\
\hline
coc81 &attr&back&0&mmre&49.99&43.76&35.00&0.93&14.00\\
\hline
coc81 &attr&back&0&sd\_mre&49.99&43.76&35.00&0.93&14.00\\
\hline
coc81 &attr&back&0&pred30&49.99&43.76&35.00&0.93&14.00\\
\hline
coc81 &attr&back&0&corr&49.99&43.76&35.00&0.93&14.00\\
\hline
coc81 &attr&back&1&mmre&49.99&43.76&35.00&0.93&14.00\\
\hline
coc81 &attr&back&1&sd\_mre&50.32&44.34&35.00&0.93&13.93\\
\hline
coc81 &attr&back&1&pred30&52.50&45.07&34.17&0.92&13.67\\
\hline
coc81 &attr&back&1&corr&51.20&45.19&36.25&0.92&13.73\\
\hline
coc81 &attr&back&2&mmre&50.18&43.67&35.00&0.93&13.97\\
\hline
coc81 &attr&back&2&sd\_mre&50.41&44.64&35.00&0.93&13.87\\
\hline
coc81 &attr&back&2&pred30&54.01&45.74&32.92&0.92&13.27\\
\hline
coc81 &attr&back&2&corr&53.51&46.26&32.50&0.90&12.57\\
\hline
coc81 &attr&back&4&mmre&51.47&44.53&34.17&0.92&13.87\\
\hline
coc81 &attr&back&4&sd\_mre&52.70&48.03&33.75&0.93&13.40\\
\hline
coc81 &attr&back&4&pred30&55.97&47.29&31.67&0.91&12.30\\
\hline
coc81 &attr&back&4&corr&54.05&46.27&32.50&0.90&12.40\\
\hline
coc81 &attr&back&8&mmre&52.43&45.89&33.75&0.92&13.53\\
\hline
coc81 &attr&back&8&sd\_mre&54.46&49.98&35.42&0.92&12.67\\
\hline
coc81 &attr&back&8&pred30&58.27&49.29&27.50&0.90&11.47\\
\hline
coc81 &attr&back&8&corr&54.31&46.56&32.08&0.90&12.30\\
\hline
coc81 &attr&back&16&mmre&52.43&45.89&33.75&0.92&13.53\\
\hline
coc81 &attr&back&16&sd\_mre&54.66&50.33&35.83&0.92&12.53\\
\hline
coc81 &attr&back&16&pred30&59.70&50.37&27.50&0.90&11.23\\
\hline
coc81 &attr&back&16&corr&54.31&46.56&32.08&0.90&12.30\\
\hline
\end{tabular}
\caption{Standard COCOMO Local Calibration vs. Greedy FSS for the COC81 dataset.}
\label{fig:attrCOC81}
\end{figure}
%\end{table}

\newpage
{\bf APPENDIX F:}
%\section{Appendix 2}
%\begin{table}[h]
\begin{figure}
\begin{tabular}{|l|l|l|l|l|l|l|l|l|l|}
\hline
 & & & & &Avg&Avg&Avg&Avg&Avg\\
Data&Learn&Search&Hrzn&Eval&MMRE&SD&Pred30&Corr&Attr\\
\hline
nasa93&LC&N/A&N/A&N/A&43.95&49.96&52.92&0.90&15.00\\
\hline
nasa93&attr&forward&0&mmre&52.88&54.98&47.08&0.77&1.00\\
\hline
nasa93&attr&forward&0&sd\_mre&52.88&54.98&47.08&0.77&1.00\\
\hline
nasa93&attr&forward&0&pred30&52.88&54.98&47.08&0.77&1.00\\
\hline
nasa93&attr&forward&0&corr&52.88&54.98&47.08&0.77&1.00\\
\hline
nasa93&attr&forward&1&mmre&45.94&46.05&49.17&0.86&5.73\\
\hline
nasa93&attr&forward&1&sd\_mre&48.45&50.57&50.42&0.83&2.40\\
\hline
nasa93&attr&forward&1&pred30&48.77&49.74&48.33&0.83&1.97\\
\hline
nasa93&attr&forward&1&corr&44.16&43.57&47.50&0.86&5.47\\
\hline
nasa93&attr&forward&2&mmre&44.21&44.06&50.00&0.87&7.20\\
\hline
nasa93&attr&forward&2&sd\_mre&48.45&50.19&49.17&0.83&2.67\\
\hline
nasa93&attr&forward&2&pred30&46.20&45.71&47.08&0.85&6.30\\
\hline
nasa93&attr&forward&2&corr&43.23&43.17&51.25&0.87&6.97\\
\hline
nasa93&attr&forward&4&mmre&44.51&44.06&50.42&0.88&8.33\\
\hline
nasa93&attr&forward&4&sd\_mre&47.22&50.29&50.00&0.84&3.57\\
\hline
nasa93&attr&forward&4&pred30&46.73&46.90&48.75&0.85&7.80\\
\hline
nasa93&attr&forward&4&corr&41.95&42.06&53.75&0.87&7.80\\
\hline
nasa93&attr&forward&8&mmre&44.05&44.23&51.67&0.88&8.40\\
\hline
nasa93&attr&forward&8&sd\_mre&46.88&49.17&50.00&0.83&4.90\\
\hline
nasa93&attr&forward&8&pred30&46.02&47.31&50.83&0.86&8.10\\
\hline
nasa93&attr&forward&8&corr&41.72&41.72&53.33&0.88&7.93\\
\hline
nasa93&attr&forward&16&mmre&44.05&44.23&51.67&0.88&8.40\\
\hline
nasa93&attr&forward&16&sd\_mre&46.82&49.42&49.58&0.83&5.10\\
\hline
nasa93&attr&forward&16&pred30&46.02&47.31&50.83&0.86&8.10\\
\hline
nasa93&attr&forward&16&corr&41.72&41.72&53.33&0.88&7.93\\
\hline
nasa93&attr&back&0&mmre&42.34&43.50&52.08&0.90&14.00\\
\hline
nasa93&attr&back&0&sd\_mre&42.34&43.50&52.08&0.90&14.00\\
\hline
nasa93&attr&back&0&pred30&42.34&43.50&52.08&0.90&14.00\\
\hline
nasa93&attr&back&0&corr&42.34&43.50&52.08&0.90&14.00\\
\hline
nasa93&attr&back&1&mmre&41.66&41.23&52.50&0.89&11.70\\
\hline
nasa93&attr&back&1&sd\_mre&41.77&43.19&52.08&0.90&11.07\\
\hline
nasa93&attr&back&1&pred30&42.45&42.11&52.92&0.90&13.10\\
\hline
nasa93&attr&back&1&corr&42.85&41.65&50.00&0.88&10.43\\
\hline
nasa93&attr&back&2&mmre&44.76&44.95&50.00&0.87&9.33\\
\hline
nasa93&attr&back&2&sd\_mre&48.67&51.18&48.75&0.82&4.53\\
\hline
nasa93&attr&back&2&pred30&41.93&41.19&51.25&0.89&12.67\\
\hline
nasa93&attr&back&2&corr&42.23&43.24&50.83&0.88&8.10\\
\hline
nasa93&attr&back&4&mmre&45.72&46.65&52.08&0.86&8.23\\
\hline
nasa93&attr&back&4&sd\_mre&47.60&48.90&49.17&0.82&3.77\\
\hline
nasa93&attr&back&4&pred30&42.79&43.44&50.83&0.88&11.13\\
\hline
nasa93&attr&back&4&corr&41.87&42.37&51.25&0.88&7.63\\
\hline
nasa93&attr&back&8&mmre&45.77&47.00&54.58&0.85&7.73\\
\hline
nasa93&attr&back&8&sd\_mre&47.60&48.90&49.17&0.82&3.77\\
\hline
nasa93&attr&back&8&pred30&44.52&45.91&49.17&0.88&10.40\\
\hline
nasa93&attr&back&8&corr&41.91&42.59&52.08&0.88&7.47\\
\hline
nasa93&attr&back&16&mmre&45.77&47.00&54.58&0.85&7.73\\
\hline
nasa93&attr&back&16&sd\_mre&47.60&48.90&49.17&0.82&3.77\\
\hline
nasa93&attr&back&16&pred30&44.78&46.42&48.75&0.88&10.30\\
\hline
nasa93&attr&back&16&corr&41.91&42.59&52.08&0.88&7.47\\
\hline
\end{tabular}
\caption{Standard COCOMO Local Calibration vs. Greedy FSS for the NASA93 dataset.}
\label{fig:attrNASA}
\end{figure}
%\end{table}


\end{document}
