# Shared settings read by the functions below.
# Java     - directory expected to contain weka.jar (taken from $Here)
# Weka     - command prefix used to launch any Weka class; it is expanded
#            unquoted on purpose so it splits into separate words
# Seed     - value passed to learners via -s
# tempARFF - scratch file that holds the preprocessed data set
Java=${Here}
Weka="nice -n 20 java -Xmx2048M -cp ${Java}/weka.jar "
Seed=1
tempARFF='./temp.arff'

###########################################################################
# Entry point for the system #
# Runs the whole pipeline for one dataset group.
# Arguments: $1 - dataset group understood by dataList ("ee" or "demo")
#            $2 - data-splitting technique (e.g. "loo")
getResults()
{
	# Quoted so each value reaches dataList as exactly one argument.
	dataList "$1" "$2"
}

# List of datasets to be tested #	
# Runs every dataset function belonging to the requested group.
# Arguments: $1 - group name: "ee" (effort-estimation datasets) or "demo"
#            $2 - data-splitting technique forwarded to each dataset
#                 function; "demo" ignores it and always uses "loo"
# Any other group name runs nothing.
dataList()
{
	local dataset_fn
	case "$1" in
		ee)
			# Effort Estimation Datasets #
			for dataset_fn in \
				d_albrecht d_china d_cocomo81 d_cocomo81e d_cocomo81o \
				d_cocomo81s d_desharnais d_desharnaisL1 d_desharnaisL2 \
				d_desharnaisL3 d_finnish d_kemerer d_maxwell d_miyazaki94 \
				d_nasa93center1 d_nasa93center2 d_nasa93center5 sdr telecom1
			do
				# Quoted so the technique is passed on as a single argument.
				"$dataset_fn" "$2"
			done
			;;
		demo)
			d_albrecht loo
			d_cocomo81s loo
			;;
	esac
}

# List of data-splitting techniques #
# Dispatches to the data splitter named by $1.
# Arguments: $1 - splitting technique; only "loo" (leave-one-out) is known,
#                 anything else is silently ignored.
splitList()
{
	case "$1" in
		loo)
			# Leave-One Out #
			ds_loo
			;;
	esac
}

# List of preprocessor techniques #
# Runs each enabled preprocessor in a fixed order; every preprocessor
# function in turn drives the learners itself.
preprocessorList()
{
	local pp_step
	for pp_step in \
		pp_none pp_log \
		pp_freq3bin pp_freq5bin \
		pp_width3bin pp_width5bin \
		pp_normalize pp_pca
	do
		"$pp_step"
	done
}

# List of learner techniques #
# Runs each enabled learner in a fixed order.
# Disabled learners (kept out of the list on purpose):
#   l_cartY - requires non-numeric class
#   l_cartN - requires non-numeric class
#   l_slreg - doesn't support nonnumeric attributes
learnerList()
{
	local learner_step
	# l_zeroR is the "Non COMBA 1" entry and runs last.
	for learner_step in l_oneNN l_fiveNN l_nnet l_zeroR; do
		"$learner_step"
	done
}

# List of error measures #
# Derives every error measure from the raw actual/predicted file of the
# current dataset/splitter/preprocessor/learner combination.
errorList()
{
	local measure
	for measure in e_ar e_mre e_mer e_bre e_ibre; do
		"$measure"
	done
}
###########################################################################
# Data Sets #
# Dataset "albrecht": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_albrecht()
{
	data="./data/albrecht.arff"
	data_name=albrecht
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "china": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_china()
{
	data="./data/china.arff"
	data_name=china
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "cocomo81": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_cocomo81()
{
	data="./data/cocomo81.arff"
	data_name=cocomo81
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "cocomo81e": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_cocomo81e()
{
	data="./data/cocomo81e.arff"
	data_name=cocomo81e
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "cocomo81o": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_cocomo81o()
{
	data="./data/cocomo81o.arff"
	data_name=cocomo81o
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "cocomo81s": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_cocomo81s()
{
	data="./data/cocomo81s.arff"
	data_name=cocomo81s
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "desharnais": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_desharnais()
{
	data="./data/desharnais.arff"
	data_name=desharnais
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "desharnaisL1": records its ARFF path and short name, then runs
# the requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_desharnaisL1()
{
	data="./data/desharnaisL1.arff"
	data_name=desharnaisL1
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "desharnaisL2": records its ARFF path and short name, then runs
# the requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_desharnaisL2()
{
	data="./data/desharnaisL2.arff"
	data_name=desharnaisL2
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "desharnaisL3": records its ARFF path and short name, then runs
# the requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_desharnaisL3()
{
	data="./data/desharnaisL3.arff"
	data_name=desharnaisL3
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "finnish": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_finnish()
{
	data="./data/finnish.arff"
	data_name=finnish
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "kemerer": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_kemerer()
{
	data="./data/kemerer.arff"
	data_name=kemerer
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "maxwell": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_maxwell()
{
	data="./data/maxwell.arff"
	data_name=maxwell
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "miyazaki94": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_miyazaki94()
{
	data="./data/miyazaki94.arff"
	data_name=miyazaki94
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "nasa93center1": records its ARFF path and short name, then runs
# the requested splitter. Note the file name uses underscores while the
# short name does not.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_nasa93center1()
{
	data="./data/nasa93_center_1.arff"
	data_name=nasa93center1
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "nasa93center2": records its ARFF path and short name, then runs
# the requested splitter. Note the file name uses underscores while the
# short name does not.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_nasa93center2()
{
	data="./data/nasa93_center_2.arff"
	data_name=nasa93center2
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "nasa93center5": records its ARFF path and short name, then runs
# the requested splitter. Note the file name uses underscores while the
# short name does not.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
d_nasa93center5()
{
	data="./data/nasa93_center_5.arff"
	data_name=nasa93center5
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "sdr": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
sdr()
{
	data="./data/sdr.arff"
	data_name=sdr
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
# Dataset "telecom1": records its ARFF path and short name, then runs the
# requested splitter.
# Globals:   data, data_name (written)
# Arguments: $1 - data-splitting technique, forwarded to splitList
telecom1()
{
	data="./data/telecom1.arff"
	data_name=telecom1
	# Quoted so the technique reaches splitList as exactly one argument.
	splitList "$1"
}
###########################################################################
# Data Splitters #
# Leave-One Out #
# Leave-one-out splitter: sets the fold count to the number of instances in
# the current data set and then runs every preprocessor.
# Globals:   data (read); splitter_name, numberInstances, options (written)
ds_loo()
{
	splitter_name=loo
	# Count the instance rows that follow the @data marker. Blank lines and
	# "%" comment lines are not instances; counting them would ask for more
	# folds than there are rows. The marker is matched case-insensitively.
	numberInstances=$(gawk '
		{
			if (tolower($1) == "@data") in_data = 1
			else if (in_data && NF && $1 !~ /^%/) rows++
		}
		END { print rows + 0 }
	' "$data")
	# -x <folds>: one fold per instance
	options="-x $numberInstances"
	# pass to preprocessor
	preprocessorList
}
###########################################################################
# Preprocessors #
# None #
# "none" preprocessing step: selects Weka's AllFilter class and then runs
# the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_none()
{
	preprocessor="weka.filters.AllFilter"
	preprocessor_name="none"
	learnerList
}
# Logarithmic #
# "log" preprocessing step: applies a MathExpression filter to the last
# attribute (zero stays zero, otherwise log(A) divided by a constant), then
# runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_log()
{
	# The divisor 0.434294482 is log10(e).
	# NOTE(review): confirm which base MathExpression's log() uses.
	preprocessor="weka.filters.unsupervised.attribute.MathExpression -R last -E ifelse(A=0,0,(log(A))/0.434294482)"
	preprocessor_name="log"
	learnerList
}
# Normalization #
# "norm" preprocessing step: selects the instance-level Normalize filter
# with the last attribute as class, then runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_normalize()
{
	preprocessor="weka.filters.unsupervised.instance.Normalize -c last"
	preprocessor_name="norm"
	learnerList
}
# 3-Bin Equal Frequency Discretization #
# "freq3bin" preprocessing step: Discretize into 3 equal-frequency bins,
# then runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_freq3bin()
{
	preprocessor="weka.filters.unsupervised.attribute.Discretize -B 3 -F -c last"
	preprocessor_name="freq3bin"
	learnerList
}
# 5-Bin Equal Frequency Discretization #
# "freq5bin" preprocessing step: Discretize into 5 equal-frequency bins,
# then runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_freq5bin()
{
	preprocessor="weka.filters.unsupervised.attribute.Discretize -B 5 -F -c last"
	preprocessor_name="freq5bin"
	learnerList
}
# 3-Bin Equal Width Discretization #
# "width3bin" preprocessing step: Discretize into 3 equal-width bins, then
# runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_width3bin()
{
	preprocessor="weka.filters.unsupervised.attribute.Discretize -B 3 -c last"
	preprocessor_name="width3bin"
	learnerList
}
# 5-Bin Equal Width Discretization #
# "width5bin" preprocessing step: Discretize into 5 equal-width bins, then
# runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_width5bin()
{
	preprocessor="weka.filters.unsupervised.attribute.Discretize -B 5 -c last"
	preprocessor_name="width5bin"
	learnerList
}
# Stepwise Regression #
# Stepwise-regression preprocessing placeholder ("not in weka?").
# No Weka filter is assigned for this step, so running the learners would
# reuse whatever $preprocessor an earlier step left behind and file those
# results under the "SWreg" name. Refuse to run instead.
# Globals: none written
# Returns: 1 always
pp_swreg()
{
	echo "pp_swreg: no Weka filter is configured for this preprocessor; skipping." >&2
	return 1
}
# Principal Component Analysis #
# "pca" preprocessing step: selects the PrincipalComponents filter with the
# options below, then runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_pca()
{
	preprocessor="weka.filters.unsupervised.attribute.PrincipalComponents -D -A -1 -M -1 -c last"
	preprocessor_name="pca"
	learnerList
}
# Sequential Filter Sampler #
# "sfs" preprocessing placeholder ("not in weka?").
# No Weka filter is assigned for this step, so running the learners would
# reuse whatever $preprocessor an earlier step left behind and file those
# results under the "sfs" name. Refuse to run instead.
# Globals: none written
# Returns: 1 always
pp_sfs()
{
	echo "pp_sfs: no Weka filter is configured for this preprocessor; skipping." >&2
	return 1
}

# Partial Least Squares Regression #
# "simpls" preprocessing step: selects the PLSFilter with the SIMPLS
# algorithm and 3 components, then runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_simpleplsfilter()
{
	local num_components=3
	preprocessor_name="simpls"
	preprocessor="weka.filters.supervised.attribute.PLSFilter -M -A SIMPLS -P none -c last -C ${num_components}"
	learnerList
}

# "principalComponents" preprocessing step: selects the PrincipalComponents
# filter with the options below, then runs the learners.
# Globals: preprocessor, preprocessor_name (written)
pp_principalComponents()
{
	preprocessor_name=principalComponents
	# The filter lives under weka.filters (as every other filter in this
	# file); there is no weka.learners package to load it from.
	preprocessor="weka.filters.unsupervised.attribute.PrincipalComponents -R 0.95 -A 5 -M -1"
	learnerList
}
###########################################################################
# Learners #
# ZeroR #
# Selects the ZeroR learner and evaluates it.
# Globals: learner, learner_name (written)
l_zeroR()
{
	learner="weka.classifiers.rules.ZeroR"
	learner_name="ZeroR"
	checkEval
}
# Analogy Based Estimation - One Nearest Neighbor #
# Selects the IBk learner with its default settings (labelled "1NN") and
# evaluates it.
# Globals: learner, learner_name (written)
l_oneNN()
{
	learner="weka.classifiers.lazy.IBk"
	learner_name="1NN"
	checkEval
}
# Analogy Based Estimation - Five Nearest Neighbor #
# Selects the IBk learner with -K 5 (labelled "ABE0") and evaluates it.
# Globals: learner, learner_name (written)
l_fiveNN()
{
	learner="weka.classifiers.lazy.IBk -K 5"
	learner_name="ABE0"
	checkEval
}
# Simple Linear Regression #
# Selects the SimpleLinearRegression learner and evaluates it.
# Globals: learner, learner_name (written)
l_slreg()
{
	learner="weka.classifiers.functions.SimpleLinearRegression"
	learner_name="SLReg"
	checkEval
}
# Simple CART with pruning #
# Selects SimpleCart WITH pruning and evaluates it.
# SimpleCart prunes by default and -U switches pruning off, so the pruned
# variant must be run without -U.
# Globals: learner, learner_name (written)
l_cartY()
{
	learner_name=CARTy
	learner="weka.classifiers.trees.SimpleCart"
	checkEval
}
# Simple CART without pruning #
# Selects SimpleCart WITHOUT pruning and evaluates it.
# SimpleCart prunes by default; -U is the switch that disables pruning, so
# the unpruned variant must pass it.
# Globals: learner, learner_name (written)
l_cartN()
{
	learner_name=CARTn
	learner="weka.classifiers.trees.SimpleCart -U"
	checkEval
}
# Neural Net #
# Selects the MultilayerPerceptron learner with -N 50 and evaluates it.
# Globals: learner, learner_name (written)
l_nnet()
{
	learner="weka.classifiers.functions.MultilayerPerceptron -N 50"
	learner_name="nnet"
	checkEval
}
# Partial Least Squares Regression #
# Selects the learner labelled "plsr" and evaluates it.
# NOTE(review): the label says partial least squares, but the class chosen
# is LeastMedSq - confirm which learner is actually intended.
# Globals: learner, learner_name (written)
l_plsr()
{
	learner="weka.classifiers.functions.LeastMedSq"
	learner_name="plsr"
	checkEval
}
# Principal Components Regression #
# Principal-components-regression placeholder ("not in weka?").
# No Weka classifier is assigned for this learner, so evaluating it would
# run whatever $learner an earlier step left behind and file those results
# under the "pcr" name. Refuse to run instead.
# Globals: none written
# Returns: 1 always
l_pcr()
{
	echo "l_pcr: no Weka classifier is configured for this learner; skipping." >&2
	return 1
}
# Stepwise Regression #
# Stepwise-regression learner placeholder ("not in weka?").
# No Weka classifier is assigned for this learner, and the previous body
# re-ran the whole learner list instead of evaluating anything of its own.
# Refuse to run rather than repeat other learners' work.
# Globals: none written
# Returns: 1 always
l_swreg()
{
	echo "l_swreg: no Weka classifier is configured for this learner; skipping." >&2
	return 1
}
###########################################################################
# Error Calculators #
# Absolute Residual Error #
# Absolute residual: |actual - predicted| for each row of the raw file
# (column 1 is actual, column 2 is predicted).
# Rows where either value is empty or zero are skipped; previously the guard
# covered only the subtraction, so such rows printed the residual of an
# earlier row.
# Globals: rawfile, outfile, error_name (written via setRawfile/setOutfile)
e_ar()
{
	setRawfile
	error_name=ar
	setOutfile
	# Rebuild when the derived file is missing or the raw file has been
	# modified since it was last read.
	if [[ ! -e "$outfile" || -N "$rawfile" ]]; then
		sed 's/,/ /g' "$rawfile" |
			gawk '$1 && $2 {
				a = $1 - $2
				if (a < 0) a = -a
				print a
			}' > "$outfile"
	fi
}
# Magnitude of Relative Error #
# Magnitude of relative error: |actual - predicted| / actual for each row of
# the raw file (column 1 is actual, column 2 is predicted).
# Rows where either value is empty or zero are skipped; previously the guard
# covered only the subtraction, so such rows divided a stale residual by an
# empty or zero actual.
# Globals: rawfile, outfile, error_name (written via setRawfile/setOutfile)
e_mre()
{
	setRawfile
	error_name=mre
	setOutfile
	# Rebuild when the derived file is missing or the raw file has been
	# modified since it was last read.
	if [[ ! -e "$outfile" || -N "$rawfile" ]]; then
		sed 's/,/ /g' "$rawfile" |
			gawk '$1 && $2 {
				a = $1 - $2
				if (a < 0) a = -a
				print a / $1
			}' > "$outfile"
	fi
}
# Magnitude of Error Relative to the Estimate #
# Magnitude of error relative to the estimate: |actual - predicted| /
# predicted for each row of the raw file (column 1 is actual, column 2 is
# predicted).
# Rows where either value is empty or zero are skipped; previously the guard
# covered only the subtraction, so such rows divided a stale residual by an
# empty or zero estimate.
# Globals: rawfile, outfile, error_name (written via setRawfile/setOutfile)
e_mer()
{
	setRawfile
	error_name=mer
	setOutfile
	# Rebuild when the derived file is missing or the raw file has been
	# modified since it was last read.
	if [[ ! -e "$outfile" || -N "$rawfile" ]]; then
		sed 's/,/ /g' "$rawfile" |
			gawk '$1 && $2 {
				a = $1 - $2
				if (a < 0) a = -a
				print a / $2
			}' > "$outfile"
	fi
}
# Balanced Relative Error #
# Balanced relative error: |actual - predicted| divided by the smaller of
# the two values, for each row of the raw file (column 1 is actual, column 2
# is predicted).
# Rows where either value is empty or zero are skipped; previously the guard
# covered only the subtraction, so such rows reused a stale residual and
# could divide by zero.
# Globals: rawfile, outfile, error_name (written via setRawfile/setOutfile)
e_bre()
{
	setRawfile
	error_name=bre
	setOutfile
	# Rebuild when the derived file is missing or the raw file has been
	# modified since it was last read.
	if [[ ! -e "$outfile" || -N "$rawfile" ]]; then
		sed 's/,/ /g' "$rawfile" |
			gawk '$1 && $2 {
				a = $1 - $2
				if (a < 0) a = -a
				d = ($1 > $2) ? $2 : $1
				print a / d
			}' > "$outfile"
	fi
}
# Inverted Balanced Relative Error #
# Inverted balanced relative error: |actual - predicted| divided by the
# larger of the two values, for each row of the raw file (column 1 is
# actual, column 2 is predicted).
# Rows where either value is empty or zero are skipped; previously the guard
# covered only the subtraction, so such rows reused a stale residual and
# could divide by zero.
# Globals: rawfile, outfile, error_name (written via setRawfile/setOutfile)
e_ibre()
{
	setRawfile
	error_name=ibre
	setOutfile
	# Rebuild when the derived file is missing or the raw file has been
	# modified since it was last read.
	if [[ ! -e "$outfile" || -N "$rawfile" ]]; then
		sed 's/,/ /g' "$rawfile" |
			gawk '$1 && $2 {
				a = $1 - $2
				if (a < 0) a = -a
				d = ($1 < $2) ? $2 : $1
				print a / d
			}' > "$outfile"
	fi
}
###########################################################################
# Misc #
# Ensures the raw actual/predicted file exists for the current
# dataset/splitter/preprocessor/learner combination, then derives every
# error measure from it.
# Globals: error_name, outfile (written via setOutfile)
checkEval()
{
	error_name=raw
	setOutfile
	# Run Weka only when the raw file is absent. The path is quoted so a
	# name containing whitespace cannot break the test and skip the run.
	if [ ! -e "$outfile" ]; then
		callRawEval
	fi
	errorList
}
# Preprocesses the current data set into $tempARFF, trains/evaluates the
# current learner on it and writes the formatted predictions to $outfile.
# Only the "loo" splitter is handled; any other splitter does nothing.
# Globals: Weka, preprocessor, learner, options, data, tempARFF, outfile,
#          splitter_name (read); Seed (read, then reset to 1)
# Returns: 1 when preprocessing left no usable temp file, otherwise the
#          status of the last command run
callRawEval()
{
	if [ "$splitter_name" = "loo" ]; then
		echo "I am preprocessing."
		# Drop any temp file from an earlier run so a failed preprocess
		# cannot silently feed stale data to the learner.
		rm -f -- "$tempARFF"
		# $Weka and $preprocessor are command strings and must word-split;
		# the file paths are quoted.
		$Weka $preprocessor -i "$data" -o "$tempARFF"
		if [ ! -s "$tempARFF" ]; then
			echo "callRawEval: preprocessing produced no output for $data; skipping learner." >&2
			return 1
		fi
		echo "I am learning."
		$Weka $learner -t "$tempARFF" -s "$Seed" $options -p 0 | formatFile > "$outfile"
		Seed=1
	fi
}
# Filters Weka prediction output on stdin into "actual,predicted" lines on
# stdout. The first five input lines are discarded, as is any row whose
# second or third field is empty or zero.
formatFile()
{
	# Field 2 is the actual value, field 3 the predicted one.
	gawk 'NR > 5 && $2 && $3 { print $2 "," $3 }'
}
# Points $rawfile at the raw actual/predicted file of the current
# combination. As a side effect error_name is left as "raw" and outfile
# holds the same path.
setRawfile()
{
	error_name="raw"
	setOutfile
	rawfile="${outfile}"
}
# Builds the result path for the current combination:
#   ./results/<data>_<splitter>_<preprocessor>_<learner>_<error>.csv
# Globals: data_name, splitter_name, preprocessor_name, learner_name,
#          error_name (read); outfile (written)
setOutfile()
{
	printf -v outfile './results/%s_%s_%s_%s_%s.csv' \
		"$data_name" "$splitter_name" "$preprocessor_name" \
		"$learner_name" "$error_name"
}
###########################################################################

# WEKA Wrapper for Debug #
# Debug wrapper: runs the Weka command prefix with the given arguments.
# Arguments: any number of Weka arguments, passed through unchanged
weka(){
	# "$@" forwards every argument intact; the old $1..$9 list dropped
	# anything past the ninth and re-split arguments containing spaces.
	$Weka "$@"
}

# Make override for debug #
# Debug helper that shadows the make command: changes into $Here and
# re-sources comba.sh from there.
# Globals: Here (read)
# Returns: non-zero without sourcing anything when $Here is unset or the
#          directory cannot be entered
make()
{
	if [ -z "$Here" ]; then
		echo "make: \$Here is not set; cannot locate comba.sh." >&2
		return 1
	fi
	# Stop if the directory is unreachable instead of sourcing whatever
	# comba.sh happens to be found from the current location.
	cd "$Here" || return
	. ./comba.sh
}

# CSV to ARFF conversion #
# Converts <base>.csv to <base>.arff with Weka's CSVLoader.
# Arguments: $1 - path of the file without its extension
# Globals:   Weka (read)
c2a2()
{
	local base=$1
	local converter="weka.core.converters.CSVLoader"
	# $Weka is a command string and must word-split; the file paths are
	# quoted so names containing spaces work.
	$Weka "$converter" "${base}.csv" > "${base}.arff"
}

# System Opening Messages #
# Greeting shown when the file is sourced, followed by a blank line, and a
# custom prompt for the session.
printf '%s\n' "COMBA 2 by Vincent Rogers and William Sica" ""
PS1='COMBA> '