# justin
# not that you need this, but
# http://www.cs.waikato.ac.nz/~remco/weka_bn/node13.html
# may be useful

# Input location and scratch area.
#   Dirs  - directory holding the raw ARFF files
#   Arffs - glob pattern for those files (stored unexpanded; it only expands
#           where the variable is later used unquoted)
#   Tmp   - scratch directory that is wiped and rebuilt on every run
Dirs='../../raw'
Arffs="${Dirs}/*.arff"
Tmp='/tmp/intersect'

## generic - don't touch!!!!
# Command strings for the external tools. They are deliberately plain strings
# that are expanded UNQUOTED at the call sites so they word-split into a
# command plus its arguments.
#
# Here has to be assigned before Log is built: Log embeds $Here at assignment
# time, so an unset Here would silently yield "gawk -f /asLogs".
# The class path is relative (./weka.jar), so the tools only work from a
# directory that contains weka.jar.
Here=$(pwd)
Weka="java   -Xmx1024M -cp ./weka.jar"
Log="gawk -f $Here/asLogs"
Discretize="$Weka weka.filters.supervised.attribute.Discretize "
Rank="$Weka  weka.filters.supervised.attribute.AttributeSelection"
 
# Rebuild the scratch tree from nothing: drop any previous run's directory,
# then create the etc/, arff/ and shared/ subdirectories (mkdir -p also
# creates $Tmp itself).
# ${Tmp:?} aborts the script if Tmp is unset or empty, so the recursive
# delete can never run against an empty path.
if [ -d "${Tmp:?}" ]; then
	rm -rf -- "$Tmp"
fi
mkdir -p -- "$Tmp/etc" "$Tmp/arff" "$Tmp/shared"

#cat<<EOF>$Tmp/etc/seds
#s/loccodeandcomment/loc_code_and_comment/
#s/locodeandcomment/loc_code_and_comment/
#s/locandcomment/loc_code_and_comment/
#s/essential_complexity/ev(g)/
#s/cyclomatic_complexity/v(g)/
#s/halstead_length/n/
#s/halstead_level/l/
#s/num_operators/n1/
#s/num_operands/n2/
#s/unique_operands/uniq_opnd/
#s/unique_operators/uniq_op/
#s/halstead_content/i/
#s/halstead_error_est/b/
#s/halstead_prog_time/t/
#s/halstead_effort/e/
#s/halstead_difficulty/d/
#s/halstead_volume/v/
#s/loc_comments/loc_comment/
#s/design_complexity/iv(g)/
#s/locomment/loc_comment/
#s/loc_total/loc/
#s/locode/loc/
#s/[\t ]c[\t ]/ defects /
#s/[\t ]problems[\t ]/ defects /
#s/branchcout/branch_count/
#s/total_op[\t ]/n1 /
#s/total_opnd/n2/
#s/{no,yes}/{false,true}/
#EOF
## no yes
#
#for i in $Arffs; do
#	cat $i | 
#	tr A-Z a-z |
#	sed -f $Tmp/etc/seds \
#   	> $Tmp/arff/`basename $i`
#done
#
#shared() {
#	for i in `gawk -f intersectAttributes.awk $Tmp/arff/*.arff | sort | grep -v defects`; do 
#		echo $i
#	done
#	echo defects
#}
#Shared=`shared`
#
#for i in $Tmp/arff/*.arff; do
#		echo $i
#		gawk -f some.awk -v Some="$Shared" $i > $Tmp/shared/`basename $i`
#done
#
# report [FILE]
#
# Reads FILE (or stdin when no FILE is given) as blank-line-separated
# records and prints records 9 through 12. Each printed record is preceded
# by an empty line, has every line indented by eight spaces, and is followed
# by an empty line. Records that do not exist print as empty lines.
report() {
	# ${1:+"$1"} hands the file name to gawk as a single word (so paths with
	# spaces work) and expands to nothing when no argument was given, which
	# keeps the read-from-stdin behavior.
	gawk 'BEGIN { RS = ""; FS = "\n" }
	      { R[++N] = indent($0) }
	      END {
	          for (r = 9; r <= 12; r++) {
	              print R[r]
	              print ""
	          }
	      }
	      # Return str with each of its lines prefixed by a newline and
	      # eight spaces. lines, n, i and out are locals.
	      function indent(str,    lines, n, i, out) {
	          n = split(str, lines, "\n")
	          for (i = 1; i <= n; i++)
	              out = out "\n        " lines[i]
	          return out
	      }
	      ' ${1:+"$1"}
}
# Put weka.jar next to the intermediate files and work from there: the Weka
# command line uses the relative class path ./weka.jar, and every
# intermediate file below is written to the current directory.
# Here remembers the starting directory so helper scripts and input data can
# still be located after the cd.
# Both steps fail fast; continuing after a failed cd would scatter the
# intermediate files into whatever directory the script was started from.
cp weka.jar "$Tmp/shared" || { echo "cannot copy weka.jar to $Tmp/shared" >&2; exit 1; }
Here=$(pwd)
cd "$Tmp/shared" || { echo "cannot cd to $Tmp/shared" >&2; exit 1; }

# For each ARFF file under $Here/../../old:
#   1. run it through the asLogs gawk script       -> logged.arff
#   2. discretize (supervised Discretize filter)    -> discrete.arff
#   3. rank attributes by information gain          -> ranked.arff
#   4. run Weka's own NaiveBayes evaluation on ranked.arff
#   5. run a Max-fold stratified train/test split by hand, piping each fold's
#      predictions through the gotwant and abcd gawk scripts
# $Weka and $Discretize are command strings and are expanded unquoted on
# purpose so that they word-split into command + arguments.
Max=10	# number of folds used by the hand-rolled cross-validation
for i in "$Here"/../../old/*.arff; do
	# An unmatched glob stays as the literal pattern; skip it.
	[ -e "$i" ] || continue

	# File name without directory and without everything from the first dot.
	stem=$(basename "$i")
	stem=${stem%%.*}

	# log the numerics
	gawk -f "$Here/asLogs" "$i" > logged.arff

	# discretize
	$Discretize -c last -R first-last -i logged.arff -o discrete.arff

	# rank attributes on infogain
	$Weka weka.filters.supervised.attribute.AttributeSelection \
		-S "weka.attributeSelection.Ranker -T -2.7976931348623157E308 -N -1" \
		-E "weka.attributeSelection.InfoGainAttributeEval" \
		-i discrete.arff -o ranked.arff

	# standard WEKA
	echo "standard n-fold"
	$Weka weka.classifiers.bayes.NaiveBayes -K -i -v -t ranked.arff
	# Waits for a line on stdin before continuing.
	# NOTE(review): looks like a manual pause for inspecting the output above;
	# it blocks unattended runs - confirm it is still wanted.
	read -r
#	for S in 16   3  
 #       do
 #              $Weka weka.filters.unsupervised.attribute.Remove  -R "${S}-16" \
 #                   -i  ranked.arff -o ranked${S}.arff
 #   done  
#done  
	#echo "my controlled n-fold"

	#		   (echo "file,S,n,a,b,c,d,acc,pd,pf,prec,g"
	##		   for((R=1;R<=$Max;R++)); do
	#			   gawk -f $Here/someArff -v Seed=$RANDOM Bins=$Max Bin=$R ranked${S}.arff
	#			   $Weka weka.classifiers.bayes.NaiveBayes  -K  -p 0 -t  train.arff -T test.arff  | 
	#			   gawk -f $Here/gotwant | 
	#			   gawk -f $Here/abcd Prefix="$stem,$S,$R"
	#		   done 
	#		   ) | align -s/, 

	# One seed per dataset, shared by the test and train calls of every fold
	# so that both see the same partition. This must be $RANDOM: the
	# previous spelling ($Random) is an unset variable, which left -S
	# without a value.
	Seed=$RANDOM
	(echo "#data,fold,a,b,c,d,acc,pd,pf,prec,g"
	for ((R = 1; R <= Max; R++)); do
		# Fold R is the test set; -V inverts the selection to give the rest
		# as the training set.
		# NOTE(review): -c 17 hard-codes the class attribute position -
		# confirm every ranked.arff has its class at index 17.
		$Weka weka.filters.supervised.instance.StratifiedRemoveFolds \
			-c 17 -S "$Seed" -N "$Max" -F "$R" -i ranked.arff > test.arff
		$Weka weka.filters.supervised.instance.StratifiedRemoveFolds \
			-c 17 -S "$Seed" -V -N "$Max" -F "$R" -i ranked.arff > train.arff
		$Weka weka.classifiers.bayes.NaiveBayes -K -p 0 -t train.arff -T test.arff |
			gawk -f "$Here/gotwant" |
			gawk -f "$Here/abcd" Prefix="$stem,$R"
	done
	) | align -s/,
	# NOTE(review): this exit stops the script after the first dataset, so
	# the loop never reaches a second file - confirm whether that is intended.
	exit
done
     
