# ---------------------------------------------------------------------------
# Configuration and scratch-directory setup.
#   Dirs   directory holding the raw .arff inputs
#   Arffs  glob pattern (expanded later, unquoted) matching those inputs
#   Tmp    scratch tree; wiped and recreated on every run
# ---------------------------------------------------------------------------
Dirs="../../raw"
Arffs="$Dirs/*.arff"
Tmp=/tmp/intersect

# Directory the script was started from. It must be known before Log is
# built below, otherwise Log would expand to "gawk -f /asLogs".
Here=$(pwd)

# generic - don't touch!!!!
# These hold whole command lines and are meant to be expanded unquoted.
Weka="java   -Xmx1024M -cp ./weka.jar"
Log="gawk -f $Here/asLogs"
Discretize="$Weka weka.filters.supervised.attribute.Discretize "
Rank="$Weka  weka.filters.supervised.attribute.AttributeSelection"

# Start from an empty scratch tree. ${Tmp:?} aborts instead of running
# "rm -rf" on an empty path should Tmp ever be blanked out.
if [ -d "$Tmp" ]; then
	rm -rf -- "${Tmp:?}"
fi
mkdir -p "$Tmp" "$Tmp/etc" "$Tmp/arff" "$Tmp/shared"

# Emit the sed script that maps the differing attribute spellings found in
# the input files onto one common vocabulary. It is applied to lower-cased
# text by the normalisation loop, and every rule replaces only the first
# match on a line. The delimiter is quoted so the shell copies the rules
# through verbatim.
# NOTE(review): "branchcout" looks like a typo for "branchcount" - confirm
# against the attribute names that actually occur in the data.
cat > "$Tmp/etc/seds" <<'EOF'
s/loccodeandcomment/loc_code_and_comment/
s/locodeandcomment/loc_code_and_comment/
s/locandcomment/loc_code_and_comment/
s/essential_complexity/ev(g)/
s/cyclomatic_complexity/v(g)/
s/halstead_length/n/
s/halstead_level/l/
s/num_operators/n1/
s/num_operands/n2/
s/unique_operands/uniq_opnd/
s/unique_operators/uniq_op/
s/halstead_content/i/
s/halstead_error_est/b/
s/halstead_prog_time/t/
s/halstead_effort/e/
s/halstead_difficulty/d/
s/halstead_volume/v/
s/loc_comments/loc_comment/
s/design_complexity/iv(g)/
s/locomment/loc_comment/
s/loc_total/loc/
s/locode/loc/
s/[\t ]c[\t ]/ defects /
s/[\t ]problems[\t ]/ defects /
s/branchcout/branch_count/
s/total_op[\t ]/n1 /
s/total_opnd/n2/
s/{no,yes}/{false,true}/
EOF
# no yes

# Normalise every raw ARFF file: lower-case it, apply the renaming rules in
# $Tmp/etc/seds, and store the result under the same base name in $Tmp/arff.
# $Arffs holds a glob pattern, so it is deliberately expanded unquoted.
for i in $Arffs; do
	# An unmatched glob is left as the literal pattern; skip it so no bogus
	# "*.arff" file is created in $Tmp/arff and picked up by later steps.
	[ -e "$i" ] || continue
	tr A-Z a-z < "$i" |
		sed -f "$Tmp/etc/seds" > "$Tmp/arff/$(basename "$i")"
done

#######################################
# Print one attribute name per line: every whitespace-separated word that
# intersectAttributes.awk reports for the files in $Tmp/arff, sorted, with
# any line mentioning "defects" removed, followed by a final "defects" line
# so that it always comes last.
# Globals:   Tmp (read)
# Arguments: none
# Outputs:   attribute names on stdout
#######################################
shared() {
	# The final gawk stage splits the sorted lines into words. Doing that in
	# awk rather than with a shell "for" over an unquoted substitution avoids
	# pathname expansion of the words and keeps the caller's variables intact.
	gawk -f intersectAttributes.awk "$Tmp"/arff/*.arff |
		sort |
		grep -v defects |
		gawk '{ for (k = 1; k <= NF; k++) print $k }'
	echo defects
}
# Attribute list produced by shared(), captured once and handed to some.awk.
Shared=$(shared)

# Run some.awk over every normalised file with Some set to the shared
# attribute list, writing the result under the same name in $Tmp/shared.
for i in "$Tmp"/arff/*.arff; do
	# Skip the literal pattern left behind when nothing matches, so no
	# bogus "*.arff" file is written to $Tmp/shared.
	[ -e "$i" ] || continue
	echo "$i"
	gawk -f some.awk -v Some="$Shared" "$i" > "$Tmp/shared/$(basename "$i")"
done

# The Weka command line uses the relative class path ./weka.jar, so the jar
# has to sit in the directory the rest of the script runs from.
cp weka.jar "$Tmp/shared"

# Remember the start directory; the awk helper scripts are referenced
# relative to it after the cd below.
Here=$(pwd)

# Everything below writes scratch files into the current directory, so stop
# right here if the working directory cannot be entered.
cd "$Tmp/shared" || { echo "cannot cd to $Tmp/shared" >&2; exit 1; }

#######################################
# Print selected paragraphs of a text file, indented.
# The file is read in paragraph mode (records separated by blank lines, one
# field per line). Each paragraph is rebuilt with every line prefixed by a
# newline and eight spaces; the 18th paragraph is printed first, then the
# 16th, then a line holding a single space.
# NOTE(review): why paragraphs 18 and 16 are the interesting ones is not
# visible here - presumably positions in a Weka output file; confirm.
# Arguments: $1 - path of the file to read (quoted, so it may contain spaces)
# Outputs:   the indented paragraphs on stdout
#######################################
report() {
	gawk '
	BEGIN { RS = ""; FS = "\n" }
	      { Para[++Count] = indent() }
	END   { print Para[18]
	        print Para[16]
	        print " " }

	# Rebuild the current record with each line on its own indented row.
	function indent(    k, text) {
		for (k = 1; k <= NF; k++)
			text = text "\n        " $k
		return text
	}
	' "$1"
}
# Max is handed to someArff as Bins and is the upper bound of the counter R
# in the evaluation loops. The same value is assigned again right before the
# active loop further down.
Max=10
# The commented-out block that follows is a disabled variant of the
# evaluation loop: it iterates over *.arff in the current directory, has no
# S column in its header, runs NaiveBayes without -K, and tees intermediate
# output into $stem.results, $stem.gotten and $stem.log.
#for i in *.arff; do
#	stem=`basename $i`
#	stem=${stem/.*/}
#	gawk -f $Here/asLogs $i 	> logged.arff
#	$Discretize -c last -R first-last  -i logged.arff -o discrete.arff
#	echo $stem
#	$Weka  weka.filters.supervised.attribute.AttributeSelection \
#             -S "weka.attributeSelection.Ranker -T -2.7976931348623157E308 -N -1"  \
#             -E  "weka.attributeSelection.InfoGainAttributeEval"                    \
#             -i   discrete.arff -o ranked.arff
#
#	(echo "file,n,a,b,c,d,acc,pd,pf,prec,g"
#	for((R=1;R<=$Max;R++)); do
#			gawk -f $Here/someArff -v Seed=$RANDOM Bins=$Max Bin=$R ranked.arff
#			$Weka weka.classifiers.bayes.NaiveBayes   -p 0 -t  train.arff -T test.arff  | tee $stem.results |
#			gawk -f $Here/gotwant | tee $stem.gotten | 
#			gawk -f $Here/abcd Prefix="$stem,$R"
#	done 
#	) | align -s/, |tee $stem.log
#done 
# Number of bins: passed to someArff as Bins and used as the upper bound of
# the bin counter R below.
Max=10

# Evaluation loop. For every ARFF file in the "old" directory:
#   1. run asLogs over it (the original note reads "log the numerics"),
#   2. discretize all attributes with Weka's supervised Discretize filter,
#   3. rank the attributes with InfoGainAttributeEval + Ranker,
#   4. for each bin R in 1..Max run someArff, then NaiveBayes (-K) on
#      train.arff / test.arff, and pipe the predictions through gotwant and
#      abcd; the rows are column-aligned by "align" under one CSV header.
# All intermediate files are written to the current directory.
# NOTE(review): train.arff and test.arff are presumably written by someArff
# - confirm.
# NOTE(review): the original read $HERE here while every other line uses
# $Here; HERE is still honoured when set, otherwise $Here is used.
for i in "${HERE:-$Here}"/old/*.arff; do
	# An unmatched glob stays literal; skip it instead of running the whole
	# pipeline on a path that does not exist.
	[ -e "$i" ] || continue

	stem=$(basename "$i")
	stem=${stem/.*/}    # drop everything from the first dot onwards

	# log the numerics
	gawk -f "$Here/asLogs" "$i" > logged.arff

	# discretize
	$Discretize -c last -R first-last -i logged.arff -o discrete.arff
	echo "$stem"

	# rank the attributes
	$Weka weka.filters.supervised.attribute.AttributeSelection \
		-S "weka.attributeSelection.Ranker -T -2.7976931348623157E308 -N -1" \
		-E "weka.attributeSelection.InfoGainAttributeEval" \
		-i discrete.arff -o ranked.arff

	(
		echo "file,S,n,a,b,c,d,acc,pd,pf,prec,g"
		for ((R = 1; R <= Max; R++)); do
			gawk -f "$Here/someArff" -v Seed="$RANDOM" Bins="$Max" Bin="$R" ranked.arff
			$Weka weka.classifiers.bayes.NaiveBayes -K -p 0 -t train.arff -T test.arff |
				gawk -f "$Here/gotwant" |
				gawk -f "$Here/abcd" Prefix="$stem,all,$R"
		done
	) | align -s/,
# The only "done" for this loop was commented out along with the experiments
# below, which left the script with a syntax error at end of file.
done
	#$Weka weka.classifiers.bayes.NaiveBayes  -K  -p 0 -t  train.arff -T test.arff  | 
	#for S in    3  
    #    do
    #           $Weka weka.filters.unsupervised.attribute.Remove  -R "${S}-16" \
    #                -i  ranked.arff -o ranked${S}.arff
#			   (echo "file,S,n,a,b,c,d,acc,pd,pf,prec,g"
#			   for((R=1;R<=$Max;R++)); do
#				   gawk -f $Here/someArff -v Seed=$RANDOM Bins=$Max Bin=$R ranked${S}.arff
#				   $Weka weka.classifiers.bayes.NaiveBayes  -K  -p 0 -t  train.arff -T test.arff  | 
#				   gawk -f $Here/gotwant | 
#				   gawk -f $Here/abcd Prefix="$stem,$S,$R"
#			   done 
#			   ) | align -s/, 
#     done  
#done  
#
     #echo " ... using all attributes of $1"
     #$Weka weka.classifiers.bayes.NaiveBayes  -t  ranked.arff #-T discrete.arff > $stem.all
      # exit 
	 #Seed=$Random
	 #(echo "file,n,a,b,c,d,acc,pd,pf,prec,g"
	 #for((R=1;R<=10;R++)); do
	#	$Weka weka.filters.supervised.instance.StratifiedRemoveFolds -c 17 -S $Seed    -N 10 -F $R -i ranked.arff >  test.arff
	# 	$Weka weka.filters.supervised.instance.StratifiedRemoveFolds -c 17 -S $Seed -V -N 10 -F $R -i ranked.arff > train.arff
    # 	$Weka weka.classifiers.bayes.NaiveBayes   -p 0 -t  train.arff -T test.arff | 
	#		gawk -f $Here/gotwant | 
	#		gawk -f $Here/abcd  Prefix="file,$R"
	#done 
	#) | align -s/, 
	 #exit
     
