#! /bin/bash # Variables #MyRoot="../../../trunk/" RawData=$1 DefaultFile=$2 ProjectFile=$3 NumberOfExperts=$4 TestSizeOfExperts=$5 Iterations=$6 Seed=$7 From=50 To=75 ValidationInfo=0 #RawData=$MyRoot"data/coc81.csv" #DefaultFile="default" #ProjectFile="project" #NumberOfExperts="10" #TestSizeOfExperts="0.33" #Iterations="100" #Seed=$RANDOM Data="/tmp/$USER/converted_data.tmp" ExpertFile="/tmp/$USER/experts.tmp" MergedFile="/tmp/$USER/merged_results.tmp" MergedNumerics="/tmp/$USER/merged_numerics.tmp" EstimatesFile="/tmp/$USER/estimates.tmp" BestEstimatesFile="/tmp/$USER/bestestimates.tmp" EstimatesFileFlipped="/tmp/$USER/estimates_flipped.tmp" BestEstimatesFileFlipped="/tmp/$USER/bestestimates_flipped.tmp" TmpFile1="/tmp/$USER/1.tmp" TmpFile2="/tmp/$USER/2.tmp" Report="/tmp/$USER/report" ReportFlipped="/tmp/$USER/report_flipped" Arrow="/tmp/$USER/arrow.plt" echo "#VARIABLES" echo "# Experts: "$NumberOfExperts echo "# Test Size of Experts: "$TestSizeOfExperts echo "# Iterations For Each Expert: "$Iterations echo "# Seed: "$Seed # Create tmp directory if necessary if [ ! -d /tmp/$USER ] then mkdir "/tmp/$USER" fi echo "#STEP 1" echo "#RAW TRAINING DATA - BEGIN. FILE LOCATED AT "$RawData cat $RawData echo "#RAW TRAINING DATA - END" echo "" echo "#STEP 2" echo "#CONVERTING TRAINING DATA..." gawk -f "convertcsv.awk" Method="COC81" Style="precise" $RawData > $Data echo "#CONVERTED TRAINING DATA - BEGIN. FILE LOCATED AT "$Data cat $Data echo "#CONVERTED TRAINING DATA - END" echo "" echo "#STEP 3" echo "#GENERATING EXPERTS USING SEED="$Seed" AND SAVING TO "$ExpertFile ( # Print attribute header - IMPORTANT - This is only for COC81 attributes and would need updated for COCII head -1 $RawData | header81 cocomostExperts $Data $Seed $NumberOfExperts $TestSizeOfExperts ) | tee $ExpertFile echo "" echo "#STEP 4" echo "#VIEW OF EXPERTS WITH MATCHED ATTRIBUTES" match $ExpertFile echo "" match $ExpertFile | gawk -F\- 'NF > 3 { for(I=2;I<=NF;I++) N[$I]++ } END { for(I in N) if (I) print N[I] " out of " NR " : " I }' | sort -r -n | cat -n echo "" #Combine the project data and default data echo "#STEP 5" echo "#MERGING DEFAULT AND PROJECT DATA FOR NEW ESTIMATE, SAVING TO "$MergedFile ./merge $MergedFile $DefaultFile $ProjectFile echo "" echo "#STEP 6" echo "#CONVERTING THE MERGED RESULTS TO NUMERIC VALUES, SAVING TO "$MergedNumerics resultsToCocNumerics Method="COC81" $MergedFile | tee $MergedNumerics echo "" if ((ValidationInfo==1)) then echo "#STEP 7a" echo "#VALIDATING MERGED RESULTS" else echo "#STEP 7" echo "#VALIDATING INPUT" fi Result1=$(gawk 'BEGIN{FS=",";Result=1;} { for (i=2;i<=NF;i++) { if ( $i !~ /^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][+-]?[0-9]+)?$/ ) {Result=0;} } }END{print Result}' $MergedNumerics) if ((ValidationInfo==1)) then if ((Result1==0)) then echo "#ERROR: NAN IN "$MergedNumerics echo "#MOST LIKELY THIS IS BECAUSE THERE IS NO COC81 NUMERIC FOR THE VALUE" exit else echo "#MERGED RESULTS OK" echo "" fi fi if ((ValidationInfo==1)) then echo "#STEP 7b" echo "#MAKING SURE THE ORDER OF ATTRIBUTES MATCHES THE TRAINING DATA" fi gawk -F, 'NR==1{print "ksloc";for(i=1;i<=NF;i++){print $i;}}' $ExpertFile > $TmpFile1 gawk -F, '{print $1}' $MergedNumerics > $TmpFile2 Result2=$(diff $TmpFile1 $TmpFile2) if [ -n "$Result2" ] then echo "ERROR - ATTRIBUTE ORDERING DOESN'T MATCH" echo "DIFF RESULTS: "$Result2 exit else if ((ValidationInfo==1)) then echo "#ATTRIBUTE ORDERING OK" echo "" else if ((Result1==0)) then echo "#ERROR: NAN IN "$MergedNumerics echo "#MOST LIKELY THIS IS BECAUSE THERE IS NO COC81 NUMERIC FOR THE VALUE" exit else echo "#VALIDATION:OK" echo "" fi fi fi [ -f "$EstimatesFile" ] && rm $EstimatesFile NumberOfAttributes=$(gawk 'END{print NR}' $MergedNumerics) echo "#STEP 8" echo "#GENERATING "$Iterations" ESTIMATES FOR EACH OF "$NumberOfExperts" EXPERTS" echo "#ALSO SAVING THE ESTIMATES TO "$EstimatesFile triangular_estimates $ExpertFile $NumberOfExperts $MergedNumerics $NumberOfAttributes $Iterations $Seed | sort -n | cat -n | tee $EstimatesFile echo "" echo "#STEP 9" echo "#FINDING THE $From TO $To estimates" echo "#ALSO SAVING THEse ESTIMATES TO "$BestEstimatesFile [ -f "$BestEstimatesFile" ] && rm $BestEstimatesFile gawk 'BEGIN { From = 50; To = 75 } { N[NR]= $2 } END { N1 = int(NR*From/100); N2 = int(NR*To/100); for(I=N1; I<=N2; I++) print I " " N[I] >>Out print From "% = " N[N1] " months " print To "% = " N[N2] " months " print "set arrow 1 from 0," N[N1]" to 0,"N[N2]" linewidth 20 linetype 3 nohead " >Arrow } ' Arrow=$Arrow From=$From To=$To Out=$BestEstimatesFile $EstimatesFile echo "" echo "#STEP 10" echo "#PLOTTING RESULTS" gawk 'BEGIN{FS=OFS=" ";}{print $2,$1}' $EstimatesFile > $EstimatesFileFlipped gawk 'BEGIN{FS=OFS=" ";}{print $2,$1}' $BestEstimatesFile > $BestEstimatesFileFlipped cd /tmp/$USER gnuplot <