#!/bin/bash
#
# Experiment driver: for every *.csv data set in the current directory, builds
# ARFF files, splits them into train/test sets, applies a series of attribute
# selection "wrappers" and sources learner scripts on each resulting train/test
# pair.  Everything generate() writes to stdout is collected in the Log file.
#
# Usage:
#   <script> Here Sandbox Repeats Use Pred FullAttributeLearners \
#            AnyAttributeLearners Log
#
#   Here                   stored in $Here (not referenced by this file itself)
#   Sandbox                directory containing the learner scripts to source
#   Repeats                number of repetitions per data set
#   Use                    test-set size handed to ./manualStratificationArffs;
#                          data sets with fewer than Use+10 instances are skipped
#   Pred                   passed to ./coseekmo_lc as its -p option
#   FullAttributeLearners  whitespace-separated learner script names, run once
#                          per driver on the unreduced train/test files
#   AnyAttributeLearners   whitespace-separated learner script names, run on
#                          every attribute subset produced by a wrapper
#   Log                    file that receives generate's standard output
#
# Exit status: 2 on bad arguments, otherwise the status of generate.
#
# NOTE: learner scripts are *sourced* (". $Sandbox/$Learner train test"), so
# they run inside this shell and can read the variables and functions defined
# here (Prefix, Pred, Seed, stem, Weka, logToNumber, ...).  For that reason the
# experiment state is intentionally kept in global variables with their
# historical names, and strict mode (set -euo pipefail) is intentionally not
# enabled: it would be inherited by the sourced scripts, whose tolerance for it
# cannot be verified from this file.

# Configuration sets handed to ./cocNums as "<name>.config".
Drivers="precise proximal"

# Weka launcher.  Kept as a plain string and expanded unquoted on purpose so it
# word-splits into "java" plus its options; sourced learner scripts may rely on
# the same variable (not verifiable from this file).
Weka="java -Xmx1024M -cp weka.jar"

# Print the expected argument list on stderr.
usage(){
  printf 'Usage: %s Here Sandbox Repeats Use Pred FullAttributeLearners AnyAttributeLearners Log\n' \
    "${0##*/}" >&2
}

# Copy the eight positional arguments into the global variables used by the
# rest of the script and derive Min (= Use + 10).
# Returns 2 (after a message on stderr) when fewer than eight arguments are
# given, when Repeats or Use is not a non-negative integer, or when Log is
# empty; returns 0 otherwise.
parseArgs(){
  if [ "$#" -lt 8 ]; then
    usage
    return 2
  fi

  Here=$1
  Sandbox=$2
  Repeats=$3
  Use=$4
  Pred=$5
  FullAttributeLearners=$6
  AnyAttributeLearners=$7
  Log=$8

  # Both values are used in arithmetic contexts, where a non-numeric string
  # would silently be evaluated as a variable name/expression.
  case $Repeats in
    ''|*[!0-9]*)
      printf 'Repeats must be a non-negative integer, got "%s"\n' "$Repeats" >&2
      return 2 ;;
  esac
  case $Use in
    ''|*[!0-9]*)
      printf 'Use must be a non-negative integer, got "%s"\n' "$Use" >&2
      return 2 ;;
  esac
  if [ -z "$Log" ]; then
    printf 'Log must name an output file\n' >&2
    return 2
  fi

  # Minimum number of instances a data set needs in order to be processed.
  Min=$(( Use + 10 ))
}

# Log-transform an ARFF file.
#   $1  input file (optional; stdin is read when omitted)
# Lines from the one matching "@relation" through the one matching "@data" are
# copied unchanged.  On every other line with more than one comma-separated
# field, each field is replaced by its natural logarithm.  OFS is not set, so
# the rebuilt rows are joined with awk's default output separator (a space).
numberToLog(){
  gawk 'BEGIN {FS=","}
        /@relation/,/@data/ {print; next}
        NF > 1 {for (i = 1; i <= NF; i++) $i = log($i); print $0}' ${1:+"$1"}
}

# Convert log-space results back to plain numbers.
#   $1  input file (optional; stdin is read when omitted)
# Reads the global Prefix.  For each line with exactly three whitespace-
# separated fields it prints "Prefix,0,0,got,expect" where expect = e^$3 and
# got = e^$2; a negative $2 is replaced by 10^-20 before exponentiation.
# Not called from this file; presumably used by the sourced learner scripts.
logToNumber(){
  gawk 'NF == 3 {
          OFS = ","
          E = 2.718281828459045
          expect = $3
          got = $2 < 0 ? 10^(-20) : $2
          expect = E^expect
          got = E^got
          print Prefix, 0, 0, got, expect
        }' "Prefix=$Prefix" ${1:+"$1"}
}

# Convert CSV file $1 to ARFF on stdout, using the global stem as the relation
# name and rules.config for the --ranges option.
csvToArff(){
  ./csv2arff --ranges rules.config --relation "$stem" "$1"
}

# Run ./cocNums on ARFF file $2 with configuration "$1.config"; result on stdout.
arffWithNums(){
  ./cocNums --numbers "$1.config" "$2"
}

# Keep only the attributes listed in $1 (Weka Remove filter with -V, i.e. the
# selection is inverted), reading ARFF $2 and writing ARFF $3.
filterAttributes(){
  $Weka weka.filters.unsupervised.attribute.Remove -R "$1" -V -i "$2" -o "$3"
}

# Shared Weka WrapperSubsetEval invocation (BestFirst search, -F 5, -T 0.01,
# seeded with the global Seed).
#   $1      input ARFF file
#   $2      base classifier class name
#   $3...   options forwarded to the base classifier after "--"
wrapperSubsetEval(){
  local _input=$1 _classifier=$2
  shift 2
  $Weka weka.attributeSelection.WrapperSubsetEval \
    -S "weka.attributeSelection.BestFirst -D 1 -N 5" \
    -I "$_input" \
    -B "$_classifier" \
    -F 5 -T 0.01 -R "$Seed" \
    -- "$@"
}

# Wrapper attribute selection on ARFF $1 with LinearRegression as base learner.
LSRWrapper(){
  wrapperSubsetEval "$1" weka.classifiers.functions.LinearRegression -S 0 -R 1.0E-8
}

# Wrapper attribute selection on ARFF $1 with M5P as base learner.
M5PWrapper(){
  wrapperSubsetEval "$1" weka.classifiers.trees.M5P -M 4.0
}

# One ./coseekmo_lc run.
#   $1 label appended to Prefix, $2 value for -a, $3 value for -b, $4 ARFF file
# Sets the global Prefix (sourced scripts / later code may read it).
runCoseekmoLC(){
  Prefix="$R,$WRAPPER,${stem},${Num},${numberOfAttributes},$1"
  ./coseekmo_lc -i "$Prefix" -p "$Pred" -a "$2" -b "$3" "$4"
}

# Run ./coseekmo_lc on ARFF $1 with three fixed (a, b) coefficient pairs.
# NOTE(review): the labels e / sd / org look like the COCOMO embedded,
# semi-detached and organic modes -- confirm.
OriginalLC(){
  runCoseekmoLC e   2.8 1.2  "$1"
  runCoseekmoLC sd  3   1.12 "$1"
  runCoseekmoLC org 3.2 1.05 "$1"
}

# (Re)create testN.arff and trainN.arff from test.arff / train.arff for the
# current driver Num.
makeNumericTrainTest(){
  arffWithNums "$Num" test.arff > testN.arff
  arffWithNums "$Num" train.arff > trainN.arff
}

# Source every learner in AnyAttributeLearners on one train/test pair.
#   $1 training ARFF, $2 test ARFF
# Sets the globals Learner and Prefix before each learner is sourced; Prefix
# carries the current numberOfAttributes.
runAnyAttributeLearners(){
  for Learner in $AnyAttributeLearners; do
    Prefix="$R,$WRAPPER,${stem},${Num},${numberOfAttributes},${Learner}"
    . "$Sandbox/$Learner" "$1" "$2"
  done
}

# "None" section: no attribute selection.
# Runs OriginalLC, every full-attribute learner, and lcFull on the manually
# stratified training set.
runNoWrapper(){
  makeNumericTrainTest
  # Only needed when doing manual stratification.
  arffWithNums "$Num" trainManualStratification.arff > trainNManualStratification.arff
  numberOfAttributes=$(./attributes testN.arff)

  #---------- No Wrapper
  WRAPPER="None"

  #---------- Original LC
  OriginalLC testN.arff

  #---------- Full Attribute Learners
  for Learner in $FullAttributeLearners; do
    # ATTRIBUTECOUNT will be replaced with number of attributes used by the
    # learner, if any. Else, number of attributes passed to it.
    Prefix="$R,$WRAPPER,${stem},${Num},ATTRIBUTECOUNT,${Learner}"
    . "$Sandbox/$Learner" trainN.arff testN.arff
  done

  #---------- Full LC with manual stratification only
  Learner=lcFull
  Prefix="$R,$WRAPPER,${stem},${Num},ATTRIBUTECOUNT,${Learner}ManualStratification"
  . "$Sandbox/$Learner" trainNManualStratification.arff testN.arff
}

# "LocalWrapper" section: ./localwrapper produces rankings.dat, split.awk turns
# rankings + data into one ARFF per subset under subset/train and subset/test,
# and the any-attribute learners run on each train/test subset pair.
runLocalWrapper(){
  WRAPPER="LocalWrapper"
  sed 's/ //g' "$Num.config" > tunings
  ./localwrapper tunings train.arff > rankings.dat
  rm -rf subset
  mkdir -p subset/train subset/test
  # The two inputs are concatenated into a single stream on purpose: split.awk
  # is not visible here and may depend on seeing them as one input.
  cat rankings.dat train.arff | gawk -f split.awk "Stem=subset/train/$stem"
  cat rankings.dat test.arff | gawk -f split.awk "Stem=subset/test/$stem"

  #---------- Any Attribute Learners For Local Wrapper
  for j in subset/train/*.arff; do
    # An unmatched glob stays literal; skip it instead of processing a
    # non-existent file.
    [ -e "$j" ] || continue
    numberOfAttributes=$(./attributes "$j")
    arffWithNums "$Num" "$j" > trainN.arff
    arffWithNums "$Num" "subset/test/${j##*/}" > testN.arff
    runAnyAttributeLearners trainN.arff testN.arff
  done
}

# COCOMIN-style section.  The caller sets WRAPPER first.
#   $1      file-name stem: <stem>_Train (header-less data), <stem>_Train.arff
#           and <stem>_Test.arff are written
#   $2...   selector command; its stdout becomes WrapperAttributes
# attributeSelector.awk then writes the reduced train/test ARFF files and the
# any-attribute learners run on them.
runSelectorWrapper(){
  local _out=$1
  shift
  makeNumericTrainTest
  # Drop every line containing "@" (the ARFF header) to get bare data rows.
  gawk 'BEGIN {FS=OFS=","} /@/ {next} {print $0}' trainN.arff > "${_out}_Train"
  WrapperAttributes=$("$@")
  gawk -f attributeSelector.awk "OutputFile=${_out}_Train.arff" \
    "SelectedAttributes=$WrapperAttributes" < trainN.arff
  gawk -f attributeSelector.awk "OutputFile=${_out}_Test.arff" \
    "SelectedAttributes=$WrapperAttributes" < testN.arff

  #---------- Any Attribute Learners For COCOMIN Wrapper
  numberOfAttributes=$(./attributes "${_out}_Train.arff")
  runAnyAttributeLearners "${_out}_Train.arff" "${_out}_Test.arff"
}

# Turn Weka attribute-selection output into a sorted, comma-separated list for
# filterAttributes.
#   $1 file with the Weka output, $2 index of the last attribute
# This is required to ensure that the last two attributes (in this case KLOC
# and ActualEffort) are included.  KLOC is sometimes ignored and this is
# unacceptable since this wrapper has no concept of KLOC as a special
# attribute.  Attribute (last-1) is therefore removed from the selection and
# both (last-1) and last are always appended.
wekaKeepList(){
  grep "Selected attributes:" "$1" \
    | sed -e 's/Selected attributes: //' -e 's/ : [0-9]*//' -e 's/ //g' \
    | gawk 'BEGIN {FS=OFS=","}
            {
              split($0, attributes, ",")
              n = asort(attributes)
              for (i = 1; i <= n; i++)
                if (attributes[i] != LastAttribute-1)
                  printf("%d,", attributes[i])
              printf("%d,%d\n", LastAttribute-1, LastAttribute)
            }' "LastAttribute=$2"
}

# Weka wrapper section.  The caller sets WRAPPER first.
#   $1  tag, LSR or M5P: selects the <tag>Wrapper function and the file names
#       <tag>WrapperResults, <tag>_Wrapper_Train.arff, <tag>_Wrapper_Test.arff
#   $2  "log" to run the selection on a log-transformed copy of the training
#       data (<tag>_Wrapper_trainL.arff); anything else uses trainN.arff as is
# The selected attributes are always filtered out of the untransformed
# trainN.arff / testN.arff.
runWekaWrapper(){
  local _tag=$1 _selectionInput=trainN.arff _keep
  makeNumericTrainTest
  if [ "$2" = log ]; then
    _selectionInput="${_tag}_Wrapper_trainL.arff"
    numberToLog trainN.arff > "$_selectionInput"
  fi
  "${_tag}Wrapper" "$_selectionInput" > "${_tag}WrapperResults"
  LastAttribute=$(./attributes trainN.arff)
  _keep=$(wekaKeepList "${_tag}WrapperResults" "$LastAttribute")
  # Keep the historical globals LSRWrapperAttributes / M5PWrapperAttributes in
  # case a sourced script reads them.
  printf -v "${_tag}WrapperAttributes" '%s' "$_keep"
  filterAttributes "$_keep" trainN.arff "${_tag}_Wrapper_Train.arff"
  filterAttributes "$_keep" testN.arff "${_tag}_Wrapper_Test.arff"

  #---------- Any Attribute Learners For this Weka Wrapper
  numberOfAttributes=$(./attributes "${_tag}_Wrapper_Train.arff")
  runAnyAttributeLearners "${_tag}_Wrapper_Train.arff" "${_tag}_Wrapper_Test.arff"
}

# Main experiment loop: data sets x repeats x drivers x wrappers.
# Reads the globals set by parseArgs plus Drivers; writes many intermediate
# files (all.arff, train*.arff, test*.arff, subset/, *_Wrapper_*) into the
# current directory.  Result lines go to stdout.
generate(){
  for i in *.csv; do
    # An unmatched glob stays literal; nothing to do without data sets.
    [ -e "$i" ] || continue

    # This is used for manual stratification experiments: the "main" data set
    # is "<text before the first underscore>_all.csv".
    mainDataset=${i%%_*}_all.csv
    stem=${mainDataset%%.*}
    csvToArff "$mainDataset" > mainDataset.arff

    stem=${i%%.*}
    csvToArff "$i" > all.arff
    Size=$(./instances all.arff)
    # Skip data sets too small to leave Use test instances plus 10.
    if [ "$Size" -lt "$Min" ]; then
      continue
    fi

    ./blab "\n$stem "
    for ((R = 1; R <= Repeats; R++)); do
      ./blab "$R "
      Seed=$RANDOM

      # Not needed when doing manual stratification:
      # ./nArffs Seed=$Seed Tests=$Use all.arff

      # Generates a test file and two train files, one of which is used for
      # the manual stratification experiments.
      ./manualStratificationArffs "Seed=$Seed" "TestSize=$Use" all.arff mainDataset.arff

      WRAPPER=""
      for Num in $Drivers; do
        echo ""

        #---------- No Wrapper / Original LC / Full Attribute Learners
        runNoWrapper

        #---------- Local Wrapper
        runLocalWrapper

        #---------- COCOMIN Wrapper
        WRAPPER="COCOMINWrapper"
        runSelectorWrapper COCOMIN_Wrapper \
          ./fastCocomin 0 COCOMIN_Wrapper_Train

        #---------- NewCOCOMIN Wrapper1
        WRAPPER="NewCOCOMINWrapperMedian"
        runSelectorWrapper NewCOCOMIN_Wrapper1 \
          ./cocomin81 NewCOCOMIN_Wrapper1_Train "backward" "med_mre_hi" "mmre" "$Seed"

        #---------- NewCOCOMIN Wrapper2
        WRAPPER="NewCOCOMINWrapperNative"
        runSelectorWrapper NewCOCOMIN_Wrapper2 \
          ./cocomin81 NewCOCOMIN_Wrapper2_Train "backward" "native" "mmre" "$Seed"

        ##---------- NewCOCOMIN Wrapper with all combinations
        # arffWithNums $Num test.arff > testN.arff
        # arffWithNums $Num train.arff > trainN.arff
        #
        # gawk 'BEGIN {FS=OFS=","} /@/ {next} {print $0}' trainN.arff > NewCOCOMIN_Wrapper_Train;
        #
        # SearchMethods="forward backward both";
        # Rankings="native random cor_hi cor_lo var_hi var_lo ent_hi ent_lo mmre_hi mmre_lo med_mre_hi med_mre_lo pred_hi pred_lo";
        # Evaluations="mmre pred30 corr median_mre sd_mre";
        #
        # for search in $SearchMethods
        # do
        #   for ranking in $Rankings
        #   do
        #     for evaluation in $Evaluations
        #     do
        #       WRAPPER="NewCOCOMINWrapper-$search-$ranking-$evaluation";
        #       WrapperAttributes=`./cocomin81 NewCOCOMIN_Wrapper_Train "$search" "$ranking" "$evaluation" $Seed`;
        #
        #       cat trainN.arff | gawk -f attributeSelector.awk OutputFile=NewCOCOMIN_Wrapper_Train.arff SelectedAttributes=$WrapperAttributes
        #       cat testN.arff | gawk -f attributeSelector.awk OutputFile=NewCOCOMIN_Wrapper_Test.arff SelectedAttributes=$WrapperAttributes
        #
        #       #---------- Any Attribute Learners For COCOMIN Wrapper
        #       numberOfAttributes=`./attributes NewCOCOMIN_Wrapper_Train.arff`;
        #       for Learner in $AnyAttributeLearners;
        #       do
        #         Prefix="$R,$WRAPPER,${stem},${Num},${numberOfAttributes},${Learner}";
        #         . $Sandbox/$Learner NewCOCOMIN_Wrapper_Train.arff NewCOCOMIN_Wrapper_Test.arff;
        #       done
        #     done
        #   done
        # done

        #---------- WRAPPER With LSR as the target learner
        WRAPPER="LSRWrapper"
        runWekaWrapper LSR log

        #---------- WRAPPER With M5P as the target learner
        WRAPPER="M5PWrapper"
        runWekaWrapper M5P log

        #---------- WRAPPER With LSR as the target learner but without logging
        WRAPPER="LSRWrapperNoLog"
        runWekaWrapper LSR nolog

        #---------- WRAPPER With M5P as the target learner but without logging
        WRAPPER="M5PWrapperNoLog"
        runWekaWrapper M5P nolog
      done
    done
  done
}

# Entry point: validate the arguments, then run the experiment with its
# standard output redirected to the Log file.
main(){
  parseArgs "$@" || return
  generate > "$Log"
}

main "$@"