#!/usr/bin/gawk -f
#
# Groups the records of a comma-separated results file according to the
# selected Hypothesis, compares every pair of groups with a Mann-Whitney U
# test (normal approximation, critical value 1.96) and writes a
# ties/wins/losses table to RanksFileName.
#
# Input : records with exactly 10 comma-separated fields; other records are
#         ignored.  Field 2 is the wrapper, field 4 the numeric setting,
#         field 6 the learner; fields 9 and 10 are the two values (Got, Want)
#         from which the error is computed as 100 * |Got - Want| / Want.
# Output: RanksFileName (default "rankResults") holding one line per group:
#         <group key>,<ties>,<wins>,<losses>.  A running pair counter is
#         printed on stdout as progress.
#
# Variables that may be set with -v or as command-line assignments:
#   Hypothesis     which grouping to apply (default 0 = full comparison)
#   RanksFileName  output file name (default "rankResults")

BEGIN {
    FS = OFS = ",";
    # Only supply defaults, so that values given with -v are respected.
    if (Hypothesis == "")
        Hypothesis = 0;
    # RawResultsFileName = "rawResults";
    if (RanksFileName == "")
        RanksFileName = "rankResults";
}

NF == 10 {
    Wrapper = $2;
    Numeric = $4;
    Learner = $6;
    Got = $(NF - 1);
    Want = $NF;

    # The relative error is undefined for a zero reference value; dividing
    # would abort the whole run, so skip the record instead.
    if (Want + 0 == 0) {
        printf("%s:%d: skipping record, reference value is zero\n", FILENAME, FNR) > "/dev/stderr";
        next;
    }

    # magnitude of relative error, as a percentage
    Mre = (Got - Want) / Want;
    Mre = (Mre < 0 ? -Mre : Mre) * 100;

    # Recomputed for every record so that an unrecognised learner never
    # inherits the family of the previous record.
    OutputLearner = learnerFamily(Learner);

    # this is the header to be printed based on the key
    header = (Hypothesis == 0) ? "wrapper,numeric,learner" : "key";

    key = classify(Hypothesis, Wrapper, Numeric, Learner, OutputLearner);
    if (key != "NotAllowed") {
        N[key]++;
        MreArray[key, N[key]] = Mre;
    }
}

END {
    # print "keyCounter, firstKey, N[firstKey], secondKey, N[secondKey], firstKeyZ, secondKeyZ" > RawResultsFileName;
    for (firstKey in N) {
        for (secondKey in N) {
            if (firstKey == secondKey)
                continue;
            # make sure an unordered pair of keys is only compared once
            if (((firstKey, secondKey) in compared) || ((secondKey, firstKey) in compared))
                continue;
            compared[firstKey, secondKey] = 1;
            keyCounter++;
            analyze(firstKey, secondKey);
        }
    }
    print "";

    # header is normally set while reading records; fall back when no
    # 10-field record was seen at all.
    if (header == "")
        header = (Hypothesis == 0) ? "wrapper,numeric,learner" : "key";

    print header, "ties,wins,losses" > RanksFileName;
    for (key in N)
        print key, tie[key] + 0, winMedianRank[key] + 0, lossMedianRank[key] + 0 > RanksFileName;
}

# Maps a raw learner name (field 6) to the family name used by the
# hypotheses.  Returns "" for a learner that has no family.
# NOTE(review): "dynamicLocomoMean"/"dynamicLocomoMedian" are referenced by
# hypothesis 100 but have no family here; confirm whether they should.
function learnerFamily(learner) {
    if (learner == "lcFullManualStratification") return "ManualStratificationLC";
    if (learner == "nearestNeighborMre")         return "NearestNeighborWithMRE";
    if (learner == "nearestNeighborMe")          return "NearestNeighborWithME";
    if (learner ~ /^locomoFull-(3|5|10|20|40|80)$/) return "LOCOMOFull";
    if (learner ~ /^locomo-(3|5|10|20|40|80)$/)     return "LOCOMO";
    if (learner == "m5p" || learner == "m5pFull") return "M5P";
    if (learner == "lsr" || learner == "lsrFull") return "LSR";
    if (learner == "lc"  || learner == "lcFull")  return "LC";
    if (learner == "cocomost") return "COCOMOST";
    if (learner == "cocomin")  return "COCOMIN";
    if (learner == "sd")       return "SD";
    if (learner == "org")      return "ORG";
    if (learner == "e")        return "E";
    return "";
}

# True for the two nearest-neighbour families.
function isNearestNeighbor(family) {
    return (family == "NearestNeighborWithMRE" || family == "NearestNeighborWithME");
}

# True for the families that hypotheses 6-10 and 105 exclude and that
# hypothesis 21 counts as "SomePruning".
function isRowPruningLearner(family) {
    return (family == "LOCOMO" || family == "LOCOMOFull" ||
            family == "ManualStratificationLC" || isNearestNeighbor(family));
}

# True for the families grouped as "LearnerLogging" / "LC-Based" by
# hypotheses 2 and 4.
function isLcBasedLearner(family) {
    return (family == "ManualStratificationLC" || family == "LOCOMO" ||
            family == "LOCOMOFull" || family == "LC" || family == "COCOMOST" ||
            family == "COCOMIN" || family == "SD" || family == "ORG" || family == "E");
}

# Stratification class used by hypotheses 11-13; only records without a
# wrapper are eligible.
function stratificationOf(wrapper, learner, family) {
    if (wrapper != "None")
        return "NotAllowed";
    if (family == "LOCOMOFull" || isNearestNeighbor(family))
        return "AutomaticStratification";
    if (family == "ManualStratificationLC")
        return "ManualStratification";
    if (learner == "lcFull")
        return "NoStratification";
    return "NotAllowed";
}

# Key for the neighbourhood-size hypotheses 16-20: compares locomoFull with
# neighbourhood sizeA against locomoFull with neighbourhood sizeB.
function neighborhoodKey(learner, sizeA, sizeB) {
    if (learner == ("locomoFull-" sizeA)) return "Neighborhood" sizeA;
    if (learner == ("locomoFull-" sizeB)) return "Neighborhood" sizeB;
    return "NotAllowed";
}

# Returns the group key of one record under the given hypothesis, or
# "NotAllowed" when the record takes no part in that hypothesis.
#   wrapper, numeric, learner  fields 2, 4 and 6 of the record
#   family                     learnerFamily(learner)
function classify(hypothesis, wrapper, numeric, learner, family,    strat) {
    # no hypothesis (full comparison)
    if (hypothesis == 0)
        return wrapper FS numeric FS learner;

    if (hypothesis == 1)
        return (wrapper == "None" && numeric == "precise" && learner == "lcFull") ? "FullLC" : "Others";

    if (hypothesis == 2) {
        if (isLcBasedLearner(family)) return "LearnerLogging";
        if (family == "M5P" || family == "LSR" || isNearestNeighbor(family)) return "NoLearnerLogging";
        return "NotAllowed";
    }

    if (hypothesis == 3) {
        if (isLcBasedLearner(family) || family == "LSR" || isNearestNeighbor(family)) return "Linear";
        if (family == "M5P") return "NonLinear";
        return "NotAllowed";
    }

    if (hypothesis == 4) {
        if (isLcBasedLearner(family)) return "LC-Based";
        if (family == "M5P" || family == "LSR" || isNearestNeighbor(family)) return "Non-LC-Based";
        return "NotAllowed";
    }

    if (hypothesis == 5)
        return (numeric == "precise") ? "Precise" : "Proximal";

    if (hypothesis == 6) {
        if (isRowPruningLearner(family)) return "NotAllowed";
        return (wrapper == "None") ? "NoColumnPruning" : "SomeColumnPruning";
    }

    if (hypothesis == 7) {
        if (isRowPruningLearner(family)) return "NotAllowed";
        if (wrapper ~ /^(M5PWrapperNoLog|LSRWrapperNoLog|LocalWrapper|LSRWrapper|M5PWrapper)$/)
            return "BestSearchColumnPruning";
        if (wrapper == "COCOMINWrapper") return "NonBestSearchColumnPruning";
        return "NotAllowed";
    }

    if (hypothesis == 8) {
        if (isRowPruningLearner(family)) return "NotAllowed";
        if (wrapper == "LSRWrapperNoLog" || wrapper == "LSRWrapper") return "LSRWrapper";
        if (wrapper == "M5PWrapperNoLog" || wrapper == "M5PWrapper") return "M5PWrapper";
        return "NotAllowed";
    }

    if (hypothesis == 9) {
        if (isRowPruningLearner(family)) return "NotAllowed";
        if (wrapper ~ /^(M5PWrapperNoLog|LSRWrapperNoLog|LSRWrapper|M5PWrapper)$/)
            return "NonLCBasedColumnPruning";
        if (wrapper == "COCOMINWrapper" || wrapper == "LocalWrapper") return "LCBasedColumnPruning";
        return "NotAllowed";
    }

    if (hypothesis == 10) {
        if (isRowPruningLearner(family)) return "NotAllowed";
        if (wrapper ~ /^(M5PWrapper|LSRWrapper|LocalWrapper|COCOMINWrapper)$/)
            return "WrapperLogged";
        if (wrapper == "M5PWrapperNoLog" || wrapper == "LSRWrapperNoLog") return "WrapperNotLogged";
        return "NotAllowed";
    }

    # hypotheses 11-13: one stratification class against the other two
    if (hypothesis == 11 || hypothesis == 12 || hypothesis == 13) {
        strat = stratificationOf(wrapper, learner, family);
        if (strat == "NotAllowed") return "NotAllowed";
        if (hypothesis == 11) return (strat == "AutomaticStratification") ? strat : "Others";
        if (hypothesis == 12) return (strat == "ManualStratification") ? strat : "Others";
        return (strat == "NoStratification") ? strat : "Others";
    }

    if (hypothesis == 14) {
        if (wrapper != "None") return "NotAllowed";
        if (family == "LOCOMOFull") return "FixedAutomaticStratification";
        if (isNearestNeighbor(family)) return "DynamicAutomaticStratification";
        return "NotAllowed";
    }

    if (hypothesis == 15) {
        if (learner == "locomoFull-80") return "Neighborhood80";
        if (learner == "lcFull") return "NeighborhoodAll";
        return "NotAllowed";
    }
    if (hypothesis == 16) return neighborhoodKey(learner, 80, 40);
    if (hypothesis == 17) return neighborhoodKey(learner, 40, 20);
    if (hypothesis == 18) return neighborhoodKey(learner, 20, 10);
    if (hypothesis == 19) return neighborhoodKey(learner, 10, 5);
    if (hypothesis == 20) return neighborhoodKey(learner, 5, 3);

    if (hypothesis == 21)
        return (isRowPruningLearner(family) || wrapper != "None") ? "SomePruning" : "NoPruning";

    # hypothesis 22 was never defined (its body was commented out), so no
    # record is grouped for it.

    # hypothesis 100: row pruning analysis
    if (hypothesis == 100) {
        if (wrapper == "None" && numeric == "precise" &&
            (family == "LOCOMOFull" || isNearestNeighbor(family) ||
             family == "ManualStratificationLC" || learner == "lcFull" ||
             learner == "dynamicLocomoMean" || learner == "dynamicLocomoMedian"))
            return learner;
        return "NotAllowed";
    }

    # hypothesis 103: column pruning analysis (the original comment
    # labelled this "hypothesis 101", but the code tests for 103)
    if (hypothesis == 103) {
        if (numeric != "precise") return "NotAllowed";
        if (learner == "m5p" && (wrapper == "M5PWrapper" || wrapper == "M5PWrapperNoLog")) return wrapper;
        if (learner == "lsr" && (wrapper == "LSRWrapper" || wrapper == "LSRWrapperNoLog")) return wrapper;
        if (learner == "lc" && (wrapper == "LocalWrapper" || wrapper == "COCOMINWrapper")) return wrapper;
        if (learner == "lcFull" && wrapper == "None") return "NoWrapper";
        return "NotAllowed";
    }

    # hypothesis 105: learner analysis (the original comment labelled this
    # "hypothesis 102", but the code tests for 105)
    if (hypothesis == 105) {
        if (wrapper == "None" && numeric == "precise" && !isRowPruningLearner(family))
            return learner;
        return "NotAllowed";
    }

    return "NotAllowed";
}

# Compares the error samples of two groups and updates the global
# tie[], winMedianRank[] and lossMedianRank[] counters for both keys.
#
#   firstKey, secondKey  group keys; N[key] is the sample size and
#                        MreArray[key, 1..N[key]] holds the sample.
#   (all remaining parameters are local variables)
#
# The combined sample is ranked with tied values sharing the average of
# their positions.  The rank sums give the U statistic of each group, which
# is turned into a z score using mean m*n/2 and standard deviation
# sqrt(m*n*(m+n+1)/12).  If |z| <= 1.96 the pair is a tie; otherwise the
# group whose median has the lower rank (i.e. the lower error) wins.
#
# firstKeySum, secondKeySum, firstKeyU, secondKeyU, firstKeyZ and secondKeyZ
# are deliberately left global so they can be inspected or printed as raw
# results after the call.
function analyze(firstKey, secondKey,    tempArray, tempFirstArray, tempSecondArray,
                 m, n, total, i, value, runStart, runEnd, avgRank,
                 firstPos, secondPos, firstMedianIndex, secondMedianIndex,
                 firstMedianRank, secondMedianRank, meanU, sdU, Z) {
    m = N[firstKey];
    n = N[secondKey];
    total = m + n;

    # put the raw data from the two sets together, keeping a copy of each
    for (i = 1; i <= m; i++) {
        tempFirstArray[i] = MreArray[firstKey, i];
        tempArray[i] = MreArray[firstKey, i];
    }
    for (i = 1; i <= n; i++) {
        tempSecondArray[i] = MreArray[secondKey, i];
        tempArray[m + i] = MreArray[secondKey, i];
    }
    asort(tempArray);
    asort(tempFirstArray);
    asort(tempSecondArray);

    # lower median for even sample sizes
    firstMedianIndex = int((m + 1) / 2);
    secondMedianIndex = int((n + 1) / 2);

    # Walk the combined sample one run of equal values at a time.  Every
    # member of a run gets the average of the run's positions as its rank.
    # Because all three arrays are sorted, the members of each group that
    # belong to the current run are found by advancing one cursor per group,
    # so each observation is counted exactly once and for its own group.
    firstKeySum = 0;
    firstPos = 1;
    secondPos = 1;
    for (runStart = 1; runStart <= total; runStart = runEnd + 1) {
        value = tempArray[runStart];
        runEnd = runStart;
        # extend the run up to and including the last element
        while (runEnd < total && tempArray[runEnd + 1] == value)
            runEnd++;
        avgRank = (runStart + runEnd) / 2;

        while (firstPos <= m && tempFirstArray[firstPos] == value) {
            firstKeySum += avgRank;
            if (firstPos == firstMedianIndex)
                firstMedianRank = avgRank;
            firstPos++;
        }
        while (secondPos <= n && tempSecondArray[secondPos] == value) {
            if (secondPos == secondMedianIndex)
                secondMedianRank = avgRank;
            secondPos++;
        }
    }
    # all ranks add up to total*(total+1)/2, so the other sum follows
    secondKeySum = total * (total + 1) / 2 - firstKeySum;

    firstKeyU = firstKeySum - m * (m + 1) / 2;
    secondKeyU = secondKeySum - n * (n + 1) / 2;
    meanU = m * n / 2;
    sdU = sqrt(m * n * (m + n + 1) / 12);
    firstKeyZ = (firstKeyU - meanU) / sdU;
    secondKeyZ = (secondKeyU - meanU) / sdU;
    # print keyCounter, firstKey, N[firstKey], secondKey, N[secondKey], firstKeyZ, secondKeyZ > RawResultsFileName;
    printf("%d ", keyCounter);

    # the two z scores are equal in magnitude but opposite in sign, so the
    # larger one is enough to compare against the critical value of 1.96
    # (95% confidence)
    Z = (firstKeyZ > secondKeyZ) ? firstKeyZ : secondKeyZ;
    if (Z >= 0 && Z <= 1.96) {
        tie[firstKey]++;
        tie[secondKey]++;
        return;
    }

    # significant difference: the ranking is about errors, so the group
    # whose median has the lower rank is the winner
    if (firstMedianRank < secondMedianRank) {
        winMedianRank[firstKey]++;
        lossMedianRank[secondKey]++;
    } else if (firstMedianRank > secondMedianRank) {
        lossMedianRank[firstKey]++;
        winMedianRank[secondKey]++;
    } else {
        tie[firstKey]++;
        tie[secondKey]++;
    }
}