#!/usr/bin/gawk -f
#
# Summarises relative prediction error per experiment configuration.
#
# Usage: <this-script> SIZES.csv RESULTS.csv
#
#   ARGV[1]  records with exactly 2 comma-separated fields: field 2 is used
#            as the dataset name, and (field 1 - 2) is stored as its size.
#   ARGV[2]  records with exactly 10 comma-separated fields: fields 2, 3, 4
#            and 6 form the grouping key; field 9 is the obtained value and
#            field 10 the wanted value.
#
# For every key the absolute relative error, in percent, of each record is
# collected, and one CSV summary row is printed per key in the END rule.
#
# Requires gawk (uses asort()).

BEGIN {
    FS = OFS = ","
}

# First input file: remember the size of each dataset.
NF == 2 && FILENAME == ARGV[1] {
    DatasetSize[$2] = $1 - 2
}

# Second input file: accumulate one error sample per record.
NF == 10 && FILENAME == ARGV[2] {
    Got  = $(NF - 1)
    Want = $NF

    # The relative error is undefined when the wanted value is zero (this
    # also covers non-numeric fields such as a header row); dividing would
    # abort gawk with a fatal error, so skip the record instead.
    if (Want + 0 == 0) {
        printf("%s:%d: skipped, wanted value is zero or not numeric\n", FILENAME, FNR) > "/dev/stderr"
        next
    }

    key = $2 FS $3 FS $4 FS $6

    Mre = (Got - Want) / Want
    Mre = (Mre < 0 ? -Mre : Mre) * 100

    # MreArray[key,0] holds the sample count; samples live at 1..count.
    # The increment is done in its own statement so the result does not
    # depend on the order in which a subscript expression is evaluated.
    SampleCount = ++MreArray[key, 0]
    MreArray[key, SampleCount] = Mre
    N[key]++
}

END {
    if (FILENAME == ARGV[2]) {
        print "Wrapper,Dataset,Numeric,Learner,DatasetSize,Records,Median(MRE),33%,66%,100%,spread(33%<->66%),spread(66%<->100%)"
        for (key in N) {
            findResults(key)
            # The key was built with FS, so printing it verbatim yields the
            # first four CSV columns; element 2 of the split is the dataset.
            split(key, parsedKey, FS)
            print key, DatasetSize[parsedKey[2]], N[key], \
                  Results[key, 1], Results[key, 2], Results[key, 3], Results[key, 4], \
                  Results[key, 3] - Results[key, 2], \
                  Results[key, 4] - Results[key, 3]
        }
    }
}

# findResults(key)
#
# Reads the samples MreArray[key,1..MreArray[key,0]] and stores in the
# global Results array:
#   Results[key,1]  median
#   Results[key,2]  element at sorted position int(n/3)    (at least 1)
#   Results[key,3]  element at sorted position int(2*n/3)  (at least 1)
#   Results[key,4]  largest element
# Leaves Results untouched when the key has no samples.
#
# The names after the extra spaces in the parameter list are locals, not
# arguments; callers pass only `key`.
function findResults(key,    n, i, sorted, lowIdx, highIdx) {
    n = MreArray[key, 0]
    if (n < 1)
        return

    # `sorted` is local, so it starts empty on every call and cannot carry
    # samples over from a previously processed key.
    for (i = 1; i <= n; i++)
        sorted[i] = MreArray[key, i]
    asort(sorted)

    # Median: mean of the two middle elements for an even count.
    if (n % 2 == 0)
        Results[key, 1] = (sorted[n / 2] + sorted[n / 2 + 1]) / 2
    else
        Results[key, 1] = sorted[(n + 1) / 2]

    # asort() output is 1-based; clamp so small groups do not index slot 0.
    lowIdx = int(n / 3)
    if (lowIdx < 1)
        lowIdx = 1
    highIdx = int(2 * n / 3)
    if (highIdx < 1)
        highIdx = 1

    Results[key, 2] = sorted[lowIdx]
    Results[key, 3] = sorted[highIdx]
    Results[key, 4] = sorted[n]
}