# Equal-width discretizer for ARFF-style input.
#
# Reads a header made of "@relation" / "@attribute" lines followed by
# "@data" and comma-separated rows.  Every attribute whose declared type
# does not contain "{" is treated as numeric: its minimum and maximum are
# tracked while the rows are read, and on output each of its values is
# replaced by the label "low..high" of the equal-width bin it falls in.
# The last column is always written back unchanged.
#
# The number of bins defaults to 5 and may be overridden on the command
# line with  -v Bins=N .

BEGIN {
    Bins = int(Bins)            # honour -v Bins=N when supplied
    if (Bins < 1) Bins = 5      # default (also covers unset / bad values)
    Inf = 10^32                 # stand-in for infinity when seeding Min/Max
}

## --------------------------------------------------
# Data entry. Pretty routine stuff.

# Skip full-line comments first so that a commented-out "@attribute"
# or "@relation" line is not mistaken for a real declaration.
/^[ \t]*%/    { next }

/@relation /  { Relation = $2 }

# Name[1..Name[0]] maps column position -> attribute name and
# Name[attribute name] maps back to the column position.
/@attribute/  {
    Name[++Name[0]] = $2
    Name[$2] = Name[0]
    Num[$2] = $3 !~ /[{]/       # numeric unless the type is a {...} list
    if (Num[$2]) {
        Max[$2] = -1 * Inf
        Min[$2] = Inf
    }
}

{ gsub(/[ \t]*/, "") }          # no blanks
{ gsub(/%.*/, "") }             # no comments
/^$/          { next }          # no blank lines
/@data/       { In = 1; OFS = FS = "," }
/@/           { next }
In            { Rows++; train(Rows, Data) }

END {
    print "@relation " Relation "\n"
    attributes(Name)
    print "\n@data\n"
    cells(Rows, Data, Name[0])
}

# train(row, d): store the fields of the current record in d[row, name]
# and widen Min/Max of every numeric attribute with the values seen.
# Missing values ("?") are stored but do not influence Min/Max.
function train(row, d,    what, i) {
    for (i = 1; i <= NF; i++) {
        what = Name[i]
        d[row, what] = $i
        if ($i == "?") continue
        if (Num[what]) {
            if ($i > Max[what]) Max[what] = $i
            if ($i < Min[what]) Min[what] = $i
        }
    }
}

# cell1(n, col): label of the bin that value n of numeric attribute col
# falls into.  Bin indices are 0-based.  The index is clamped to
# 0..Bins-1 so that the maximum lands in the last bin instead of one past
# it, and a constant column (zero width) maps to bin 0 rather than
# dividing by zero.
function cell1(n, col,    width, i) {
    width = (Max[col] - Min[col]) / Bins
    i = (width > 0) ? int((n - Min[col]) / width) : 0
    if (i > Bins - 1) i = Bins - 1
    if (i < 0) i = 0
    # Delegate to range() so data cells and header labels are formatted
    # by exactly the same code.
    return range(i, col)
}

# range(i, col): "low..high" label (formatted with OFMT) of the i-th
# (0-based) equal-width bin of numeric attribute col.
function range(i, col,    width, low) {
    width = (Max[col] - Min[col]) / Bins
    low = Min[col] + i * width
    return sprintf(OFMT ".." OFMT, low, low + width)
}

# attributes(name): print one "@attribute" line per attribute.  Numeric
# attributes list the labels of bins 0..Bins-1, matching what cell1()
# emits; a constant column declares a single bin to avoid repeating the
# same label.  All other attributes are printed as " discrete ".
# NOTE(review): a numeric last column also gets bins declared here even
# though cell() leaves its values unbinned -- confirm the intended header.
function attributes(name,    what, i, line, j, n) {
    for (i = 1; i <= name[0]; i++) {
        what = Name[i]
        line = "@attribute " name[i]
        if (Num[what]) {
            n = (Max[what] - Min[what] > 0) ? Bins : 1
            line = line " { " range(0, what)
            for (j = 1; j < n; j++)
                line = line " , " range(j, what)
            line = line " }"
        } else {
            line = line " discrete "
        }
        print line
    }
}

# cells(rows, data, cols): print rows 1..rows of data, one OFS-joined
# line per row, passing each value through cell().
function cells(rows, data, cols,    row, col, line, what) {
    for (row = 1; row <= rows; row++) {
        what = Name[1]
        line = cell(what, data[row, what], 1, cols)
        for (col = 2; col <= cols; col++) {
            what = Name[col]
            line = line OFS cell(what, data[row, what], col, cols)
        }
        print line
    }
}

# cell(what, c, col, cols): output form of value c of attribute what in
# column col.  The last column and missing values ("?") are returned
# unchanged; other numeric values become their bin label.
function cell(what, c, col, cols) {
    if (cols == col) return c
    if (c == "?") return c      # never bin a missing value
    return Num[what] ? cell1(c, what) : c
}

# o(a, s, what): debugging aid.  Prints every entry of array a as
# "s [ key ]= value" through "sort -t, " followed by the options in what.
function o(a, s, what,    i, com) {
    print ""
    com = what ? "sort -t, " what : "sort -t, "
    for (i in a)
        print s " [ " i " ]= " a[i] | com;
    close(com)
}