#!/bin/bash

# Use this script to generate data to figure out good .cfg option values for
# dataset $1.  In the .data file generated, each instance is a run of tar3
# with different .cfg option values.  Attributes are the values and the class
# is the worth of the best treatment found with those .cfg option values,
# rounded to 1 decimal place.  After running this script run "bin/tar3 cfgsim"
# to see what .cfg option values are good.

# $1 - Stem with path of the data file to configure Tar3 on.
# $2 - Location of the Tar3 learner.

# Set the global variable i to a random integer from the closed interval
# [$1, $1+$2-1].  Note that $2 is the NUMBER of possible values, not the
# width of the interval: "rand_int 2 5" yields one of 2,3,4,5,6.
#   $1 - smallest value that can be produced.
#   $2 - how many consecutive integers, starting at $1, can be produced;
#        must be greater than 0 (it is used as a modulus).
# Returns 0 on success.  An assignment with arithmetic expansion is used
# rather than "let" because "let" exits with status 1 whenever the computed
# value is 0, which would make a legitimate result look like a failure.
rand_int() {
  i=$(( $1 + RANDOM % $2 ))
}

# Back up the $1.cfg file if it exists (the loop below starts a fresh one on
# every iteration), and delete any old .data file written by this script.
# Both steps are silent no-ops when the file in question is absent.  "--"
# keeps a stem that starts with "-" from being parsed as an option.
if [[ -e "$1.cfg" ]]; then
  mv -- "$1.cfg" "$1-old.cfg"
fi
rm -f -- cfgsim.data

# Make up to 10 instances, one per tar3 run.  Each is a list of .cfg option
# values followed by the best treatment's worth.  Runs from which no worth
# can be extracted are skipped, so fewer than 10 lines may be written.
# NOTE(review): this comment used to promise 100 instances while the loop ran
# 10 times; the loop count is left unchanged -- confirm which was intended.
for ((j = 0; j < 10; j++)); do
  # Each line below draws a value with rand_int (which leaves it in $i),
  # appends it to the instance string s, and writes the option to $1.cfg.

  # Set granularity to 2,3...6 (">" starts a new $1.cfg file).
  # NOTE(review): previously documented as 2,3...5, but "rand_int 2 5"
  # produces 2 through 6 -- confirm the intended range.
  rand_int 2 5;               s="$i";     echo "granularity: $i"   > "$1.cfg"

  # Set maxNumber to 5,10...25 (append to $1.cfg, as do all later options).
  rand_int 1 5; i=$((i * 5));  s="$s, $i"; echo "maxNumber: $i"    >> "$1.cfg"

  # minSize is 1..3 and maxSize is 3..5.
  rand_int 1 3;               s="$s, $i"; echo "minSize: $i"      >> "$1.cfg"
  rand_int 3 3;               s="$s, $i"; echo "maxSize: $i"      >> "$1.cfg"

  # randomTrials and futileTrials are 5,10...25; bestClass is 10%,20%...50%
  # (the "%" goes into the .cfg file only, not into the instance string).
  rand_int 1 5; i=$((i * 5));  s="$s, $i"; echo "randomTrials: $i" >> "$1.cfg"
  rand_int 1 5; i=$((i * 5));  s="$s, $i"; echo "futileTrials: $i" >> "$1.cfg"
  rand_int 1 5; i=$((i * 10)); s="$s, $i"; echo "bestClass: $i%"   >> "$1.cfg"

  # Run tar3 and set t to the worth of the best treatment, to 1 decimal place.
  # The gawk program acts only on the second output line containing "Worth=":
  # it strips that text and prints the rest of the line as a %1.1f number.
  # $1 and $2 are quoted so that paths containing spaces or glob characters
  # reach tar3 intact.
  t=$("$2" "$1" \
    | gawk '/Worth=/ {if (++i==2) {gsub("Worth=","");printf("%1.1f\n",$0)} }')

  # If a worth value was found, echo the instance line to stdout and append
  # it to the .data file.
  if [[ -n "$t" ]]; then
    echo "$s, $t" | tee -a cfgsim.data
  fi
done

# Start a new .names file with the class line: the distinct worth values (the
# 8th whitespace-separated field of each .data line) sorted numerically from
# low to high and joined with ", ".
#   $1 - .data file to read.
#   $2 - .names file to create or overwrite.
# Lines with fewer than 8 fields are ignored instead of contributing an empty
# class.  If the .data file is missing or has no usable lines, the class line
# is written empty.  LC_ALL=C makes sort treat "." as the decimal point
# regardless of the caller's locale.
write_class_line() {
  awk 'NF >= 8 { print $8 }' "$1" \
    | LC_ALL=C sort -n -u \
    | awk 'NR > 1 { printf ", " } { printf "%s", $0 } END { print "" }' > "$2"
}
write_class_line cfgsim.data cfgsim.names

# Complete the .names file: append one "name: type" line per .cfg option, in
# the same order as the values appear in each .data instance.  All seven
# options are declared continuous.
# NOTE(review): the bestClass entry has no space after its colon, unlike the
# other six; it is emitted exactly that way here -- confirm tar3 accepts it.
printf '%s\n' \
  "granularity: continuous" \
  "maxNumber: continuous" \
  "minSize: continuous" \
  "maxSize: continuous" \
  "randomTrials: continuous" \
  "futileTrials: continuous" \
  "bestClass:continuous" >> cfgsim.names
