/* 
 * This program discretizes all of the real or integer attributes in an ARFF file.
 * It outputs a new ARFF file in which each of those attributes has been equal-frequency discretized.
 * Command-Line Inputs:
 * -n = number - The number of bins to discretize into.
 * -i = file - The input file.
 * -o = file - The output file.
 */

#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using namespace std;

/*
 * Converts every character of str to upper case, in place.
 *
 * str - The string to modify.
 *
 * Each character is converted to unsigned char before it is handed to
 * toupper: passing a negative char value (any byte >= 0x80 on platforms
 * where char is signed) is undefined behaviour.
 */
void toUpper( std::string &str )
{
  // size_type matches the unsigned type returned by length().
  for ( std::string::size_type pos = 0; pos < str.length(); pos++ )
    str[pos] = static_cast< char >( std::toupper( static_cast< unsigned char >( str[pos] ) ) );
}

/*
 * Formats a value as a string using its stream insertion operator.
 *
 * data - The value to format; T must support operator<< on an ostream.
 *
 * Returns everything the insertion wrote. The text is taken straight from
 * the stream buffer rather than extracted with operator>>, which would
 * skip leading whitespace and stop at the first whitespace character.
 */
template < class T >
std::string toString( T data )
{
  std::ostringstream ss;
  ss << data;
  return ss.str();
}

/*
 * Entry point.
 *
 * Parses the command-line options (-n bins, -i input file, -o output file),
 * echoes them, then reads the header of the input file line by line up to
 * and including the first non-comment line that contains "@DATA".
 * Every non-comment header line is upper-cased; if it contains "REAL", the
 * text from "REAL" onwards is replaced by a nominal set {'_1_',...,'_n_'}
 * with one label per bin. Each resulting line is stored and printed to
 * standard output.
 *
 * The output file name is parsed and echoed, but nothing in this function
 * writes to that file.
 *
 * Returns 0 on success, 1 on a bad command line, an unreadable input file
 * or a header that has no @DATA line.
 */
int main( int argc, char *argv[] )
{
  int nBins = 10;
  std::string inFileName = "in.arff", outFileName = "out.arff";
  std::vector< std::string > attrs;

  // Options come in "-x value" pairs, hence the step of two.
  for ( int i = 1; i < argc; i += 2 )
  {
    const std::string flag = argv[i];
    // Reject arguments too short to carry an option letter instead of
    // indexing past their end.
    if ( flag.length() < 2 || flag[0] != '-' )
    {
      std::cerr << "Incorrect option " << flag << ". Exiting." << std::endl;
      return 1;
    }

    const char opt = flag[1];
    if ( opt != 'n' && opt != 'i' && opt != 'o' )
    {
      // Kept on standard output with its original wording.
      std::cout << "Incorrect option -" << opt << ". Exiting." << std::endl;
      return 1;
    }

    // A trailing option without a value would otherwise read argv[argc].
    if ( i + 1 >= argc )
    {
      std::cerr << "Missing value for option -" << opt << ". Exiting." << std::endl;
      return 1;
    }

    const std::string value = argv[i + 1];
    switch ( opt )
    {
      case 'n':
      {
        std::istringstream parser( value );
        // At least one bin is required: the generated set always starts
        // with '_1_'.
        if ( !( parser >> nBins ) || nBins < 1 )
        {
          std::cerr << "Invalid number of bins " << value << ". Exiting." << std::endl;
          return 1;
        }
        break;
      }
      case 'i': inFileName = value; break;
      case 'o': outFileName = value; break;
    }
  }
  std::cout << "Options:\n" << "nBins = "  << nBins << "\nIn File = " << inFileName << "\nOut File = " << outFileName << "\n\n";

  std::ifstream inFile( inFileName.c_str() );
  if ( !inFile )
  {
    std::cerr << "Unable to open input file " << inFileName << ". Exiting." << std::endl;
    return 1;
  }

  // Testing getline's result ends the loop at end of file or on a read
  // error, so a header without @DATA can no longer spin forever.
  bool foundData = false;
  std::string line;
  while ( !foundData && std::getline( inFile, line ) )
  {
    // Lines starting with '%' are comments: not stored, not printed, and
    // not examined for the @DATA marker.
    if ( !line.empty() && line[0] == '%' )
      continue;

    toUpper( line );

    // NOTE(review): this is a plain substring match, so "REAL" inside an
    // attribute or relation name is rewritten as well - confirm whether
    // that is acceptable for the expected inputs.
    const std::string::size_type realPos = line.find( "REAL" );
    if ( realPos != std::string::npos ) //|| c.find( "INTEGER" ) != string::npos || c.find( "NUMBER" ) != string::npos )
    {
      line.erase( realPos );
      line += "{'_1_'";
      for ( int j = 2; j <= nBins; j++ )
        line += ",'_" + toString( j ) + "_'";
      line += "}";
    }

    attrs.push_back( line );
    std::cout << line << std::endl;
    foundData = line.find( "@DATA" ) != std::string::npos;
  }

  if ( !foundData )
  {
    std::cerr << "No @DATA section found in " << inFileName << ". Exiting." << std::endl;
    return 1;
  }

  return 0;
}
