#include <stdio.h>
#include <time.h>
#include <string.h>
#include <math.h>
#include "Stack.c"
#include "pickTwo.c"
#include "exaustCombine.c"
#include "PickObject.c"

// Define the scoring methods. Essentially an enumerated data type.
#define LIFT 0
#define INFOGAIN 1
#define PROBSUPT 2
#define EFFEST 3

// Define the picking methods.  Essentially an enumerated data type.
#define PICK 0
#define EXAUST 1
#define PROBSUPT 2

// Define the discretization methods.  Essentially an enumerated data type.
#define EQUALRANGE 0
#define EQUALFREQ 1

/* Commenting these might be a temporary fix to a big problem, right now I get no errors with it like this
 * but that doesn't mean it is right.  I did this because getnames has these includes in them so it shouldn't matter.
 * I also commented out these three lines in getdata.c, so problems could arise from that as well.
 * #include "defns.i"
 * #include "types.i"
 * #include "extern.i"
 */
#include "getnames.c"
#include "getdata.c"
#include "nBins.c"
#include "eqFr.c"
#include "score.c"


/*  External data, described in extern.i  */
short		MaxAtt, MaxClass, MaxDiscrVal = 2;
ItemNo		MaxItem;
Description	*Item;
DiscrValue	*MaxAttVal;
char		*SpecialStatus;
String		*ClassName, *AttName, **AttValName, FileName = "DF";

/* Lift Globals */
float BaseLift;
int BestClassCount;
float BestSupport;
float ScoreChange;

/* a^2/(a+b) Globals */
int best, rest;
int **aCounts, **bCounts;
//int aCounts[50][10], bCounts[50][10];

/* EffEst Globals */
int LOC, *LOCs;
float Alpha, Beta, Gamma;

void printRule( PickObject *po );

PickObject* readRuleFromFile( char *fName )
{
  FILE *f = fopen( fName, "r" );
  PickObject *po = NULL, *t1 = NULL, *t2 = NULL;
  int att, attVal, done = 0, numChars = 0;
  float vals[3];
  char *currTok = (char*)malloc( sizeof( char ) * 2 ), c;
  currTok[0] = '\0';
  
  while ( !done )
  {
    // Get the attribute
    c = fgetc(f);
    numChars = 0;
    while ( c != ':' && c != '\n' )
    {
      numChars++;
      currTok = (char*)realloc( currTok, sizeof( char ) * ( numChars + 1 ) );
      currTok[numChars-1] = c;
      currTok[numChars] = '\0';
      c = fgetc( f );
    }
    if ( strcmp( currTok, "END" ) == 0 ) return po;
    att = Which( currTok, AttName, 0, MaxAtt );

    // Read in the attribute values
    while ( c != '\n' )
    {
      // Read the token
      c = fgetc( f );
      numChars = 1;
      while ( c != ',' && c != '\n' )
      {
	currTok = (char*)realloc( currTok, sizeof( char ) * (numChars+1) );
	currTok[numChars-1] = c;
	currTok[numChars] = '\0';
	numChars++;
	c = fgetc( f );
      }
      attVal = Which( currTok, AttValName[att], 1, MaxAttVal[att] );
      vals[0] = att; vals[1] = attVal; vals[2] = 0;
      t1 = setupPickObject( vals );
      t2 = po;
      po = combinePickObject( t1, t2 );
      //if ( t1 != NULL ) { deletePickObject( t1 ); free( t1 ); }
      //if ( t2 != NULL ) { deletePickObject( t2 ); free( t2 ); }
    }
  }
    
  fclose( f );

  return po;
}

void printClassDist( void *i )
{
  PickObject *po = (PickObject*)i;
  float toRet = 0;
  int *freq = (int*)malloc( ( MaxClass + 1 ) * sizeof( int ) ), c, item, att, pairCount = 0, inst = 0, attVal;
  for ( c = 0; c < MaxClass+1; c++ ) freq[c] = 0;
  
  for ( item = 0; item < MaxItem+1; item++ )
  {
    // Determine if this line of data is included in our subset.
    // Our subset is determined by the attribute value pairs this
    // PickObject contains.
    pairCount = 0;
    for ( att = 0; att < po->mSize; att++ )
      for( attVal = 0; attVal < po->mPicked[att].mSize; attVal++ )
	if ( po->mPicked[att].mAttrVal[attVal] == Item[item][po->mPicked[att].mAttr]._discr_val )
	{
	  pairCount++;
	  attVal = po->mPicked[att].mSize;
	}

    if ( pairCount >= po->mSize )
    {
      inst++;
      freq[Class(Item[item])]++;
    }
  }

  printf( "%-30s%20s%20s\n", "Name", "Count", "Score" );
  printf( "%-30s%20s%20s\n", "----", "-----", "-----" );
  for ( c = 0; c < MaxClass+1; c++ )
  {
    printf( "%-30s%20d%20f\n", ClassName[c], freq[c], pow( ScoreChange, c+1 ) );
    toRet += ( freq[c] * pow( ScoreChange, c ) );
  }
}

void printData()
{
  printf( "\n\n" );
  int i, j, k;
  for ( i = 0; i < MaxItem; i++ )
  {
    printf( "%s, ", AttValName[0][Item[i][0]._discr_val] );
    for ( j = 1; j < MaxAtt+1; j++ )
    {
      if ( Item[i][j]._discr_val == 0 ) printf( "?, " );
      else printf( "%s, ", AttValName[j][Item[i][j]._discr_val] );
    }
    printf( "%s\n", ClassName[Class( Item[i] )] );
  }
  printf( "\n\n" );
}

void printAttributes()
{
  printf( "\n\n" );
  int att, attVal;
  for ( att = 0; att < MaxAtt + 1; att++ )
  {
    printf( "%s: %s", AttName[att], AttValName[att][1] );
    for ( attVal = 2; attVal < MaxAttVal[att] + 1; attVal++ )
      printf( ", %s%s", AttValName[att][attVal], attVal % 5 == 0 ? "\n" : "" );
    printf( "\n" );
  }
}

void printRuleFile( FILE *file, PickObject *po )
{
  int i, j;
  if ( SpecialStatus[po->mPicked[0].mAttr] == Nil )
    qsort( po->mPicked[0].mAttrVal, po->mPicked[0].mSize, sizeof( int ), compareInts );
  
  fprintf( file, "%-30s = [ ", AttName[po->mPicked[0].mAttr] );
  fprintf( file, "%s(%d)", AttValName[po->mPicked[0].mAttr][po->mPicked[0].mAttrVal[0]], po->mPicked[0].mAttrVal[0] );
  for ( j = 1; j < po->mPicked[0].mSize; j++ )
    fprintf( file, " OR %s(%d)", AttValName[po->mPicked[0].mAttr][po->mPicked[0].mAttrVal[j]], po->mPicked[0].mAttrVal[j] );
  fprintf( file, " ]\n" );

  for ( i = 1; i < po->mSize; i++ )
  {
    fprintf( file, "and %-26s = [ ", AttName[po->mPicked[i].mAttr] );
    fprintf( file, "%s(%d)", AttValName[po->mPicked[i].mAttr][po->mPicked[i].mAttrVal[0]], po->mPicked[i].mAttrVal[0] );
    for ( j = 1; j < po->mPicked[i].mSize; j++ )
      fprintf( file, " OR %s(%d)", AttValName[po->mPicked[i].mAttr][po->mPicked[i].mAttrVal[j]], po->mPicked[i].mAttrVal[j] );
    fprintf( file, " ]\n" );
  }

 fprintf( file, "%35cScore: %2.5f\n", ' ', po->mScore );
 if ( LOC != 0 )
 {
   fprintf( file, "%35cEffort: %2.5f\n", ' ', po->mEffort );
   fprintf( file, "%35cPD: %2.5f\n", ' ', po->mPD );
   fprintf( file, "%35cPF: %2.5f\n", ' ', po->mPF );
 }
}

void printRule( PickObject *po )
{
  int i, j;
  
  //scoreTmp( po );
  printf( "%-30s = [ ", AttName[po->mPicked[0].mAttr] );
  printf( "%s(%d)", AttValName[po->mPicked[0].mAttr][po->mPicked[0].mAttrVal[0]], po->mPicked[0].mAttrVal[0] );
  for ( j = 1; j < po->mPicked[0].mSize; j++ )
    printf( " OR %s(%d)", AttValName[po->mPicked[0].mAttr][po->mPicked[0].mAttrVal[j]], po->mPicked[0].mAttrVal[j] );
  printf( " ]\n" );
  
  for ( i = 1; i < po->mSize; i++ )
  {
    printf( "and %-26s = [ ", AttName[po->mPicked[i].mAttr] );
    printf( "%s(%d)", AttValName[po->mPicked[i].mAttr][po->mPicked[i].mAttrVal[0]], po->mPicked[i].mAttrVal[0] );
    for ( j = 1; j < po->mPicked[i].mSize; j++ )
      printf( " OR %s(%d)", AttValName[po->mPicked[i].mAttr][po->mPicked[i].mAttrVal[j]], po->mPicked[i].mAttrVal[j] );
    printf( " ]\n" );
  }

 printf( "%35cScore: %2.5f\n", ' ', po->mScore );
 if ( LOC != 0 )
 {
   printf( "%35cEffort: %2.5f\n", ' ', po->mEffort );
   printf( "%35cPD: %2.5f\n", ' ', po->mPD );
   printf( "%35cPF: %2.5f\n", ' ', po->mPF );
   printf( "%35cSupport: %2.5f\n", ' ', po->mSupport );
 }
}

void printStack( Stack *stack, int topX )
{
  printf( "\n\n\tThe Stack\n" );
  StackNode *c = stack->mTop;
  char *fmtStr = ( char *)malloc( 11 * sizeof( char ) );
  int x = 0, i, j;
  while ( c != NULL && x < topX )
  {
    PickObject *po = (PickObject*)(c->mData);
    
    printClassDist( po );
    printf( "\n" );
    //scoreTmp( po );
    printRule( po );
    c = c->mPrev;
    x++;
  }
  free( fmtStr );
  printf( "\n" );
}

/**
 * This method is invoked when either an unrecognized option is sent in or the user types the ? option.
 **/
void printHelp()
{
  printf( "Usage: ./which [OPTIONS]\n" );
  printf( "  [OPTIONS]\n" );
  printf( "    -?\t%-20s\n", "Help" );
  printf( "    -s\t%-20s\n", "Scoring Type (ps||li|ee)" );
  printf( "\t ig - %s\n", "InfoGain" );
  printf( "\t ps - %s\n", "Prob * Support" );
  printf( "\t li - %s\n", "Lift #" );
  printf( "\t\t # - Base of Lift score.\n" );
  printf( "\t ee - %s\n", "Effort, PD, PF #" );
  printf( "\t\t # - Attribute number for Lines of Code.\n" );
  printf( "    -f\t%-20s\n", "FileName $" );
  printf( "    -z\t%-20s\n", "Stack Size #" );
  printf( "-(q|a)\t%-20s\n", "Discretization Type #" );
  printf( "\t q - Equal Frequency\n" );
  printf( "\t a - Equal Range\n" );
  printf( "\t\t # - Number of Bins.\n" );
  printf( "    -r\t%-20s\n", "Number to Report #" );
  printf( "-(p|e)\t%-20s\n", "Number to Pick  #" );
  printf( "\t p - Number of random combines.\n" );
  printf( "\t e - Number of entire stack combines.\n" );
  printf( "    -b\t%-20s\n", "Best Support #" );
  printf( "    -o\t%-20s\n", "Check Old #1 #2" );
  printf( "\t\t #1 - 1 For True, 0 for False.\n" );
  printf( "\t\t #2 - Number of picks in between checks.\n" );
  printf( "    -test $\t- Which will only test the file on the read in data.\n" );
  printf( "\t $ - The file with the saved Which rule in it.\n" );
  printf( "    -disc\t Use which as a discretizer only.\n" );
  printf( "\n---Configuration Paramaters---\n" );
  printf( "    -alpha\t- Sets up the weight of PD.\n" );
  printf( "    -beta\t- Sets up the weight of PF.\n" );
  printf( "    -gamma\t- Sets up the weight of Effort.\n" );
  printf( "\n\n" );
}

/**
 * This is the main file of the Which algorithm.  It will handle the control of all other methods used.
 * @param argc The number of command-line arguments passed in.
 * @param argv A vector of strings that holds the commands passed in, argv[0] is ./which
 * @return 0 if exectution was a success, 1 otherwise.
 **/
int main( int argc, char *argv[] )
{
  // Set up the variables that the user can change with options.
  FileName = "cf";            // Defined in C4.5's data parsing code.
  int StackSize = -1,         // Size of the stack, -1 means the Stack has infinite size.
    PickNo = 0,               // Number of times to invoke the pickTwo module.
    ExaustNo = 0,             // Number of exaustive stack runs.
    PickType = PICK,          // Tells Which which way to pick and combine stack data.
    NBins = 10,               // Number of bins used to discritize the data.
    ReportNo = 5,             // The top # of StackElements to report.
    ScoringType = LIFT,       // The method for scoring the individual StackElements.
    CheckOld = 0,             // If this is one, we care about checking OldLift and terminating early.
    CheckEvery = 100,         // This is how many picks in between Old and current lift checks.
    DiscrMethod = EQUALRANGE, // The method of discretization.
    LOCAtt = 0,               // The attribute index of the lines of code attribute.( Used in Effort Estimation scoring )
    JustTest = 0,             // If this is true, don't learn anything, used the saved WhichRule. Otherwise, learn. 
    JustDisc = 0;             // If this is true, just create the discretized data file.
  float OldLift = 0.0;        // This is the lift at every 100 runs.  If it does not change by 20%                              // every 100 runs, Which wiill teminate and report that rule.

  char *whichRuleStr;
  
  int t = time( 0 );
  float (*score)(void*);
  BestSupport = 0.3;
  ScoreChange = 2;
  LOC = 0;
  PickNo = 0;
  Alpha = Beta = Gamma = 1;

  // Initialize the random timer.
  srand( time( 0 ) );
  
  /* Parse out the options as they are found and set the corresponding variables.
   * More options can be added as needed.
   */
  int optionNo;
  for ( optionNo = 1; optionNo < argc; optionNo += 2 )
  {
    if ( strcmp( argv[optionNo], "-p" ) == 0 ) 
    { PickNo = atoi( argv[optionNo+1] ); PickType = PICK; }
    else if ( strcmp( argv[optionNo], "-e" ) == 0 )
    { ExaustNo = atoi( argv[optionNo+1] ); PickType = EXAUST; }
    else if ( strcmp( argv[optionNo], "-q" ) == 0 )
    { DiscrMethod = EQUALFREQ; NBins = atoi( argv[optionNo+1] ); }
    else if ( strcmp( argv[optionNo], "-a" ) == 0 ) 
    { DiscrMethod = EQUALRANGE; NBins = atoi( argv[optionNo+1] ); }
    else if ( strcmp( argv[optionNo], "-r" ) == 0 ) 
    { ReportNo = atoi( argv[optionNo+1] ); }
    else if ( strcmp( argv[optionNo], "-f" ) == 0 )
    { FileName = argv[optionNo+1]; } 
    else if ( strcmp( argv[optionNo], "-s" ) == 0 )
    {
      if ( strcmp( argv[optionNo+1], "ig" ) == 0 )
      {
	ScoringType = INFOGAIN;
      }
      else if ( strcmp( argv[optionNo+1], "ps" ) == 0 ) 
      {
	ScoringType = PROBSUPT;
      }
      else if ( strcmp( argv[optionNo+1], "li" ) == 0 ) 
      { 
	ScoringType = LIFT; 
	ScoreChange = atof( argv[optionNo+2] ); 
	optionNo++; 
      }
      else if ( strcmp( argv[optionNo+1], "ee" ) == 0 ) 
      { 
	ScoringType = EFFEST; 
	LOCAtt = atoi( argv[optionNo+2] ); 
	optionNo++; 
      }
    }
    else if ( strcmp( argv[optionNo], "-z" ) == 0 )
    { StackSize = atoi( argv[optionNo+1] ); }
    else if ( strcmp( argv[optionNo], "-b" ) == 0 )
    { BestSupport = atoi( argv[optionNo+1] )/(float)100.0; }
    else if ( strcmp( argv[optionNo], "-o" ) == 0 )
    {
      CheckOld = atoi( argv[optionNo+1] ); 
      CheckEvery = atoi( argv[optionNo+2] ); 
      optionNo++;
    }
    else if ( strcmp( argv[optionNo], "-alpha" ) == 0 ) 
    { Alpha = atof( argv[optionNo+1] ); }
    else if ( strcmp( argv[optionNo], "-beta" ) == 0 )
    { Beta = atof( argv[optionNo+1] ); }
    else if ( strcmp( argv[optionNo], "-gamma" ) == 0 )
    { Gamma = atof( argv[optionNo+1] ); }
    else if ( strcmp( argv[optionNo], "-test" ) == 0 )
    { whichRuleStr = argv[optionNo+1]; JustTest = 1; }
    else if ( strcmp( argv[optionNo], "-disc" ) == 0 )
    { JustDisc = 1; optionNo--; }
    else { printHelp(); exit( 1 ); }
  }
  
  // Print the options.
  printf( "Options\n" );
  printf( "  StackSize = %-4d\n", StackSize );
  printf( "  Discretization Method = %s\n", DiscrMethod == EQUALRANGE ? "Equal Range" : "Equal Frequency" );
  printf( "  \tBins = %-4d\n", NBins );
  printf( "  ReportNo = %-4d\n", ReportNo );
  printf( "  FileName = %s\n", FileName );
  printf( "  ScoringType = " );
  if ( ScoringType == INFOGAIN ) printf( "INFOGAIN\n" );
  else if ( ScoringType == LIFT ) printf( "LIFT\n\tBase Score = %3.2f\n", ScoreChange );
  else if ( ScoringType == PROBSUPT ) printf( "Prob*Supt\n" );
  else if ( ScoringType == EFFEST ) 
  {
    printf( "Effort, Pd, Pf\n" );
    printf("\tLOC Attr No. = %d\n", LOCAtt );
    printf("\talpha = %f\n", Alpha );
    printf("\tbeta = %f\n", Beta );
    printf("\tgamma = %f\n", Gamma );
  }
  printf( "  BestSupport = %-4.2f%%\n", BestSupport * 100 );
  printf( "  Pick Type = %s\n", ( PickType == PICK ) ? "Random" : "Exaustive" );
  printf( "\tNumber of Picks = %d\n", PickNo );
  printf( "  Check Old = %s\n", ( CheckOld ) ? "True" : "False" );
  printf( "  CheckEvery = %d\n", CheckEvery );
  printf( "  Test Only = %s\n", JustTest ? "True" : "False" );
  if ( JustTest ) printf( "\tWhich Rule File = %s\n", whichRuleStr );
  printf( "  Disc Only = %s\n", JustDisc ? "True" : "False" );
  printf( "\n\n" );

  // Set up a new stack with a max size of the first command-line argument
  Stack *stack = newStack( StackSize, deletePickObject, setupPickObject );

  // Read in the class and attribute information and populate the data arrays.
  GetNames();
  GetData(".data");

  // We have to calculate the lines of code before they are discretized, otherwise the value will be off.
  // Therefore we must initialize the LOCs array BEFORE discretization.
  if ( ScoringType == EFFEST ) 
  {
    int item, att;

    calcEffEst( LOCAtt );

    // Write the lines of code to a file.
    if ( JustDisc )
    {
      FILE *f = fopen( "effest", "w" );
      for ( item = 0; item < MaxItem+1; item++ )
	fprintf( f, "%d\n", LOCs[item] );
      fclose( f );
    }
    // Read in the lines of code from a file.
    else if ( SpecialStatus[LOCAtt] == DISCRETE )
    {
      LOC = 0;
      FILE *f = fopen( "effest", "r" );
      for ( item = 0; item < MaxItem+1; item++ )
      {
	fscanf( f, "%d", &(LOCs[item]) );
	LOC += LOCs[item];
      }
      fclose( f );
    }
		 
    for ( item = 0; item < MaxItem+1; item++ )
      for ( att = 0; att < MaxAtt+1; att++ )
	if ( SpecialStatus[att] == Nil )
	{
	  Item[item][att]._cont_val = Item[item][att]._cont_val < pow( 10, -5 ) ? pow( 10, -5 ) : Item[item][att]._cont_val;
	  Item[item][att]._cont_val = log( Item[item][att]._cont_val );
	}
  }

  if ( DiscrMethod == EQUALRANGE ) nBins( NBins, ScoringType == EFFEST );
  else if ( DiscrMethod == EQUALFREQ ) eqFr( NBins, ScoringType == EFFEST );  
 
  // Get the scoring method.
  if ( ScoringType == INFOGAIN )
  {
    score = scoreInfoGain;
  }
  else if ( ScoringType == LIFT )
  {
    calcBaseLift();
    score = scoreLift;
  }
  else if ( ScoringType == PROBSUPT )
  {
    calcBaseProbSupt();
    score = scoreProbSupt;
  }
  else if ( ScoringType == EFFEST )
  {
    score = scoreEffEst;
  }

  // If we are just discretizing for later use, print the discretized data and exit.
  if ( JustDisc )
  {
    printDiscrData();
    return 0;
  }

  // If we are just testing, read the rule and report the test results.
  if ( JustTest )
  {
    PickObject *po = readRuleFromFile( whichRuleStr );
    po->mScore = score( po );
    printRule( po );
    printf( "%f,%f,%f,%f,%f\n", po->mScore, po->mPD, po->mPF, po->mEffort, po->mSupport );
    return 0;
  }
  // Otherwise, train on the data and get the rules.
  int attr, attrVal;
  for ( attr = 0; attr < MaxAtt + 1; attr++ )
  {
    for ( attrVal = 1; attrVal < MaxAttVal[attr] + 1; attrVal++ )
    {
      PickObject *po;
      float pair[3];
      pair[0] = attr;
      pair[1] = attrVal;
      pair[2] = rand()/(float)RAND_MAX * 100.0;
      
      po = (PickObject*)setupPickObject( pair );
      po->mScore = score( po );
      if ( po->mScore == 0 ) po->mScore += 0.001;
      pushWithScore( comparePickObject, po, stack );
    }
  }
  
  OldLift = getScorePickObject( stack->mTop->mData );
  // If PickType is PICK, we will do the stochastic picking
  int y, truePickNo = 0;
  FILE *file = fopen( "TopOfStack", "w" );
  //pickTwo PickNo times
  for ( y = 0; y < PickNo; y++ ) 
  {
    //printDistribution( stack, getScoreInt );
    pickTwo( stack, score, getScorePickObject, comparePickObject, combinePickObject );
    fprintf( file, "Pick %d\n", y + 1 );
    printRuleFile( file, ((PickObject*)stack->mTop->mData) );
    if ( ( y + 1 ) % CheckEvery == 0 )
    {
      float curLift = getScorePickObject( stack->mTop->mData );
      truePickNo = y;
      if ( ( curLift - OldLift )/OldLift < 0.2 ) y = PickNo;
      OldLift = curLift;
    }
  }
    
  // Print the stack header.
  fclose( file );
  if ( 1 )//ScoringType != EFFEST )
  {
    printf( "\tStack after picking two %d times\n", truePickNo );
    printStack( stack, ReportNo );

    FILE *wr = fopen( "wr", "w" );
    printRuleFile( wr, stack->mTop->mData );
    fclose( wr );
  }
  else
  {
    int gPD;
    float goalPD;
    PickObject *tpo = ((PickObject*)stack->mTop->mData);
    FILE *gpdf = fopen( "pdeffest", "w+" );
    for ( gPD = 0; gPD <= 100; gPD+=10 )
    {
      goalPD = gPD/(float)100.0;
      tpo->mScore = calcDist( goalPD - tpo->mPD, tpo->mPF, tpo->mEffort, tpo->mSupport );
      printf( "Top Rule with GoalPD = %f: %f - %f = %f\n", goalPD, goalPD, tpo->mPD, goalPD - tpo->mPD );
      printRule( tpo );
      printf( "\n" );
      fprintf( gpdf, "%f %f\n", tpo->mPD, tpo->mPF );
    }
    fclose( gpdf );
  }
  printf( "\nStart %d. Stop %d.\n", t, time(0) );
  printf( "Took %ds to run\n\n", time(0) - t );
   
  return 0;
}
