#!/usr/bin/env python

import csv
import argparse
import copy
from copy import deepcopy   # ensures the bare deepcopy() calls below resolve even if util does not re-export it
import random
import math
from cliffBORE import *
from discretize import *
from EffortStats import *
from util import *

def main():

    VirginData = []
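    # One EffortStats accumulator per treatment and noise level: index 0 is
    # the clean (0% noise) run, indices 1-8 correspond to 10%-80% injected
    # noise.  The log-transform variants are kept below as commented-out code.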
    MREs=[]
#    LogMREs=[]
    CliffMREs=[]
#    CliffLogMREs=[]

    for i in range(9):
        MREs.append(EffortStats("vanilla"+str(i)))
#        LogMREs.append(EffortStats("log"+str(i)))
        CliffMREs.append(EffortStats("cliff"+str(i)))
#        CliffLogMREs.append(EffortStats("cliff+log"+str(i)))

    # Get experiment arguments
    parser = argparse.ArgumentParser(description='run the CLIFF effort-estimation noise experiment.')
    parser.add_argument('N', metavar='N', type=int, nargs=1, help='number of cross-validation folds (the split is also repeated N times)')
    parser.add_argument('BINS', metavar='BINS', type=int, nargs=1, help='number of discretization bins')
    parser.add_argument('FILE', metavar='FILE', type=str, nargs=1, help='input csv file of numeric data, effort in the last column')
    parser.add_argument('TOP', metavar='TOP', type=int, nargs=1, help='number of top-ranked BORE columns to inject noise into')
    args = parser.parse_args()

    # Load csv file
    
    try:
        reader = csv.reader(open(args.FILE[0], 'rb'), delimiter=',')
    except IOError:
        print "Problem reading file.  Aborting"
        return

    # Parse every field as a float; all columns are assumed numeric, with the
    # dependent (effort) value in the last column.
    for row in reader:
        VirginData.append([float(item) for item in row])

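    # Build the discretizer once from the full data set; it is reused below to
    # discretize every training variant before running CliffBORE.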
    discretize = CliffDiscretize(VirginData,args.BINS[0])

    # Repeat N times: reshuffle the data and split it into N cross-validation folds.
    for i in range(args.N[0]):

        Sets = []
        WorkingData = copy.deepcopy(VirginData)
        random.shuffle(WorkingData)

        for j in range(args.N[0]):
            Sets.append([])

        # Deal instances out round-robin so the folds are of (nearly) equal size.
        for j, instance in enumerate(WorkingData):
            Sets[j % args.N[0]].append(instance)
        
        for j in range(args.N[0]):

            Train = []
            Test = []

            # Fold j is the test set; the remaining folds form the training set.
            for k in range(args.N[0]):
                if j == k:
                    Test.extend(Sets[k])
                else:
                    Train.extend(Sets[k])
            
            # Nine deep copies of the training set; copies 0-7 receive
            # 10%-80% injected noise below, the ninth is never modified.
            NoisyTrain = [copy.deepcopy(Train) for _ in range(9)]

            # Columns that will receive injected noise: start with the dependent
            # (effort) column at index -1, then, for each class ranking returned
            # by CliffBORE, add up to TOP of its top-ranked independent columns.
            NoiseIndices = [-1]

            TopN = CliffBORE(discretize.DiscretizeSet(deepcopy(VirginData), True)).crits

            for klass in TopN:
                TopCount = min(args.TOP[0], len(klass))
                for topN in range(TopCount):
                    if klass[topN][-1] not in NoiseIndices:
                        NoiseIndices.append(klass[topN][-1])

            # To randomize only the dependent variable, use:
            # NoiseIndices = [-1]

            for NoiseIndice in NoiseIndices:
                # For each noise level (10%-80%), overwrite this column in the
                # first percent*10% of training rows (already shuffled) with the
                # value from a randomly chosen training row.
                for percent in range(1, 9):
                    for num in range(int(len(Train) * (percent / 10.0))):
                        NoisyTrain[percent-1][num][NoiseIndice] = copy.deepcopy(
                            Train[random.randint(0, len(Train)-1)][NoiseIndice])

            # Construct CLIFF prototype training sets: index 0 from the clean
            # training data, indices 1-8 from the corresponding noisy copies.
            CliffTrain = []
 #           CliffLogTrain = []
            CliffTrain.append(discretize.GetQualifyingInstances(
                CliffBORE(discretize.DiscretizeSet(deepcopy(Train))).prototypes,
                deepcopy(Train)))
 #           CliffLogTrain.append(CliffBORE(log_datum(deepcopy(Train))).ReducedSet)
            for percent in range(1, 9):
                CliffTrain.append(discretize.GetQualifyingInstances(
                    CliffBORE(discretize.DiscretizeSet(deepcopy(NoisyTrain[percent-1]))).prototypes,
                    deepcopy(NoisyTrain[percent-1])))
 #               CliffLogTrain.append(CliffBORE(log_datum(deepcopy(NoisyTrain[percent-1]))).ReducedSet)
            # For every test instance, estimate effort by nearest neighbour on each
            # training variant and record the MRE (the final Evaluate argument
            # appears to be the neighbourhood size: 5 for clean sets, 3 for noisy).
            for instance in Test:
#                print "Train: "+str(len(Train))
#                print "CliffTrain: "+str(len(CliffTrain[0]))
                MREs[0].Evaluate(instance,deepcopy(Train),5)
 #               LogMREs[0].Evaluate(log(deepcopy(instance)),log_datum(deepcopy(Train)))
                CliffMREs[0].Evaluate(instance,deepcopy(CliffTrain[0]),5)
 #               CliffLogMREs[0].Evaluate(log(deepcopy(instance)),CliffLogTrain[0])
                for percent in range(1,9,1):
#                    print "Train: "+str(len(NoisyTrain[percent-1]))
#                    print "CliffTrain: "+str(len(CliffTrain[percent]))
                    MREs[percent].Evaluate(instance,deepcopy(NoisyTrain[percent-1]),3)
 #                   LogMREs[percent].Evaluate(log(deepcopy(instance)), log_datum(deepcopy(NoisyTrain[percent-1])))
                    CliffMREs[percent].Evaluate(instance,deepcopy(CliffTrain[percent]),3)
 #                   CliffLogMREs[percent].Evaluate(log(deepcopy(instance)), CliffLogTrain[percent])

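    # Report one line per treatment and noise level:
    #   <dataset>  <label>  <noise %>  <MdMRE>
    # (GetMDMRE presumably returns the median magnitude of relative error.)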
    for noise in range(len(MREs)):
        print args.FILE[0].split('/')[-1],MREs[noise].label+"\t"+str(noise*10)+"\t"+str(MREs[noise].GetMDMRE())
#        print args.FILE[0].split('/')[-1],LogMREs[noise].label+"\t"+str(noise*10)+"\t"+str(LogMREs[noise].GetMDMRE())
        print args.FILE[0].split('/')[-1],CliffMREs[noise].label+"\t"+str(noise*10)+"\t"+str(CliffMREs[noise].GetMDMRE())
#        print args.FILE[0].split('/')[-1],CliffLogMREs[noise].label+"\t"+str(noise*10)+"\t"+str(CliffLogMREs[noise].GetMDMRE())

if __name__ == '__main__':
    main()
