#!/usr/bin/env python
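"""Run the CLIFF noise experiment: repeated N-way cross-validation over an
ARFF data set, injecting 10%-80% noise into the training folds and comparing
NaiveBayes and nearest-neighbor classifiers trained on the raw data versus
the CLIFF-reduced prototypes. Reports pd, pf and precision per class at each
noise level."""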

import csv
import argparse
import copy
import random
import math
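
# Project-local modules: CLIFF/BORE instance selection, discretization,
# NaiveBayes and kNN learners, ARFF loading, and evaluation statistics.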
from cliffBORE import *
from discretize import *
from NaiveBayes import *
from statistics import *
from util import *
from arff import *
from popularity import *
from knn import *

def subsample(data):
    """Return a class-balanced subsample: every TRUE/YES instance plus an
    equal number of randomly drawn FALSE/NO instances (without replacement)."""

    def isTrue(instance):
        return instance[-1].upper() in ("TRUE", "YES")

    def isFalse(instance):
        return instance[-1].upper() in ("FALSE", "NO")

    trueInstances = [instance for instance in data if isTrue(instance)]
    falseInstances = [instance for instance in data if isFalse(instance)]

    VirginData = list(trueInstances)

    # Pair each TRUE instance with one randomly chosen FALSE instance, but
    # never draw more FALSE instances than exist.
    for _ in range(min(len(trueInstances), len(falseInstances))):
        instance = random.choice(falseInstances)
        VirginData.append(instance)
        falseInstances.remove(instance)

    random.shuffle(VirginData)
    return VirginData
    

def main():

    VirginData = []
    Bayes = []
    CliffBayes = []
    NN = []
    CliffNN = []

    # One DefectStats collector per noise level (0%, 10%, ..., 80%) for each
    # learner, with and without CLIFF prototype selection.
    for i in range(9):
        Bayes.append(DefectStats("vlla"+str(i)))
        CliffBayes.append(DefectStats("cliff"+str(i)))
        NN.append(DefectStats("NNlla"+str(i)))
        CliffNN.append(DefectStats("kffNN"+str(i)))

    # Get experiment arguments
    parser = argparse.ArgumentParser(description='run cliff-effort experiment.')
    parser.add_argument('N', metavar='N', type=int, nargs=1, help='number of cross-val')
    parser.add_argument('BINS', metavar='BINS', type=int, nargs=1, help='number of bins')
    parser.add_argument('FILE', metavar='FILE', type=str, nargs=1, help='filename')
#    parser.add_argument('TOP', metavar='TOP', type=int,nargs=1,help='Top columns in BORE')
    args = parser.parse_args()
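    # Example invocation (script and data file names are illustrative):
    #   python cliff_experiment.py 10 7 data/defects.arff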

    # Load the ARFF data file.
    try:
        data = Arff(args.FILE[0])
        VirginData = data.data
#        reader = csv.reader(open(args.FILE[0],'rb'), delimiter=',')
    except:
        print "Problem reading file. Aborting."
        return

    discretize = CliffDiscretize(VirginData,args.BINS[0])
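    # The discretizer built from the full data set is reused below to
    # discretize each training set before running CliffBORE.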

    # Repeat the experiment N times; each repeat is an N-way cross-validation.
    for i in range(args.N[0]):

        Sets = []
        WorkingData = copy.deepcopy(VirginData)
        random.shuffle(WorkingData)

        for j in range(args.N[0]):
            Sets.append([])

        # Deal the shuffled instances round-robin into the N folds.
        for j, instance in enumerate(WorkingData):
            Sets[j % args.N[0]].append(instance)
        
        for j in range(args.N[0]):
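            # Fold j is the test set; every other fold goes into the training set.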

            Train = []
            Test = []

            for k in range(args.N[0]):
                if j == k:
                    for instance in Sets[k]:
                        Test.append(instance)
                else:
                    for instance in Sets[k]:
                        Train.append(instance)
            
            # Deep copies of the training set, one per noise level, to be
            # corrupted independently below.
            NoisyTrain = []

#            PopularTrain = Popularity(copy.deepcopy(Train)).GetWholeSet()

            for k in range(9):
                NoisyTrain.append(copy.deepcopy(Train))

            # By default only the dependent (class) variable gets noise.
            NoiseIndices = [-1]

#            TopN = CliffBORE(discretize.DiscretizeSet(copy.deepcopy(VirginData))).crits

            # To also randomize the top independent variables, uncomment TopN
            # above, the TOP argument, and the loop below.
#            for klass in TopN:
#                randomizeStuff = min(args.TOP[0],len(klass))
#                for topN in range(randomizeStuff):
#                    if klass[topN][-1] not in NoiseIndices:
#                        NoiseIndices.append(klass[topN][-1])

            # For each noise level (10%, 20%, ..., 80%), overwrite the chosen
            # attribute in the first percent*10% of the training instances with
            # the value from a randomly picked training instance.
            for NoiseIndice in NoiseIndices:
                for percent in range(1, 9):
                    limit = int(len(Train) * (float(percent) / 10.0))
                    for num in range(limit):
                        donor = random.randint(0, len(Train) - 1)
                        NoisyTrain[percent-1][num][NoiseIndice] = copy.deepcopy(Train[donor][NoiseIndice])

            # Construct the prototype training sets using CLIFF: the clean set
            # first, then one per noise level.
            CliffTrain = []
            CliffTrain.append(discretize.GetQualifyingInstances(
                CliffBORE(discretize.DiscretizeSet(copy.deepcopy(Train))).prototypes,
                copy.deepcopy(Train)))

            for percent in range(1, 9):
                CliffTrain.append(discretize.GetQualifyingInstances(
                    CliffBORE(discretize.DiscretizeSet(copy.deepcopy(NoisyTrain[percent-1]))).prototypes,
                    copy.deepcopy(NoisyTrain[percent-1])))

            # Train NaiveBayes on the raw training set and on the CLIFF
            # prototypes, for the clean data and for every noise level.
            BayesClassifiers = []
            CliffBayesClassifiers = []
            BayesClassifiers.append(NaiveBayes(copy.deepcopy(Train)))
            CliffBayesClassifiers.append(NaiveBayes(copy.deepcopy(CliffTrain[0])))
            for percent in range(1, 9):
                BayesClassifiers.append(NaiveBayes(copy.deepcopy(NoisyTrain[percent-1])))
                CliffBayesClassifiers.append(NaiveBayes(copy.deepcopy(CliffTrain[percent])))

            # Score every classifier (Bayes, CLIFF+Bayes, nearest neighbor, and
            # CLIFF+nearest neighbor) on each test instance, at every noise level.
            for instance in Test:
                Bayes[0].Evaluate(BayesClassifiers[0].NaiveBayesClassify(instance),instance[-1])
                CliffBayes[0].Evaluate(CliffBayesClassifiers[0].NaiveBayesClassify(instance),instance[-1])
                NN[0].Evaluate(kNearestNeighbors(instance,Train)[0][-1],instance[-1])
                CliffNN[0].Evaluate(kNearestNeighbors(instance,CliffTrain[0])[0][-1],instance[-1])
                for percent in range(1,9,1):
                    Bayes[percent].Evaluate(BayesClassifiers[percent].NaiveBayesClassify(instance),instance[-1])
                    CliffBayes[percent].Evaluate(CliffBayesClassifiers[percent].NaiveBayesClassify(instance),instance[-1])
                    NN[percent].Evaluate(kNearestNeighbors(instance,NoisyTrain[percent-1])[0][-1],instance[-1])
                    CliffNN[percent].Evaluate(kNearestNeighbors(instance,CliffTrain[percent])[0][-1],instance[-1])
                    
    # Report pd, pf, and precision per class for every learner and noise level.
    dataset = args.FILE[0].split('/')[-1]
    for noise in range(len(Bayes)):
        for stats in (Bayes[noise], CliffBayes[noise], NN[noise], CliffNN[noise]):
            for klass in ("TRUE", "FALSE"):
                print dataset, "\t".join([stats.label, str(noise*10), klass,
                                          str(stats.pd(klass)), str(stats.pf(klass)),
                                          str(stats.precision(klass))])

if __name__ == '__main__':
    main()
