package feedback;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.HashMap;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.HitCollector;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryTermVector;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermFreqVector;

/**
 *
 * @author greg
 */
public class AutoRank {
    
    // Score per document id used for real rankings: main() seeds one 0.0 entry per id
    // and sortAndDisplay() reads this list when testMode == 0.
    // NOTE(review): presumably filled in by doStreamingSearch(searcher, query) - confirm.
    public static ArrayList<Float> scores = new ArrayList<Float>();
    // Same layout as scores; sortAndDisplay() reads this list when testMode != 0,
    // i.e. while main() is evaluating trial queries.
    public static ArrayList<Float> fakeScores = new ArrayList<Float>();
    // Memo of the noise value (Double) already computed for a term (String key);
    // buildNewQuery() consults it before rescanning every document's term vector.
    public static HashMap noiseCache = new HashMap();

    /**
     * Runs a simulated relevance-feedback session against a Lucene index.
     *
     * <p>Flow: parse the command line, open the index, build a collection-wide
     * term-frequency dictionary, read the initial query and the target method
     * titles (from stdin, or from the file given with {@code -query}), run the
     * initial search, then repeatedly try every relevant/irrelevant labelling of
     * the top {@code numPerRound} results and keep the labelling whose rebuilt
     * query scores best for the targets. The loop stops when a target shows up
     * in the top {@code numPerRound} results, when fifty documents have been
     * rated or skipped, or when too few unrated results remain. A one-line
     * summary is appended to the output file.
     *
     * <p>Recognised options: {@code -index dir}, {@code -field f},
     * {@code -norms f}, {@code -query file}, {@code -output file},
     * {@code -a/-b/-c weight}, {@code -rank n}, {@code -maxDocs n},
     * {@code -includeTerms rankMode topTerms}.
     *
     * @param args command-line options as listed above
     * @throws Exception on index, query-parse or I/O failures
     */
    public static void main(String[] args) throws Exception {
        String usage =
                "Usage:\tjava feedback.AutoRank [-index dir] [-field f] [-norms f] [-query file] [-output file]\n"
                + "\t[-a weight] [-b weight] [-c weight] [-rank methodsPerRound] [-maxDocs n]\n"
                + "\t[-includeTerms rankMode topTerms]";
        if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
            System.out.println(usage);
            System.exit(0);
        }

        String index = "index";
        String field = "contents";
        String queryFile = "";
        String outputFile = "output.txt";
        String newQuery = "";
        String normsField = null;
        int numRel = 0;
        int numIr = 0;
        int numRanked = 0;
        int numRounds = 0;
        int rankMode = 1;
        int topTerms = 10;
        float a = 1;
        float b = 1;
        float c = 1;
        int numPerRound = 1;
        int maxResults = 0;
        boolean maxResultsGiven = false;
        // The session ends once this many documents have been rated or skipped.
        final int maxRated = 50;

        for (int i = 0; i < args.length; i++) {
            if ("-index".equals(args[i])) {
                index = args[i + 1];
                i++;
            } else if ("-a".equals(args[i])) {
                a = Float.parseFloat(args[i + 1]);
                i++;
            } else if ("-b".equals(args[i])) {
                b = Float.parseFloat(args[i + 1]);
                i++;
            } else if ("-c".equals(args[i])) {
                c = Float.parseFloat(args[i + 1]);
                i++;
            } else if ("-rank".equals(args[i])) {
                numPerRound = Integer.parseInt(args[i + 1]);
                i++;
            } else if ("-maxDocs".equals(args[i])) {
                maxResults = Integer.parseInt(args[i + 1]);
                maxResultsGiven = true;
                i++;
            } else if ("-field".equals(args[i])) {
                field = args[i + 1];
                i++;
            } else if ("-norms".equals(args[i])) {
                normsField = args[i + 1];
                i++;
            } else if ("-includeTerms".equals(args[i])) {
                rankMode = Integer.parseInt(args[i + 1]);
                topTerms = Integer.parseInt(args[i + 2]);
                i += 2;
            } else if ("-query".equals(args[i])) {
                queryFile = args[i + 1];
                i++;
            } else if ("-output".equals(args[i])) {
                outputFile = args[i + 1];
                i++;
            }
        }

        // Open the index only after the arguments are parsed so that -index is honoured.
        IndexReader reader = IndexReader.open(index);
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        try {
            int numDocs = reader.numDocs();
            if (!maxResultsGiven) {
                maxResults = numDocs;
            }
            ArrayList<Integer> targetMethods = new ArrayList<Integer>();

            System.out.println("Your settings: a="+a+" b="+b+" c="+c+" methods per round="+numPerRound+
                    "\nmax similar results="+maxResults+" term ranking= method "+rankMode+","+topTerms+" top terms.");

            // Collection-wide term statistics: total frequency per term, and the sum of the
            // per-document distinct-term counts.
            int numTerms = 0;
            HashMap<String, Integer> termDict = new HashMap<String, Integer>();
            for (int g = 0; g < numDocs; g++) {
                TermFreqVector vector = reader.getTermFreqVector(g, "contents");
                if (vector == null) {
                    // No term vector available for this document; nothing to count.
                    continue;
                }
                String[] docTerms = vector.getTerms();
                int[] freqs = vector.getTermFrequencies();
                numTerms += docTerms.length;

                for (int h = 0; h < docTerms.length; h++) {
                    Integer seen = termDict.get(docTerms[h]);
                    termDict.put(docTerms[h], seen == null ? freqs[h] : seen + freqs[h]);
                }
            }

            // Rating flags per document id (1 = set). Also seed one score slot per id.
            int[] rel = new int[numDocs + 1];
            int[] ir = new int[numDocs + 1];
            int[] skip = new int[numDocs + 1];
            for (int i = 0; i <= numDocs; i++) {
                scores.add((float) 0.0);
                fakeScores.add((float) 0.0);
            }

            if (normsField != null) {
                reader = new OneNormsReader(reader, normsField);
            }

            Searcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser(field, analyzer);

            String line = "";

            if (queryFile.equals("")) {
                System.out.println("Please enter your initial query: ");
                line = in.readLine();
                if (line == null) {
                    throw new IOException("No query was supplied on standard input.");
                }
                line = line.trim();
                System.out.println("Please list the target methods. Enter \"done\" when done.");

                while (true) {
                    System.out.println();
                    String method = in.readLine();
                    // End of input is treated the same as an explicit "done".
                    if (method == null || method.equals("done")) {
                        break;
                    }
                    int docId = findDocumentByTitle(reader, numDocs, method);
                    if (docId >= 0) {
                        targetMethods.add(docId);
                        System.out.println("Found - Document "+docId);
                    } else {
                        System.out.println("Not found. Please check your spelling and try again");
                    }
                }
            } else {
                // Query file layout: line 1 is the query, line 2 is ignored,
                // every line from the third onward is a target title.
                BufferedReader qf = new BufferedReader(new FileReader(queryFile));
                try {
                    String cline;
                    int now = 0;
                    while ((cline = qf.readLine()) != null) {
                        now++;
                        if (now == 1) {
                            line = cline.trim();
                            System.out.println("Your initial query is:"+line);
                        } else if (now >= 3) {
                            int docId = findDocumentByTitle(reader, numDocs, cline);
                            if (docId >= 0) {
                                targetMethods.add(docId);
                                System.out.println("Found - Document "+docId);
                            } else {
                                System.out.println("Not found. Please check your spelling and try again");
                            }
                        }
                    }
                } finally {
                    qf.close();
                }
            }

            // Parse the raw query first so malformed syntax is reported before anything else.
            Query query = parser.parse(line);
            QueryTermVector queryTermVector = new QueryTermVector(line, analyzer);
            String[] queryTerms = queryTermVector.getTerms();
            int[] queryFreqs = queryTermVector.getTermFrequencies();

            // Drop terms that never occur in the corpus (df == 0), repeating each kept term
            // once per occurrence so its frequency survives the re-parse.
            StringBuilder revised = new StringBuilder();
            for (int i = 0; i < queryTerms.length; i++) {
                int df = reader.docFreq(new Term("contents", queryTerms[i]));
                if (df > 0) {
                    for (int k = 0; k < queryFreqs[i]; k++) {
                        revised.append(queryTerms[i]).append(' ');
                    }
                }
            }
            String revisedQuery = revised.toString();

            query = parser.parse(revisedQuery);
            queryTermVector = new QueryTermVector(revisedQuery, analyzer);
            System.out.println("Searching for: " + query.toString(field));

            doStreamingSearch(searcher, query);
            ArrayList<Integer> ranks = sortAndDisplay(numDocs, reader, maxResults, rel, ir, skip, 0);

            // Positions are 0-based internally and printed 1-based, like every later report.
            ArrayList<Integer> positions = findTargets(ranks, targetMethods, maxResults);
            System.out.println("The initial position(s) of your target method(s) are:");
            for (int i = 0; i < targetMethods.size(); i++) {
                System.out.println(positions.get(i) + 1);
            }

            // Feedback loop: each round rates the top numPerRound unrated results.
            boolean done = false;
            while (!done) {
                if (isTargetInTop(ranks, targetMethods, numPerRound)) {
                    System.out.println("Target method in the top "+numPerRound+". Quitting.");
                    break;
                }
                if (ranks.size() < numPerRound) {
                    System.out.println("Fewer than "+numPerRound+" unrated results remain. Quitting.");
                    break;
                }

                // One option per relevant/irrelevant labelling of the top numPerRound results.
                int options = (int) Math.pow(2, numPerRound);
                boolean stillRating = true;
                String rep = "";

                while (stillRating && numRanked < maxRated) {
                    if (isTargetInTop(ranks, targetMethods, numPerRound)) {
                        System.out.println("Target method in the top "+numPerRound+". Quitting.");
                        // ranks changed since positions was last computed (documents were skipped).
                        positions = findTargets(ranks, targetMethods, maxResults);
                        done = true;
                        break;
                    }
                    if (ranks.size() < numPerRound) {
                        System.out.println("Fewer than "+numPerRound+" unrated results remain. Quitting.");
                        positions = findTargets(ranks, targetMethods, maxResults);
                        done = true;
                        break;
                    }

                    int bestOption = -1;
                    int bestImprovement = 0;
                    for (int l = 0; l < targetMethods.size(); l++) {
                        bestImprovement += positions.get(l);
                    }

                    // With a == 0 and nothing rated yet, option 0 (everything irrelevant)
                    // contributes no positive terms, so only that option is skipped.
                    int firstOption = ((a == 0) && (numRanked == 0)) ? 1 : 0;
                    for (int i = firstOption; i < options; i++) {
                        String pattern = toRatingPattern(i, numPerRound);

                        // Trial ratings go on copies so the real arrays stay untouched.
                        int[] fakerel = rel.clone();
                        int[] fakeir = ir.clone();
                        int[] fakeskip = skip.clone();
                        int fakeNumRel = numRel;
                        int fakeNumIr = numIr;

                        for (int j = 0; j < numPerRound; j++) {
                            int docId = ranks.get(j);
                            fakeskip[docId] = 0;
                            if (pattern.charAt(j) == '0') {
                                fakeir[docId] = 1;
                                fakerel[docId] = 0;
                                fakeNumIr++;
                            } else {
                                fakeir[docId] = 0;
                                fakerel[docId] = 1;
                                fakeNumRel++;
                            }
                        }

                        // Evaluate the query this labelling would produce.
                        String testQuery = buildNewQuery(queryTermVector, reader,
                                fakerel, fakeir, a, b, c,
                                numDocs, fakeNumRel, fakeNumIr,
                                maxResults, numTerms, termDict,
                                rankMode, topTerms).trim();
                        Query parsedTestQuery = parser.parse(testQuery);
                        doStreamingSearch(searcher, parsedTestQuery, 1);
                        ArrayList<Integer> testRanks =
                                sortAndDisplay(numDocs, reader, maxResults, fakerel, fakeir, fakeskip, 1);
                        ArrayList<Integer> testPositions = findTargets(testRanks, targetMethods, maxResults);

                        // Lower is better; the option is kept if any single target beats the
                        // current best value. Scoring formula kept exactly as it was.
                        for (int k = 0; k < targetMethods.size(); k++) {
                            int current = positions.get(k);
                            int hx = testPositions.get(k);
                            int gx = current - hx;

                            int improvement = -(gx) + hx;
                            // Penalty
                            if (gx > current) {
                                improvement = 100000000;
                            }
                            if (improvement < bestImprovement) {
                                bestImprovement = improvement;
                                bestOption = i;
                            }
                        }
                    }

                    System.out.println("Best option: "+bestOption);

                    if (bestOption != -1) {
                        rep = toRatingPattern(bestOption, numPerRound);
                        stillRating = false;
                    } else {
                        // No labelling helped: clear any flags on the top results, mark the
                        // first one as skipped and re-rank without it.
                        for (int x = 0; x < numPerRound; x++) {
                            int docId = ranks.get(x);
                            rel[docId] = 0;
                            ir[docId] = 0;
                            skip[docId] = (x == 0) ? 1 : 0;
                        }
                        numRanked++;
                        ranks = sortAndDisplay(numDocs, reader, maxResults, rel, ir, skip, 0);
                    }
                }

                // Apply feedback only when a labelling was actually chosen; quitting from the
                // inner loop must not commit the last pattern that merely happened to be tried.
                if (!stillRating) {
                    for (int j = 0; j < numPerRound; j++) {
                        int docId = ranks.get(j);
                        skip[docId] = 0;
                        if (rep.charAt(j) == '0') {
                            ir[docId] = 1;
                            rel[docId] = 0;
                            numIr++;
                        } else {
                            ir[docId] = 0;
                            rel[docId] = 1;
                            numRel++;
                        }
                    }

                    //Build new query
                    newQuery = buildNewQuery(queryTermVector, reader,
                            rel, ir, a, b, c,
                            numDocs, numRel, numIr,
                            maxResults, numTerms, termDict,
                            rankMode, topTerms).trim();

                    query = parser.parse(newQuery);
                    queryTermVector = new QueryTermVector(newQuery, analyzer);

                    doStreamingSearch(searcher, query);
                    ranks = sortAndDisplay(numDocs, reader, maxResults, rel, ir, skip, 0);
                    positions = findTargets(ranks, targetMethods, maxResults);

                    System.out.println("The new position(s) of your target method(s) are:");
                    for (int i = 0; i < targetMethods.size(); i++) {
                        System.out.println(positions.get(i) + 1);
                    }

                    numRanked += numPerRound;
                    numRounds++;
                }

                if (numRanked >= maxRated) {
                    System.out.println("Fifty methods ranked. Quitting.");
                    done = true;
                }
            }

            // Summarise which document ids ended up in each rating bucket.
            StringBuilder relevant = new StringBuilder();
            StringBuilder irrelevant = new StringBuilder();
            StringBuilder skipped = new StringBuilder();
            for (int j = 0; j <= numDocs; j++) {
                if (rel[j] == 1) {
                    relevant.append(' ').append(j);
                }
                if (ir[j] == 1) {
                    irrelevant.append(' ').append(j);
                }
                if (skip[j] == 1) {
                    skipped.append(' ').append(j);
                }
            }
            System.out.println("\nA total of "+numRanked+" methods were ranked over "+numRounds+" rounds.");

            System.out.println("Relevant Documents:" + relevant);
            System.out.println("Irrelevant Documents:" + irrelevant);
            System.out.println("Skipped Documents:"+ skipped);

            System.out.println("\nThe final position(s) of your target method(s) are:");
            StringBuilder finalTars = new StringBuilder();
            for (int i = 0; i < targetMethods.size(); i++) {
                // findTargets() reports maxResults + 1 for a target missing from the ranking;
                // a missing target that was skipped is reported as position 1.
                boolean skippedTarget =
                        (positions.get(i) == maxResults + 1) && (skip[targetMethods.get(i)] == 1);
                int shown = skippedTarget ? 1 : positions.get(i) + 1;
                System.out.println(shown);
                if (finalTars.length() > 0) {
                    finalTars.append(',');
                }
                finalTars.append(shown);
            }

            System.out.println("\n Your final query is:\n"+newQuery);

            // NOTE(review): the record is appended without a line terminator, as before -
            // confirm whether whatever consumes the file expects one.
            BufferedWriter out = new BufferedWriter(new FileWriter(outputFile, true));
            try {
                out.write(a+","+b+","+c+","+numPerRound+","+rankMode+","+topTerms+","+numRel+","+numIr+","+(numRanked-numRel-numIr)
                        +","+numRanked+","+numRounds+","+finalTars);
            } finally {
                out.close();
            }
        } finally {
            try {
                reader.close();
            } finally {
                in.close();
            }
        }
    }

    /**
     * Returns the id of the first document in {@code [0, numDocs)} whose stored
     * "title" field equals {@code title}, or -1 when there is none.
     */
    private static int findDocumentByTitle(IndexReader reader, int numDocs, String title) throws IOException {
        for (int i = 0; i < numDocs; i++) {
            Document doc = reader.document(i);
            if (title.equals(doc.get("title"))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Tells whether any target document id appears among the first {@code top}
     * entries of {@code ranks} (or among all of them when the list is shorter).
     */
    private static boolean isTargetInTop(ArrayList<Integer> ranks, ArrayList<Integer> targets, int top) {
        int limit = Math.min(top, ranks.size());
        for (int i = 0; i < limit; i++) {
            for (int j = 0; j < targets.size(); j++) {
                if (targets.get(j).equals(ranks.get(i))) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns the binary form of {@code option}, left-padded with '0' to at least
     * {@code width} characters; character j is the rating of the j-th result
     * ('0' = irrelevant, anything else = relevant).
     */
    private static String toRatingPattern(int option, int width) {
        StringBuilder pattern = new StringBuilder(Integer.toBinaryString(option));
        while (pattern.length() < width) {
            pattern.insert(0, '0');
        }
        return pattern.toString();
    }

    /** Use the norms from one field for all fields.  Norms are read into memory,
     * using a byte of memory per document per searched field.  This can cause
     * search of large collections with a large number of fields to run out of
     * memory.  If all of the fields contain only a single token, then the norms
     * are all identical, so a single norm vector may be shared. */
    private static class OneNormsReader extends FilterIndexReader {

        // The one field whose norms are handed out, whatever field is asked for.
        private final String sharedNormsField;

        // Wraps the given reader; "field" names the field that supplies the norms.
        public OneNormsReader(IndexReader in, String field) {
            super(in);
            sharedNormsField = field;
        }

        // The requested field name is intentionally ignored: every lookup is
        // answered with the wrapped reader's norms for the shared field.
        public byte[] norms(String field) throws IOException {
            byte[] shared = this.in.norms(sharedNormsField);
            return shared;
        }
    }

    /**
     * Ranks the not-yet-rated documents by descending score.
     *
     * <p>Scores come from {@link #scores} when {@code testMode == 0} and from
     * {@link #fakeScores} otherwise. A document is left out when any of
     * {@code rel}, {@code ir} or {@code skip} holds 1 at its id, or when its
     * score is not strictly positive. Ties go to the lower document id. At most
     * {@code maxResults + 1} ids are returned.
     *
     * @param numDocs    number of document ids to consider ({@code 0..numDocs-1});
     *                   the rating arrays are read up to index {@code numDocs}
     * @param reader     index reader; each ranked document is loaded through it
     * @param maxResults ranking stops once more than this many ids were collected
     * @param rel        1 at ids rated relevant
     * @param ir         1 at ids rated irrelevant
     * @param skip       1 at ids that were skipped
     * @param testMode   0 to rank real scores, anything else to rank trial scores
     * @return document ids, best score first
     * @throws Exception if loading a ranked document fails
     */
    public static ArrayList<Integer> sortAndDisplay(int numDocs, IndexReader reader, int maxResults, int[] rel, int[] ir, int[] skip, int testMode) throws Exception{
        ArrayList<Float> docScores = (testMode == 0) ? scores : fakeScores;
        ArrayList<Integer> ranks = new ArrayList<Integer>();

        // Exclusion table instead of a linearly searched list: membership is checked
        // once per document per pass, so it has to be constant time.
        boolean[] excluded = new boolean[Math.max(numDocs + 1, 0)];
        int excludedCount = 0;

        //Filter out previously rated documents
        for (int i = 0; i <= numDocs; i++) {
            if (rel[i] == 1 || ir[i] == 1 || skip[i] == 1) {
                excluded[i] = true;
                excludedCount++;
            }
        }

        int numDisplayed = 0;
        while ((excludedCount <= numDocs) && (numDisplayed <= maxResults)) {
            // Selection step: best remaining document with a strictly positive score.
            int maxDoc = -1;
            float max = 0;
            for (int i = 0; i < numDocs; i++) {
                float score = docScores.get(i);
                if ((score > max) && !excluded[i]) {
                    max = score;
                    maxDoc = i;
                }
            }
            if (maxDoc == -1) {
                // Nothing left to rank; further passes could not find anything either.
                break;
            }

            // NOTE(review): the loaded document is not used; the call is kept so that a
            // document that cannot be read still fails here as it did before.
            reader.document(maxDoc);
            numDisplayed++;
            ranks.add(maxDoc);
            excluded[maxDoc] = true;
            excludedCount++;
        }

        return ranks;
    }
    
    /*
     * Simple method to find the target methods
     */
    
    /**
     * Looks up where each target document sits in a ranking.
     *
     * @param ranks      document ids in ranked order
     * @param targets    document ids to locate
     * @param maxResults used to build the "not found" marker, {@code maxResults + 1}
     * @return one entry per target, in target order: the 0-based index of the
     *         target's last occurrence in {@code ranks}, or {@code maxResults + 1}
     *         when the target is absent
     */
    public static ArrayList<Integer> findTargets(ArrayList<Integer> ranks,ArrayList<Integer> targets, int maxResults){
        final int notFound = maxResults + 1;
        ArrayList<Integer> located = new ArrayList<Integer>();

        for (Integer target : targets) {
            int position = notFound;
            // Scan the whole ranking; a later match overrides an earlier one.
            for (int slot = 0; slot < ranks.size(); slot++) {
                if (ranks.get(slot).equals(target)) {
                    position = slot;
                }
            }
            located.add(position);
        }

        return located;
    }
    
    /*
     * This method takes your old query and builds a new one based on your liked and disliked documents.
     * A,B, and C are user-supplied constants to represent weights. Defaults are 1. 
     */
     
    public static String buildNewQuery(QueryTermVector oldQuery, IndexReader reader, int[] rel, int[] ir, float a, float b, float c, int numDocs, int numLiked, int numDisliked, int maxResults, int numTerms,HashMap termDict,int rankMode, int topTerms) throws Exception{
        
        String[] queryTerms = oldQuery.getTerms();
        int[] queryFreqs = oldQuery.getTermFrequencies();
    
        ArrayList<String> newTerms = new ArrayList<String>();
        ArrayList<Float> newFreqs = new ArrayList<Float>();
        ArrayList<String> newQueryTerms = new ArrayList<String>();
        ArrayList<Float> newQueryFreqs = new ArrayList<Float>();
        ArrayList<String> removeTerms = new ArrayList<String>();
        ArrayList<Float> removeFreqs = new ArrayList<Float>();

        
        //Load in terms/freqs from original query
        for(int i=0;i<queryTerms.length;i++){
            int df = reader.docFreq(new Term("contents",queryTerms[i]));
            float idf= (float)Math.log(numDocs/(1+df));

            newQueryTerms.add(queryTerms[i]);
            //newFreqs.add(a*queryFreqs[i]*idf);
            newQueryFreqs.add(a*queryFreqs[i]);
        }

        
        //Compile list of liked and disliked terms
        if(numLiked>0||numDisliked>0){
            for(int j=0;j<numDocs;j++){
                if(rel[j]==1){
                    //Grab term list/freqs
                    TermFreqVector test = reader.getTermFreqVector(j,"contents");
                    String[] docTerms = test.getTerms();
                    int[] freqs = test.getTermFrequencies();
                    
                    //Go through list of terms
                    for(int k=0;k<docTerms.length;k++){
                        //See if term is already in list. Add it if not. Adjust freq if so.
                        if(newTerms.indexOf(docTerms[k])==-1){
                            newTerms.add(docTerms[k]);
                            int df = reader.docFreq(new Term("contents",docTerms[k]));
                            float idf= (float)Math.log(numDocs/(1+df));
                            //newFreqs.add(((b/numLiked)*(freqs[k]*idf)));
                            newFreqs.add((b/numLiked)*freqs[k]);
                            //System.out.println("REL: new term: "+newFreqs.get(k));
                        } else{
                            int index = newTerms.indexOf(docTerms[k]);
                            float numToAdd = newFreqs.get(index);
                            int df = reader.docFreq(new Term("contents",docTerms[k]));
                            float idf= (float)Math.log(numDocs/(1+df));
                            //numToAdd = numToAdd + ((b/numLiked)*(freqs[k]*idf));
                            numToAdd = numToAdd + ((b/numLiked)*freqs[k]);
                            newFreqs.set(index,numToAdd);
                            //System.out.println("REL: old term: "+newFreqs.get(k));
                        }
                    }
                    
                }else if(ir[j]==1){
                    //Grab term list/freqs
                    TermFreqVector test = reader.getTermFreqVector(j,"contents");
                    String[] docTerms = test.getTerms();
                    int[] freqs = test.getTermFrequencies();
                    
                    //Go through list of terms
                    for(int k=0;k<docTerms.length;k++){
                        //See if term is already in list. Add it if not. Adjust freq if so.
                        if(removeTerms.indexOf(docTerms[k])==-1){
                            removeTerms.add(docTerms[k]);
                            int df = reader.docFreq(new Term("contents",docTerms[k]));
                            float idf= (float)Math.log(numDocs/(1+df));
                            //removeFreqs.add(((c/numDisliked)*(freqs[k]*idf)));
                            removeFreqs.add((c/numDisliked)*freqs[k]);
                            //System.out.println("IR: new term: "+removeFreqs.get(k));
                        } else{
                            int index = removeTerms.indexOf(docTerms[k]);
                            float numToRemove = removeFreqs.get(index);
                            int df = reader.docFreq(new Term("contents",docTerms[k]));
                            float idf= (float)Math.log(numDocs/(1+df));
                            //numToAdd = numToAdd + ((b/numLiked)*(freqs[k]*idf));
                            numToRemove = numToRemove + ((c/numDisliked)*freqs[k]);
                            removeFreqs.set(index,numToRemove);
                            //System.out.println("IR: old term: "+removeFreqs.get(k));
                        }
                    }
                    
                }
            }
        }
        
      
        //Rank relevant terms
        String newQuery="";
        ArrayList<Double> rankScores=new ArrayList<Double>();
        ArrayList<String> bestTerms = new ArrayList<String>();
        
        //switch based on sort order    
        if(rankMode==1){
            //compute noise
            for(int n=0;n<newTerms.size();n++){
                double noise=0;
                if(noiseCache.containsKey(newTerms.get(n))){
                    noise=(Double) noiseCache.get(newTerms.get(n));
                }else{
                    for(int o=0;o<numDocs;o++){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                double newNoise=(double) (freqs[p]*numTerms)/(Integer) termDict.get(newTerms.get(n));
                                newNoise*=Math.log((Integer)termDict.get(newTerms.get(n))*freqs[p]);
                                noise+=newNoise;
                                break;
                            }
                        }
                    }
                    noiseCache.put(newTerms.get(n),noise);
                }
                rankScores.add(noise);
            }
        }else if(rankMode==2){
            //Mode 2 is Pk (num documents in relevant set containing term k
            for(int n=0;n<newTerms.size();n++){
                double times=0;
                
                for(int o=0;o<rel.length;o++){
                    //Is the document relevant?
                    if(rel[o]==1){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();

                        //Does it contain the term?
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                times++;
                                break;
                            }
                        }
                    }
                }
                rankScores.add(times);
            }
        }else if(rankMode==3){
            //Mode 3 is modified noise (noise within relevant set)
            
            for(int n=0;n<newTerms.size();n++){
                double noise=0;
                int times=0;
                
                //First pass to get Pk
                for(int o=0;o<rel.length;o++){
                    //Is the document relevant?
                    if(rel[o]==1){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();

                        //Does it contain the term?
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                times++;
                                break;
                            }
                        }
                    }
                }
                
                //Second pass to collect modified noise
                 for(int o=0;o<rel.length;o++){
                    //Is the document relevant?
                    if(rel[o]==1){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                double newNoise=(double) (freqs[p]*times)/(Integer) termDict.get(newTerms.get(n));
                                newNoise*=Math.log((Integer)termDict.get(newTerms.get(n))*freqs[p]);
                                noise+=newNoise;
                                break;
                            }
                        }
                    }
                 }
                rankScores.add(noise);
            }
        }else if(rankMode==4){
            //Mode 4 is noise*num occurences of k in the relevant set
           
            for(int n=0;n<newTerms.size();n++){
                double noise=0;
                int times=0;
                 //First pass to collect noise.
                if(noiseCache.containsKey(newTerms.get(n))){
                    noise=(Double) noiseCache.get(newTerms.get(n));
                }else{
                    for(int o=0;o<numDocs;o++){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                double newNoise=(double) (freqs[p]*numTerms)/(Integer) termDict.get(newTerms.get(n));
                                newNoise*=Math.log((Integer)termDict.get(newTerms.get(n))*freqs[p]);
                                noise+=newNoise;
                                break;
                            }
                        }
                    }
                    noiseCache.put(newTerms.get(n),noise);
                }
                
                //Second pass to collect RTFk
                for(int o=0;o<rel.length;o++){
                    //Is the document relevant?
                    if(rel[o]==1){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();

                        //Does it contain the term?
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                times+=freqs[p];
                                break;
                            }
                        }
                    }
                }
                rankScores.add(noise*times);
            }
        }else if(rankMode==5){
             //Mode 5 is noise*num occurences of k in the collection*num docs in rel set containing k
           
            for(int n=0;n<newTerms.size();n++){
                double noise=0;
                int times=0;
                 //First pass to collect noise.
                if(noiseCache.containsKey(newTerms.get(n))){
                    noise=(Double) noiseCache.get(newTerms.get(n));
                }else{
                    for(int o=0;o<numDocs;o++){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                double newNoise=(double) (freqs[p]*numTerms)/(Integer) termDict.get(newTerms.get(n));
                                newNoise*=Math.log((Integer)termDict.get(newTerms.get(n))*freqs[p]);
                                noise+=newNoise;
                                break;
                            }
                        }
                    }
                    noiseCache.put(newTerms.get(n),noise);
                }
                
                //Second pass to collect Pk
                for(int o=0;o<rel.length;o++){
                    //Is the document relevant?
                    if(rel[o]==1){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();

                        //Does it contain the term?
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                times++;
                                break;
                            }
                        }
                    }
                }
                rankScores.add(noise*(Integer) termDict.get(newTerms.get(n))*times);
            }
        }else if(rankMode==6){
             //Mode 6 is noise*num occurrences of k in the collection
           
            for(int n=0;n<newTerms.size();n++){
                double noise=0;
                 //First pass to collect noise.
                if(noiseCache.containsKey(newTerms.get(n))){
                    noise=(Double) noiseCache.get(newTerms.get(n));
                }else{
                    for(int o=0;o<numDocs;o++){
                        TermFreqVector test = reader.getTermFreqVector(o,"contents");
                        String[] docTerms = test.getTerms();
                        int[] freqs = test.getTermFrequencies();
                        for(int p=0;p<docTerms.length;p++){
                            if(docTerms[p].equals(newTerms.get(n))){
                                double newNoise=(double) (freqs[p]*numTerms)/(Integer) termDict.get(newTerms.get(n));
                                newNoise*=Math.log((Integer)termDict.get(newTerms.get(n))*freqs[p]);
                                noise+=newNoise;
                                break;
                            }
                        }
                    }
                    noiseCache.put(newTerms.get(n),noise);
                }
                
                rankScores.add(noise*(Integer) termDict.get(newTerms.get(n)));
            }
        }
        //sort
        for(int o=0;o<topTerms;o++){
            double best=0;
            int pos=-1;
            for(int n=0;n<rankScores.size();n++){
                if(rankScores.get(n)>best&&(!bestTerms.contains(newTerms.get(n)))){
                    best=rankScores.get(n);
                    pos=n;
                }
            }
            if(pos>=0){
                bestTerms.add(newTerms.get(pos));
                //System.out.println(newTerms.get(pos));
            }else{
                break;
            }
        }

        //Add relevant terms if they are high ranked
        for(int n=0;n<newTerms.size();n++){
            String term = newTerms.get(n);
            float freq = newFreqs.get(n);
            if(freq>0){
               if(bestTerms.contains(term)){
                    if(newQueryTerms.indexOf(term)==-1){
                        newQueryTerms.add(term);
                        newQueryFreqs.add(freq);
                        //System.out.println("Term added "+term+"-"+freq);
                    }else{
                        float oldFreq=newQueryFreqs.get(newQueryTerms.indexOf(term));
                        freq+=oldFreq;
                        newQueryFreqs.set(newQueryTerms.indexOf(term),freq);
                        //System.out.println("Rel term already there: "+term+"-"+freq);
                    }
               }
            }
        }
        
        //Remove irrelevant terms
         for(int n=0;n<removeTerms.size();n++){
            String term = removeTerms.get(n);
            float freq = removeFreqs.get(n);
            if(freq>0){
                if(newQueryTerms.indexOf(term)!=-1){
                    float oldFreq=newQueryFreqs.get(newQueryTerms.indexOf(term));
                    oldFreq-=freq;
                    newQueryFreqs.set(newQueryTerms.indexOf(term),oldFreq);
                    //System.out.println("Ir term:"+term+"-"+freq+" now:"+oldFreq);
                }
            }
        }
        
        //Now, build the string
        for(int n=0;n<newQueryTerms.size();n++){
            String term = newQueryTerms.get(n);
            float freq = newQueryFreqs.get(n);
            for(int o=0;o<freq;o++){
                newQuery=newQuery+term+" ";
            }
        }

        //System.out.println("\n"+newQuery+"\n");
        return newQuery;
    }
    /**
     * Runs {@code query} against {@code searcher} with a custom HitCollector and
     * records the score of every document the search reports in the static
     * {@link #scores} list, at the index equal to the document id.
     *
     * <p>This simulates the streaming search use case, where all hits are supposed
     * to be processed, regardless of their relevance. Nothing is printed.
     *
     * <p>The collector writes with {@code ArrayList.set}, which does not grow the
     * list: {@code scores} must already contain an element at every reported
     * document id, otherwise an {@code IndexOutOfBoundsException} is thrown.
     *
     * @param searcher the searcher to run the query against
     * @param query    the query to execute
     * @throws IOException declared for failures raised by the underlying search
     */
    public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException {
        // The collector is only needed for this one call, so it is created in place.
        searcher.search(query, new HitCollector() {
            // Store the score under the document's id; overwrites any previous value.
            public void collect(int docId, float docScore) {
                scores.set(docId, docScore);
            }
        });
    }
    
     /**
      * Variant of the two-argument streaming search that records the score of
      * every document the search reports in the static {@link #fakeScores} list
      * instead, at the index equal to the document id. Nothing is printed.
      *
      * <p>The collector writes with {@code ArrayList.set}, which does not grow the
      * list: {@code fakeScores} must already contain an element at every reported
      * document id, otherwise an {@code IndexOutOfBoundsException} is thrown.
      *
      * @param searcher the searcher to run the query against
      * @param query    the query to execute
      * @param test     never read by this method; its presence only selects this
      *                 overload over the two-argument one
      * @throws IOException declared for failures raised by the underlying search
      */
     public static void doStreamingSearch(final Searcher searcher, Query query, int test) throws IOException {
        // The collector is only needed for this one call, so it is created in place.
        searcher.search(query, new HitCollector() {
            // Store the score under the document's id; overwrites any previous value.
            public void collect(int docId, float docScore) {
                fakeScores.set(docId, docScore);
            }
        });
     }

    /**
     * This demonstrates a typical paging search scenario, where the search engine presents
     * pages of size n to the user. The user can then go to the next page if interested in
     * the next hits.
     *
     * <p>When the query is executed for the first time, then only enough results are collected
     * to fill 5 result pages. If the user wants to page beyond this limit, then the user is
     * asked for confirmation, the query is executed another time and all hits are collected.
     *
     * <p>Input handling: end of input on {@code in} is treated like an empty line (it declines
     * "collect more" and quits the page prompt). A page jump that is not a number, is smaller
     * than 1, or lies past the last hit prints "No such page" and asks again.
     *
     * @param in          source of the user's answers; only read when more hits must be
     *                    collected or when {@code interactive} is true
     * @param searcher    the searcher to run the query against
     * @param query       the query to execute
     * @param hitsPerPage number of hits printed per page
     * @param raw         if true, each hit is printed as {@code doc=<id> score=<score>};
     *                    otherwise the stored "path" field (and "title", when present) is printed
     * @param interactive if false, the method returns after printing the first page
     * @throws IOException if reading from {@code in} fails, or is raised by the search
     */
    public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query,
            int hitsPerPage, boolean raw, boolean interactive) throws IOException {

        // Collect enough docs to show 5 pages
        TopDocCollector collector = new TopDocCollector(5 * hitsPerPage);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int numTotalHits = collector.getTotalHits();
        System.out.println(numTotalHits + " total matching documents");

        int start = 0;
        int end = Math.min(numTotalHits, hitsPerPage);

        while (true) {
            // The requested page reaches past what has been collected so far.
            if (end > hits.length) {
                System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected.");
                System.out.println("Collect more (y/n) ?");
                String line = in.readLine();
                // readLine() returns null at end of input; treat that like an empty answer.
                if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                    break;
                }

                // Re-run the query, this time keeping every hit.
                collector = new TopDocCollector(numTotalHits);
                searcher.search(query, collector);
                hits = collector.topDocs().scoreDocs;
            }

            // Never index past the hits that were actually collected.
            end = Math.min(hits.length, start + hitsPerPage);

            for (int i = start; i < end; i++) {
                if (raw) {                              // output raw format
                    System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                    continue;
                }

                Document doc = searcher.doc(hits[i].doc);
                String path = doc.get("path");
                if (path != null) {
                    System.out.println((i + 1) + ". " + path);
                    String title = doc.get("title");
                    if (title != null) {
                        System.out.println("   Title: " + title);
                    }
                } else {
                    System.out.println((i + 1) + ". " + "No path for this document");
                }

            }

            if (!interactive) {
                break;
            }

            if (numTotalHits >= end) {
                boolean quit = false;
                // Keep prompting until the user gives a usable navigation command.
                while (true) {
                    System.out.print("Press ");
                    if (start - hitsPerPage >= 0) {
                        System.out.print("(p)revious page, ");
                    }
                    if (start + hitsPerPage < numTotalHits) {
                        System.out.print("(n)ext page, ");
                    }
                    System.out.println("(q)uit or enter number to jump to a page.");

                    String line = in.readLine();
                    // End of input (null) quits, exactly like an empty line or 'q'.
                    if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                        quit = true;
                        break;
                    }
                    if (line.charAt(0) == 'p') {
                        start = Math.max(0, start - hitsPerPage);
                        break;
                    } else if (line.charAt(0) == 'n') {
                        if (start + hitsPerPage < numTotalHits) {
                            start += hitsPerPage;
                        }
                        break;
                    } else {
                        int page;
                        try {
                            page = Integer.parseInt(line.trim());
                        } catch (NumberFormatException e) {
                            // Not a command and not a number: ask again instead of crashing.
                            System.out.println("No such page");
                            continue;
                        }
                        // long arithmetic so a huge page number cannot overflow into a valid-looking offset
                        long pageStart = ((long) page - 1) * hitsPerPage;
                        // Pages are 1-based; page <= 0 would give a negative start index.
                        if (page >= 1 && pageStart < numTotalHits) {
                            start = (int) pageStart;
                            break;
                        } else {
                            System.out.println("No such page");
                        }
                    }
                }
                if (quit) {
                    break;
                }
                end = Math.min(numTotalHits, start + hitsPerPage);
            }

        }

    }
}
