game.TermFreq.java Source code

Java tutorial

Introduction

Here is the source code for game.TermFreq.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package game;

import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.BytesRef;
import retriever.TrecDocRetriever;
import static retriever.TrecDocRetriever.FIELD_ANALYZED_CONTENT;
import trec.TRECQuery;

/**
 * A term of a document together with its frequency and a weight.
 *
 * <p>The natural ordering is by {@link #wt} in descending order, so sorting a
 * list of these objects puts the highest weighted term first. The constructor
 * does not set {@code wt}; it stays at 0 until it is assigned externally.
 *
 * @author Debasis
 */
class TermFreq implements Comparable<TermFreq> {
    Term term;
    String termStr;
    float tf; // document component
    float wt;

    /**
     * @param term    the index term this entry refers to
     * @param termStr the textual form of the term
     * @param tf      the raw frequency of the term
     */
    public TermFreq(Term term, String termStr, int tf) {
        this.term = term;
        this.tf = tf;
        this.termStr = termStr;
    }

    /**
     * Orders by weight, largest first.
     *
     * @param t the entry to compare against
     * @return a negative value when this entry has the larger weight
     */
    @Override
    public int compareTo(TermFreq t) {
        // Swapped arguments give the descending order without boxing a Float.
        return Float.compare(t.wt, wt);
    }

    @Override
    public String toString() {
        return "(" + termStr + ", " + wt + ")";
    }
}

/**
 * Orders {@link TermFreq} objects by their {@code tf} value, largest first.
 *
 * <p>The values are compared as floats. Truncating them to {@code int} first
 * would make every fractional (e.g. length-normalised) frequency compare as
 * equal; for whole-number frequencies the float comparison gives the same
 * order as the integer one.
 */
class TermFreqComparator_Freq implements Comparator<TermFreq> {

    /**
     * @param a first entry
     * @param b second entry
     * @return a negative value when {@code a} has the larger frequency
     */
    @Override
    public int compare(TermFreq a, TermFreq b) {
        return Float.compare(b.tf, a.tf); // decreasing
    }
}

/**
 * One row of the game history: the document a player submitted, whether it
 * was counted as relevant, and the words that were shared in response.
 */
class UserSubmitInfo {
    String wordsShared;
    int luceneDocId;
    String docSubmitted;
    boolean relGuess;

    /**
     * @param wordsShared  the words shared with the player for this submission
     * @param luceneDocId  id of the submitted document; a negative value means
     *                     there is no document to render for this row
     * @param docSubmitted name of the submitted document
     * @param relGuess     whether the submission was counted as relevant
     */
    public UserSubmitInfo(String wordsShared, int luceneDocId, String docSubmitted, boolean relGuess) {
        this.wordsShared = wordsShared;
        this.docSubmitted = docSubmitted;
        this.luceneDocId = luceneDocId;
        this.relGuess = relGuess;
    }

    /**
     * Renders this submission as three HTML table cells (document link,
     * tick/cross image, shared words). The first two cells are left empty
     * when {@code luceneDocId} is negative.
     *
     * @return the {@code <td>} cells, without a surrounding {@code <tr>}
     */
    @Override
    public String toString() {
        String className = relGuess ? "rel" : "nrel";
        // The document name and the shared words are free text, so they are
        // escaped before being placed inside attributes or element content.
        String docName = escapeHtml(docSubmitted);
        boolean hasDoc = luceneDocId >= 0;

        StringBuilder buff = new StringBuilder();
        buff.append("<td>");
        if (hasDoc) {
            buff.append("<a id='").append(luceneDocId).append("' name='").append(docName).append("' class='")
                    .append(className).append("'>").append(docName).append("</a>");
        }
        buff.append("</td>").append("<td>");
        if (hasDoc) {
            // Both image tags must be closed, otherwise the following </td> is swallowed.
            buff.append(this.relGuess ? "<img src='images/tick.jpg'>" : "<img src='images/cross.jpg'>");
        }
        buff.append("</td>").append("<td>").append(escapeHtml(wordsShared)).append("</td>");

        return buff.toString();
    }

    /** Escapes the characters that are special in HTML text and quoted attributes. */
    private static String escapeHtml(String text) {
        String s = String.valueOf(text); // a null value renders as "null", as append(null) would
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                case '\'':
                    out.append("&#39;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }
}

public class GameState {
    // Session identifier passed to the constructor; first column of each log line.
    String sessionId;
    String qid; // the query id
    // Query object obtained from the retriever for qid; its title is the first thing shared.
    TRECQuery query;
    // System.currentTimeMillis() taken when this game state was constructed.
    long startingEpochs;

    String docIdToGuess; // the doc id which the user needs to guess
    // Relevance records: used to pick the query and target document and to look up relevance.
    AllRelRcds rels;
    // Supplies the index reader/searcher, the properties and the query objects.
    TrecDocRetriever retriever;

    // Stored Lucene document of the target, its internal doc id, and the value
    // of its analyzed-content field (all set by loadDoc()).
    Document docToGuess;
    int luceneDocIdToGuess;
    String contentOfDocToGuess;

    int numTermsToShare; // number of terms to share in each round

    int score;
    // True until selectWords() has shared the query title for the first time.
    boolean startState;
    // Index into tfvec of the next term to share.
    int numTermsShared;
    // One entry per recorded submission, in submission order.
    List<UserSubmitInfo> submitInfos;
    // Names of documents already submitted (each name is mapped to itself).
    HashMap<String, String> docsSubmitted;

    // Instantaneous state variables
    String lastDocumentSubmitted;
    String lastUserQuery;

    String wordsSharedNow; // words just shared
    boolean correctGuess;
    boolean relGuess;
    // Game mode flag: true means relevance is looked up in rels, false means it is drawn at random.
    boolean usingTrueRJ;
    // One of the termination codes below; reset at the start of every update().
    int terminateCode;

    // Path of the log file, read from the "gamelog.file" property.
    String logFileName;

    List<TermFreq> tfvec; // term freq vec of the doc to be guessed
    // Instantiated in the constructor; no other use is visible in this class.
    TermFreqComparator_Freq tfcomp_freq;

    // Mixing parameter used in the term weight computed by loadTfVec().
    static final float LAMBDA = 0.6f;
    static final float ONE_MINUS_LAMBDA = 1 - LAMBDA;

    // Termination Codes
    static final int DOC_ALREADY_GUESSED = 4;
    static final int GAME_TO_CONTINUE = 0;
    static final int CORRECT_GUESS_FOUND = 1;
    static final int SCORE_REACHED_MIN_THRESH = 2;

    // SCORE UPDATES...
    static final int INIT_SCORE = 10;
    static final int GAME_TERMINATION_SCORE = 0; // stop the game if this score is reached
    static final int SCORE_INCREMENET_FOR_CORRECT_GUESS = 20;
    static final int SCORE_INCREMENET_FOR_CORRECT_REL = 5;
    static final int SCORE_INCREMENET_FOR_INCORRECT_REL = -2;

    public GameState(TrecDocRetriever retriever, AllRelRcds rels, String sessionId, boolean usingTrueRJ) {
        this.sessionId = sessionId;
        this.retriever = retriever;
        this.rels = rels;
        startingEpochs = System.currentTimeMillis();
        logFileName = retriever.getProperties().getProperty("gamelog.file");

        // Pick one random query
        qid = rels.selectRandomQuery();
        // Pick one relevant document for this query at random
        docIdToGuess = rels.selectRandomRelDoc(qid);

        // Load the Lucene document object for the doc to be guessed...
        // This is going to be used for selecting random terms from this doc...
        try {
            loadDoc();
            loadTfVec();
        } catch (Exception ex) {
            ex.printStackTrace();
        }

        Properties prop = retriever.getProperties();
        numTermsToShare = Integer.parseInt(prop.getProperty("game.numterms", "3"));
        this.query = retriever.getQuery(qid);

        numTermsShared = 0;
        startState = true;

        tfcomp_freq = new TermFreqComparator_Freq();
        submitInfos = new ArrayList<>();
        docsSubmitted = new HashMap<>();

        this.usingTrueRJ = usingTrueRJ;
    }

    /**
     * Reports the game mode flag.
     *
     * @return the value of {@code usingTrueRJ}
     */
    public boolean getGameMode() {
        return this.usingTrueRJ;
    }

    /**
     * Builds {@code tfvec}, the weighted term vector of the document to
     * guess, sorted by descending weight.
     *
     * <p>For every term of the document's term vector the raw frequency is
     * normalised by the document length and combined with an idf-like ratio
     * ({@code sumDocFreq / docFreq}) into
     * {@code wt = log(1 + LAMBDA / (1 - LAMBDA) * tf * idf)}.
     * If the document has no term vector, {@code tfvec} is left empty.
     *
     * @throws Exception if the index cannot be read
     */
    void loadTfVec() throws Exception {
        // Publish an (initially empty) list right away so tfvec is never null,
        // even on the early return below.
        List<TermFreq> weighted = new ArrayList<>();
        tfvec = weighted;

        IndexReader reader = retriever.getReader();
        long sumDf = reader.getSumDocFreq(TrecDocRetriever.FIELD_ANALYZED_CONTENT);

        Terms terms = reader.getTermVector(luceneDocIdToGuess, FIELD_ANALYZED_CONTENT);
        if (terms == null || terms.size() == 0) {
            return;
        }

        // Collect the raw term frequencies and the document length
        TermsEnum termsEnum = terms.iterator(null); // access the terms for this field
        BytesRef term;
        int doclen = 0;
        while ((term = termsEnum.next()) != null) { // explore the terms for this field
            String termStr = term.utf8ToString();
            // The enum may reuse the returned BytesRef on the next call and
            // Term keeps the reference it is given, so store a private copy.
            Term indexTerm = new Term(TrecDocRetriever.FIELD_ANALYZED_CONTENT, BytesRef.deepCopyOf(term));

            DocsEnum docsEnum = termsEnum.docs(null, null); // enumerate through documents, in this case only one
            while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                // get the term frequency in the document
                int tf = docsEnum.freq();
                weighted.add(new TermFreq(indexTerm, termStr, tf));
                doclen += tf;
            }
        }

        for (TermFreq tf : weighted) {
            tf.tf = tf.tf / (float) doclen; // normalize by len
            // Floating-point division: long / int would truncate the ratio.
            // The max() guards against a zero document frequency.
            int df = Math.max(1, reader.docFreq(tf.term));
            float idf = (float) sumDf / df;
            tf.wt = (float) (Math.log(1 + LAMBDA / (ONE_MINUS_LAMBDA) * tf.tf * idf));
        }

        Collections.sort(weighted);
    }

    /**
     * Looks up the document to guess by its external id and caches its Lucene
     * doc id, its stored document and the value of its analyzed-content field.
     *
     * @throws IllegalStateException if no id is set or the id is not in the index
     * @throws Exception             if the index cannot be searched or read
     */
    void loadDoc() throws Exception {
        if (this.docIdToGuess == null) {
            throw new IllegalStateException("No document id selected for query " + qid);
        }

        IndexReader reader = retriever.getReader();
        IndexSearcher searcher = retriever.getSearcher();

        Term docIdTerm = new Term(TrecDocRetriever.FIELD_ID, this.docIdToGuess);
        TermQuery tq = new TermQuery(docIdTerm);

        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        searcher.search(tq, collector);

        // Check for a hit before reading scoreDocs[0]; an unknown id would
        // otherwise surface as a bare ArrayIndexOutOfBoundsException.
        if (collector.getTotalHits() == 0) {
            throw new IllegalStateException("Document '" + this.docIdToGuess + "' not found in the index");
        }

        this.luceneDocIdToGuess = collector.topDocs().scoreDocs[0].doc;
        this.docToGuess = reader.document(luceneDocIdToGuess);
        this.contentOfDocToGuess = docToGuess.get(FIELD_ANALYZED_CONTENT);
    }

    /**
     * Maps a stem back to a surface form: returns the first run of
     * non-whitespace characters in the document content that starts with the
     * stem, with trailing punctuation removed.
     *
     * @param word the stem to expand
     * @return the expanded form, or {@code word} itself when the content is
     *         unavailable or contains no occurrence of the stem
     */
    String deStemWord(String word) {
        if (word == null || word.isEmpty() || contentOfDocToGuess == null) {
            return word;
        }

        // The stem is data, not a pattern: quote it so characters such as
        // '+', '(' or '.' neither change the match nor fail to compile.
        Pattern p = Pattern.compile(Pattern.quote(word) + "\\S*");
        Matcher matcher = p.matcher(contentOfDocToGuess);
        if (!matcher.find()) {
            return word;
        }

        String matched = matcher.group();
        // Drop trailing punctuation, but never cut into the stem itself and
        // never treat a digit as punctuation.
        int end = matched.length();
        while (end > word.length() && !Character.isLetterOrDigit(matched.charAt(end - 1))) {
            end--;
        }
        return matched.substring(0, end);
    }

    /**
     * Chooses the words to share with the player and stores them in
     * {@code wordsSharedNow}.
     *
     * <p>The first call shares the query title. Every later call shares the
     * next {@code numTermsToShare} terms of the term vector (de-stemmed, each
     * followed by a space) and advances {@code numTermsShared}. When the
     * vector is exhausted or unavailable, a fixed notice is shared instead.
     */
    public void selectWords() {

        if (startState) {
            startState = false;
            wordsSharedNow = query.title;
            return;
        }

        // A missing term vector is treated like an empty one.
        int available = (tfvec == null) ? 0 : tfvec.size();
        int start = numTermsShared;
        int end = Math.min(start + numTermsToShare, available);

        if (start >= end) {
            wordsSharedNow = "No terms left to share!";
            return;
        }

        StringBuilder buff = new StringBuilder();
        for (int i = start; i < end; i++) {
            String stemmedWord = tfvec.get(i).termStr;
            buff.append(deStemWord(stemmedWord)).append(" ");
        }

        numTermsShared = end;
        wordsSharedNow = buff.toString();
    }

    /**
     * Returns the id of the document the player has to guess.
     *
     * @return the value of {@code docIdToGuess}
     */
    public String getDocToGuess() {
        return docIdToGuess;
    }

    /**
     * Appends the current state (see {@link #toString()}) as one line to the
     * game log file. Logging is best effort: when no log file is configured
     * nothing is written, and I/O failures are printed but not propagated.
     */
    public void logGameState() {
        if (logFileName == null) {
            return; // the "gamelog.file" property was not set
        }

        String line = this.toString() + "\n";

        // Every GameState appends to the same file, so the lock has to be
        // shared by all instances; locking on "this" would not serialise them.
        synchronized (GameState.class) {
            // try-with-resources closes the writer even when write() fails
            try (FileWriter fw = new FileWriter(logFileName, true)) {
                // Save the game state
                fw.write(line);
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }

    /**
     * Formats the state as a single tab-separated record: session id, game
     * mode, start time, query id, target doc id, last submitted doc, words
     * just shared, last user query and the relevance flag of the last guess.
     *
     * @return the tab-separated record, without a trailing newline
     */
    @Override
    public String toString() {
        final String tab = "\t";
        return sessionId + tab + usingTrueRJ + tab + startingEpochs
                + tab + qid + tab + docIdToGuess
                + tab + lastDocumentSubmitted + tab + wordsSharedNow
                + tab + lastUserQuery + tab + relGuess;
    }

    /**
     * Decides whether a guessed document counts as relevant.
     *
     * <p>With true relevance judgments enabled this is a plain lookup in the
     * relevance records. Otherwise the answer is drawn at random: a uniform
     * number from {@code Math.random()} is compared with the retrieval score,
     * so a higher score makes a "relevant" answer more likely.
     *
     * @param qid            the query id
     * @param guessedDocName name of the guessed document
     * @param retScore       similarity score of the guess, used as the threshold
     *                       for the random draw
     * @return {@code true} if the guess is treated as relevant
     */
    boolean isRelDocGuessed(String qid, String guessedDocName, float retScore) {
        if (!usingTrueRJ) {
            // Uniform Prob.
            float draw = (float) Math.random();
            return draw < retScore;
        }

        // simple table lookup
        return this.rels.isRel(qid, guessedDocName);
    }

    /**
     * Updates the game state for one player move, i.e. one submitted document.
     *
     * <p>The move is classified in this order: already submitted, exact hit on
     * the document to guess, relevant document, first move of a new game
     * (score is set to {@code INIT_SCORE}), wrong guess. Unless the document
     * was submitted before or the score has run out, new words are shared and
     * the submission is recorded. The resulting state is always logged.
     *
     * @param guessedDocId   Lucene id of the submitted document, stored with the submission
     * @param guessedDocName name of the submitted document
     * @param query          the query the player issued, kept for logging
     * @param retScore       similarity score of the submitted document, passed
     *                       on to the relevance decision
     */
    public void update(int guessedDocId, String guessedDocName, String query, float retScore) {
        lastUserQuery = query;
        lastDocumentSubmitted = guessedDocName;
        terminateCode = GAME_TO_CONTINUE;

        if (docsSubmitted.containsKey(guessedDocName)) {
            // if document already submitted
            terminateCode = DOC_ALREADY_GUESSED;
        } else if (guessedDocName.equalsIgnoreCase(docIdToGuess)) {
            // user guessed correctly...
            correctGuess = true;
            relGuess = true;
            score += SCORE_INCREMENET_FOR_CORRECT_GUESS;
            terminateCode = CORRECT_GUESS_FOUND;
        } else if (isRelDocGuessed(qid, guessedDocName, retScore)) {
            // a correct relevant document is guessed
            relGuess = true;
            score += SCORE_INCREMENET_FOR_CORRECT_REL;
        } else if (this.startState) {
            // new game starting
            score = INIT_SCORE;
        } else {
            relGuess = false;
            score += SCORE_INCREMENET_FOR_INCORRECT_REL;
        }

        // The score changes in steps of +20, +5 and -2, so it can jump from 1
        // straight to -1. Testing for equality with the threshold would miss
        // that and the game would never stop.
        if (score <= GAME_TERMINATION_SCORE) {
            terminateCode = SCORE_REACHED_MIN_THRESH;
        } else if (terminateCode != DOC_ALREADY_GUESSED) {
            // Got to share words with the player...
            selectWords();
            submitInfos.add(new UserSubmitInfo(wordsSharedNow, guessedDocId, guessedDocName, relGuess));
            docsSubmitted.put(guessedDocName, guessedDocName);
        }
        logGameState();
    }

    /**
     * Serialises the state that the client needs into a JSON object with the
     * members {@code score} (as a string), {@code words} (an HTML table with
     * one row per recorded submission), {@code terminate} (the termination
     * code) and {@code msg} (a message describing the outcome of the last move).
     *
     * @return the JSON text
     */
    public String buildJSON() {
        /* Return all the words everytime so that the client side
           needn't do anything else but to set the HTML...
        */
        StringBuilder html = new StringBuilder("<table>");
        for (UserSubmitInfo submitInfo : this.submitInfos) {
            html.append("<tr>").append(submitInfo).append("</tr>");
        }
        html.append("</table>");

        String msg;
        if (correctGuess) {
            msg = "You WIN!! You have guessed the document " + this.docIdToGuess + " correctly.";
        } else if (relGuess) {
            msg = "Congratulations!! You have hit a relevant document.";
        } else if (sharedWordsAreQueryTitle()) {
            msg = "Shared the query to start with.";
        } else {
            msg = "Oops!! Wrong guess and no hit on a relevant document.";
        }

        // The HTML and the message contain free text (document names, words
        // from the document, the query title), so they must be escaped before
        // being placed inside JSON string literals.
        StringBuilder buff = new StringBuilder("{");
        buff.append("\"score\": \"").append(score).append("\", ");
        buff.append("\"words\": \"").append(escapeJson(html.toString())).append("\", ");
        buff.append("\"terminate\": ").append(terminateCode).append(", ");
        buff.append("\"msg\": \"").append(escapeJson(msg)).append("\"");
        buff.append("}");
        return buff.toString();
    }

    /** Tells whether the words shared last are the query title (compared by value, not by reference). */
    private boolean sharedWordsAreQueryTitle() {
        if (query == null) {
            return false;
        }
        return wordsSharedNow == null ? query.title == null : wordsSharedNow.equals(query.title);
    }

    /** Escapes a string for use inside a double-quoted JSON string literal. */
    private static String escapeJson(String text) {
        StringBuilder out = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        // remaining control characters are not allowed raw in JSON
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
            }
        }
        return out.toString();
    }
}