gov.llnl.iscr.iris.Iris.java Source code

Introduction

Here is the source code for gov.llnl.iscr.iris.Iris.java
Source

/**
* Copyright (c) 2011, Lawrence Livermore National Security, LLC. 
* Produced at the Lawrence Livermore National Laboratory. 
* Written by Kevin Lawrence, lawrence22@llnl.gov
* Under the guidance of: 
* David Andrzejewski, andrzejewski1@llnl.gov
* David Buttler, buttler1@llnl.gov 
* LLNL-CODE-521811 All rights reserved. This file is part of IRIS
*
* This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
* License (as published by the Free Software Foundation) version 2, dated June 1991. This program is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with this program; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA For full text see license.txt
*
*
*/
package gov.llnl.iscr.iris;

import gov.llnl.iscr.iris.LDAHandler;
import gov.llnl.iscr.iris.DisMaxQuery;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.solr.common.SolrDocumentList;

import com.mongodb.BasicDBObject;

/**
 * 
 * Iris is the driver for the <code>LDAHandler</code>. It demonstrates the intended usage of the 
 * <code>LDAHandler</code> and simplifies the use of the latent topic retrieval component. 
 * It essentially provides a single class solution for bridging the interaction between 
 * pre-learned latent topics and a pre-existing information retrieval system.
 * <p>A typical invocation sequence is:
 * <blockquote><pre>
 * MongoInstance mongo = new MongoInstance("127.0.0.1", "topicModel");
 * LDAModel model = new LDAModel(mongo);
 * LDAHandler lda = new LDAHandler(model);
 * Iris iris = new Iris(lda);
 * </pre></blockquote>
 * Once instantiated, all calls can be made to iris to achieve the default operations.
 * For customizing: call upon the methods of the other classes of the component. 
 * Otherwise, iris is all you need for implementing the latent topic feedback functionality.
 * <p>For example, a complete sequence to retrieve and display topics to console in Java is:
 * <blockquote><pre>
 * iris.setLatentTopics(results);
 * iris.setLatentTopicsNgrams();
 * Map<Integer, List<BasicDBObject>> ngrams = iris.getLatentTopicNgrams();
 * for(Integer topicID : iris.getLatentTopics()){
 *       System.out.println("Topic: "+topicID);
 *       System.out.println(iris.getTrigram(ngrams.get(topicID)));
 *       System.out.println(iris.getBigrams(ngrams.get(topicID)));
 *       System.out.println(iris.getUnigrams(ngrams.get(topicID)));
 * }
 * </pre></blockquote>
 * Iris may also be used for query expansion. 
 * 
 */

public class Iris {
    LDAHandler lda;
    List<Integer> latentTopics;
    Map<Integer, List<BasicDBObject>> latentTopicNgrams;

    /**
     * returns a list of topic IDs representing the latent topics to be displayed
     * @return
     */
    public List<Integer> getLatentTopics() {
        return latentTopics;
    }

    /**
     * returns a key-value map of the latent topic IDs and their respective ngrams to be displayed
     * @return
     */
    public Map<Integer, List<BasicDBObject>> getLatentTopicNgrams() {
        return latentTopicNgrams;
    }

    /**
     * returns a key-value map of the latent topic IDs and a list of words to be used for query expansion
     * @return
     */
    public Map<Integer, List<String>> getExpansionWords() {
        return lda.getAllExpansionWords();
    }

    /**
     * creates an instance of <code>Iris</code> with given <code>LDAHandler</code>
     * @param lda the LDAHandler to be used by Iris
     */
    public Iris(LDAHandler lda) {
        this.lda = lda;
    }

    /**
     * sets a compiled list of enriched topics and related topics
     * @param results the result list from user query containing documents
     */
    public void setLatentTopics(SolrDocumentList results) {

        lda.setEnrichedTopicSet(Arrays.asList(results.get(0).get("id"), results.get(1).get("id")));
        latentTopics = new ArrayList<Integer>(lda.getEnrichedTopicSet());

        lda.setRelatedTopicSet();
        List<Integer> temp = lda.getRelatedTopicSet();
        for (Integer topicID : temp) {
            if (!latentTopics.contains(topicID))
                latentTopics.add(topicID);
        }

    }

    /**
     * sets a map of topics and selected ngrams, which includes a trigram, two (2) bigrams and four (4) unigrams
     */
    public void setLatentTopicsNgrams() {
        if (!latentTopics.isEmpty()) {
            latentTopicNgrams = new LinkedHashMap<Integer, List<BasicDBObject>>();
            Iterator<Integer> it = latentTopics.iterator();
            Integer selectedTopic;
            List<BasicDBObject> temp;
            while (it.hasNext()) {
                selectedTopic = it.next();
                lda.setNgrams(selectedTopic);
                lda.setUnigrams(selectedTopic);
                temp = new ArrayList<BasicDBObject>(lda.getSelectedNgrams());
                temp.addAll(lda.getSelectedUnigrams());
                latentTopicNgrams.put(selectedTopic, temp);

            }

        }
    }

    /**
     * returns a <code>String</code> representation of a trigram, if it exist, within the given list of ngrams
     * @param ngrams a list of ngram objects
     * @return
     */
    public String getTrigram(List<BasicDBObject> ngrams) {
        BasicDBObject trigram = ngrams.get(0);
        if ((Integer) trigram.get("size") == 3)
            return trigram.getString("ngram");
        else
            return "(No trigrams found)";
    }

    /**
     * returns true if the given list of ngrams contains a trigram, otherwise false
     * @param ngrams a list of ngram objects
     * @return
     */
    public boolean hasTrigram(List<BasicDBObject> ngrams) {
        boolean hasTrigram = false;
        if ((Integer) ngrams.get(0).get("size") == 3)
            hasTrigram = true;

        return hasTrigram;
    }

    /**
     * returns a single formatted <code>String</code> representation of bigrams within the given list of ngrams
     * @param ngrams a list of ngram objects
     * @return
     */
    public String getBigrams(List<BasicDBObject> ngrams) {
        StringBuffer bigrams = new StringBuffer();
        int count = 0;
        for (BasicDBObject bigram : ngrams) {
            if ((Integer) bigram.get("size") == 2) {
                if (count == 0) {
                    bigrams.append(bigram.get("ngram"));
                    ++count;
                } else if (count == 1) {
                    bigrams.append(", " + bigram.get("ngram"));
                    break;
                }
            } else
                continue;
        }

        return bigrams.toString();
    }

    /**
     * returns a single formatted <code>String</code> representation of unigrams within the given list of ngrams
     * @param ngrams
     * @return
     */
    public String getUnigrams(List<BasicDBObject> ngrams) {
        StringBuffer unigrams = new StringBuffer();
        int count = 0;
        for (BasicDBObject unigram : ngrams) {
            if (unigram.containsField("word")) {
                if (count == 0) {
                    unigrams.append(unigram.get("word"));
                    ++count;
                } else if (count < 4) {
                    unigrams.append(", " + unigram.get("word"));
                    ++count;
                } else
                    break;
            }
        }
        return unigrams.toString();
    }

    /**
     * returns the given {@link DisMaxQuery} with appended expansion terms 
     * retrieved from the given list of topics. It calls on {@link DisMaxQuery#addBoostQuery(Map)}
     * to perform the expansion using the default boost value.
     * @param query the DisMaxQuery object to be expanded
     * @param topics the list of topics used to retrieve the expansion words for boosting
     * @param sign used to distinguish between positive ('+') and "negative" ('-') boosting
     * @return
     */
    public DisMaxQuery expandBoostQuery(DisMaxQuery query, List<String> topics, char sign) {
        query.addBoostQuery(buildBoostQueryMap(getListOfTerms(topics), query.getDefaultBoost(), sign));
        return query;
    }

    /**
     * returns the given <code>DisMaxQuery</code> with appended expansion terms
     * retrieved from the given list of topics. It calls on {@link DisMaxQuery#addBoostQuery(String, Map)}
     * to perform the expansion using the default boost value.
     * @param query the DisMaxQuery object to be expanded
     * @param topics the list of topics used to retrieve the expansion words for boosting
     * @param field represents the indexed field to boost on
     * @param sign used to distinguish between positive ('+') and "negative" ('-') boosting
     * @return
     */
    public DisMaxQuery expandBoostQuery(DisMaxQuery query, List<String> topics, String field, char sign) {
        query.addBoostQuery(field, buildBoostQueryMap(getListOfTerms(topics), query.getDefaultBoost(), sign));
        return query;
    }

    /**
     * returns the given <code>DisMaxQuery</code> with appended expansion terms
     * retrieved from the given list of topics. It calls on the <code>DisMaxQuery</code> 
     * <code>method addBoostQuery(Map<String, Float>)</code>
     * to perform the expansion using the given boost value.
     * @param query the DisMaxQuery object to be expanded
     * @param topics the list of topics used to retrieve the expansion words for boosting
     * @param boost the value used to boost the query terms
     * @param sign used to distinguish between positive ('+') and "negative" ('-') boosting
     * @return
     */
    public DisMaxQuery expandBoostQuery(DisMaxQuery query, List<String> topics, float boost, char sign) {
        query.addBoostQuery(buildBoostQueryMap(getListOfTerms(topics), boost, sign));
        return query;
    }

    /**
     * returns the given <code>DisMaxQuery</code> with appended expansion terms
     * retrieved from the given list of topics. It calls on the <code>DisMaxQuery</code> 
     * method <code>addBoostQuery(String, Map<String, Float>)</code>
     * to perform the expansion using the given boost value.
     * @param query the DisMaxQuery object to be expanded
     * @param topics the list of topics used to retrieve the expansion words for boosting
     * @param field represents the indexed field to boost on
     * @param boost the value used to boost the query terms
     * @param sign used to distinguish between positive ('+') and "negative" ('-') boosting
     * @return
     */
    public DisMaxQuery expandBoostQuery(DisMaxQuery query, List<String> topics, String field, float boost,
            char sign) {
        query.addBoostQuery(field, buildBoostQueryMap(getListOfTerms(topics), boost, sign));
        return query;
    }

    /**
     * returns the given {@link DisMaxQuery} with a new set of expansion terms 
     * retrieved from the given list of topics. It calls on {@link DisMaxQuery#setBoostQuery(Map)}
     * to perform the expansion using the default boost value.
     * @param query the DisMaxQuery object to be expanded
     * @param topics the list of topics used to retrieve the expansion words for boosting
     * @return
     */
    public DisMaxQuery resetBoostQuery(DisMaxQuery query, List<String> topics) {
        query.setBoostQuery(buildBoostQueryMap(getListOfTerms(topics), query.getDefaultBoost(), '+'));
        return query;
    }

    /**
     * returns the given <code>DisMaxQuery</code> with a new set of expansion terms
     * retrieved from the given list of topics. It calls on {@link DisMaxQuery#setBoostQuery(String, Map)}
     * to perform the expansion using the default boost value.
     * @param query the DisMaxQuery object to be expanded
     * @param topics the list of topics used to retrieve the expansion words for boosting
     * @param field represents the indexed field to boost on
     * @return
     */
    public DisMaxQuery resetBoostQuery(DisMaxQuery query, List<String> topics, String field) {
        query.setBoostQuery(field, buildBoostQueryMap(getListOfTerms(topics), query.getDefaultBoost(), '+'));
        return query;
    }

    /**
     * returns the given <code>DisMaxQuery</code> with a new set of expansion terms
     * retrieved from the given list of topics. It calls on the <code>DisMaxQuery</code> 
     * method <code>setBoostQuery(Map<String, Float>)</code>
     * to perform the expansion using the given boost value.
     * @param query the DisMaxQuery object to be expanded
     * @param topics the list of topics used to retrieve the expansion words for boosting
     * @param boost the value used to boost the query terms
     * @return
     */
    public DisMaxQuery resetBoostQuery(DisMaxQuery query, List<String> topics, float boost) {
        query.setBoostQuery(buildBoostQueryMap(getListOfTerms(topics), boost, '+'));
        return query;
    }

    /**
     * returns the given <code>DisMaxQuery</code> with a new set of expansion terms
     * retrieved from the given list of topics. It calls on the <code>DisMaxQuery</code> 
     * method <code>setBoostQuery(String, Map<String, Float>)</code>
     * to perform the expansion using the given boost value.
     * @param query the DisMaxQuery object to be expanded
     * @param topics the list of topics used to retrieve the expansion words for boosting
     * @param field represents the indexed field to boost on
     * @param boost the value used to boost the query terms
     * @return
     */
    public DisMaxQuery resetBoostQuery(DisMaxQuery query, List<String> topics, String field, float boost) {
        query.setBoostQuery(field, buildBoostQueryMap(getListOfTerms(topics), boost, '+'));
        return query;
    }

    //-|===============================================
    //-|Private helper methods used in query expansion
    //-|===============================================
    /**
     * builds and returns a map that represents the values of a boost query (bq) parameter for a Disjunction Max query.
     * It contains the mapping of query terms and associated boost values.
     * @param terms represents the query terms to boost
     * @param boost the value used to boost the query terms
     * @param sign used to distinguish between positive ('+') and "negative" ('-') boosting
     * @return
     * @see {@link http://wiki.apache.org/solr/DisMaxQParserPlugin}
     */
    private Map<String, Float> buildBoostQueryMap(Iterator<String> terms, float boost, char sign) {
        Map<String, Float> boostQueries = new LinkedHashMap<String, Float>();
        switch (sign) {
        case '-':
            while (terms.hasNext()) {
                boostQueries.put("-" + terms.next(), boost);
            }
        case '+':
        default:
            while (terms.hasNext()) {
                boostQueries.put(terms.next(), boost);
            }
            break;
        }
        return boostQueries;
    }

    /**
     * returns an iterator list of words to be used for query expansion for the given list of topics
     * @param topics represents the list of topics chosen (topic IDs are accepted as <code>String</code>
     * @return
     */
    private Iterator<String> getListOfTerms(List<String> topics) {
        List<String> bqTerms = new ArrayList<String>();
        for (String topicID : topics) {
            bqTerms.addAll(lda.getTopicExpansionWords(Integer.parseInt(topicID)));
        }
        Iterator<String> it = bqTerms.iterator();
        return it;
    }
}