edu.txstate.dmlab.clusteringwiki.suffixtree.PhraseList.java Source code

Java tutorial

Introduction

Here is the source code for edu.txstate.dmlab.clusteringwiki.suffixtree.PhraseList.java

Source

package edu.txstate.dmlab.clusteringwiki.suffixtree;

/**
 *  ClusteringWiki - personalized and collaborative clustering of search results
 *  Copyright (C) 2010  Texas State University-San Marcos
 *  
 *  Contact: http://dmlab.cs.txstate.edu
 * 
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

import java.util.ArrayList;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.util.OpenBitSet;

/**
 * List of phrases that can greedily retrieve the top-quality phrase
 * and drop phrases that no longer meet the coverage criteria.
 * 
 * @author David C. Anastasiu
 *
 */
public abstract class PhraseList extends ArrayList<Phrase> {

    /**
     * Serial version uid
     */
    private static final long serialVersionUID = 2391794554871731196L;

    /**
     * Initial capacity passed to the backing {@link ArrayList}. Kept so that
     * the copy constructor can create a list with the same configuration.
     */
    protected final int initialSize;

    /**
     * Minimum cardinality setting. This class only stores and prints it;
     * presumably subclasses consult it in {@link #shouldInsert(int, int)}.
     */
    protected final int minCardinality;

    /**
     * Minimum phrase length setting. This class only stores and prints it;
     * presumably subclasses consult it in {@link #shouldInsert(int, int)}.
     */
    protected final int minPhraseLength;

    /**
     * Maximum phrase length setting. This class only stores and prints it;
     * presumably subclasses consult it in {@link #shouldInsert(int, int)}.
     */
    protected final int maxPhraseLength;

    /**
     * Create a new, empty list configured with the same parameters as an
     * existing list. The elements of <code>like</code> are not copied.
     *
     * @param like list whose configuration is reused; must not be null
     */
    public PhraseList(PhraseList like) {
        super(like.initialSize);
        initialSize = like.initialSize;
        minPhraseLength = like.minPhraseLength;
        maxPhraseLength = like.maxPhraseLength;
        minCardinality = like.minCardinality;
    }

    /**
     * Create a new, empty list.
     *
     * @param theMaxSize initial capacity of the backing {@link ArrayList};
     *        nothing in this class uses it as an upper bound on the size
     * @param theMinPhraseLength value stored in {@link #minPhraseLength}
     * @param theMaxPhraseLength value stored in {@link #maxPhraseLength}
     * @param theMinCardinality value stored in {@link #minCardinality}
     * @throws IllegalArgumentException if <code>theMaxSize</code> is negative
     *         (raised by the {@link ArrayList} constructor)
     */
    public PhraseList(int theMaxSize, int theMinPhraseLength, int theMaxPhraseLength, int theMinCardinality) {
        super(theMaxSize);
        initialSize = theMaxSize;
        minPhraseLength = theMinPhraseLength;
        maxPhraseLength = theMaxPhraseLength;
        minCardinality = theMinCardinality;
    }

    /**
     * Decide whether <code>p1</code> ranks above <code>p2</code>.
     * {@link #getTopPhrase(OpenBitSet)} replaces its current best candidate
     * with a later phrase only when this method returns <code>true</code>
     * for (later phrase, current best).
     *
     * @param p1 the phrase being tested
     * @param p2 the phrase it is compared against
     * @return <code>true</code> if <code>p1</code> should be preferred over
     *         <code>p2</code>
     */
    protected abstract boolean greaterThan(Phrase p1, Phrase p2);

    /**
     * Return <code>true</code> if a phrase with the given length and
     * cardinality qualifies for membership in this list.
     * {@link #insert(Phrase)} calls this with the phrase's
     * <code>length</code> and <code>support</code>;
     * {@link #getTopPhrase(OpenBitSet)} calls it with the support that
     * remains once covered documents are discounted.
     *
     * @param length phrase length
     * @param cardinality phrase support (document count)
     * @return <code>true</code> if such a phrase may be kept in the list
     */
    public abstract boolean shouldInsert(int length, int cardinality);

    /**
     * Append a phrase to the list if it qualifies according to
     * {@link #shouldInsert(int, int)}; otherwise the phrase is silently
     * discarded.
     *
     * @param p phrase to insert; must not be null
     */
    public void insert(Phrase p) {
        if (shouldInsert(p.length, p.support)) {
            add(p);
        }
    }

    /**
     * Remove and return the top phrase given the documents already covered.
     * <p>
     * The list is scanned once from front to back. For every phrase the
     * support is recomputed via <code>supportAfterDocsRemoval</code>; phrases
     * for which {@link #shouldInsert(int, int)} then returns
     * <code>false</code> are removed from the list. Among the remaining
     * phrases the best one according to
     * {@link #greaterThan(Phrase, Phrase)} is selected, removed from the
     * list and returned.
     * <p>
     * NOTE(review): earlier documentation stated that super-phrases of the
     * chosen phrase are removed as well. Nothing in this method does that
     * explicitly; presumably it follows from the support recomputation on a
     * later call, once the caller has added the chosen phrase's documents to
     * <code>coveredDocs</code> - confirm against the caller.
     *
     * @param coveredDocs documents that are already covered
     * @return the selected phrase, or <code>null</code> if the list is empty
     *         or no phrase qualifies any more
     */
    public Phrase getTopPhrase(OpenBitSet coveredDocs) {
        if (isEmpty()) {
            return null;
        }

        Phrase best = null;
        int bestIndex = -1;
        //Greedily find the best phrase
        for (int i = 0; i < size(); i++) {
            final Phrase candidate = get(i);
            final int remainingSupport = candidate.supportAfterDocsRemoval(coveredDocs);
            if (!shouldInsert(candidate.length, remainingSupport)) {
                // Removal shifts the following elements one slot to the
                // left, so slot i must be visited again. bestIndex always
                // points at an earlier slot and therefore stays valid.
                remove(i);
                i--;
                continue;
            }
            if (best == null || greaterThan(candidate, best)) {
                best = candidate;
                bestIndex = i;
            }
        }

        if (bestIndex >= 0) {
            remove(bestIndex);
        }
        return best;
    }

    /**
     * String representation of the list: the configuration values followed
     * by the elements, one per line.
     */
    @Override
    public String toString() {
        final String header = "  initialSize: " + initialSize
                + "\n  minCardinality: " + minCardinality
                + "\n  minPhraseLength: " + minPhraseLength
                + "\n  maxPhraseLength: " + maxPhraseLength
                + "\n  Elements: \n\n";
        return header + StringUtils.join(this, "\n");
    }

}