edu.sabanciuniv.sentilab.sare.controllers.setcover.SetCoverFactory.java Source code

Java tutorial

Introduction

Here is the source code for edu.sabanciuniv.sentilab.sare.controllers.setcover.SetCoverFactory.java

Source

/*
 * Sentilab SARE: a Sentiment Analysis Research Environment
 * Copyright (C) 2013 Sabanci University Sentilab
 * http://sentilab.sabanciuniv.edu
 * 
 * This file is part of SARE.
 * 
 * SARE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *  
 * SARE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with SARE. If not, see <http://www.gnu.org/licenses/>.
 */

package edu.sabanciuniv.sentilab.sare.controllers.setcover;

import java.util.*;

import org.apache.commons.lang3.*;

import com.google.common.collect.*;

import edu.sabanciuniv.sentilab.core.controllers.*;
import edu.sabanciuniv.sentilab.core.models.factory.IllegalFactoryOptionsException;
import edu.sabanciuniv.sentilab.sare.controllers.base.documentStore.*;
import edu.sabanciuniv.sentilab.sare.models.base.document.*;
import edu.sabanciuniv.sentilab.sare.models.base.documentStore.PersistentDocumentStore;
import edu.sabanciuniv.sentilab.sare.models.setcover.*;
import edu.sabanciuniv.sentilab.utils.CannedMessages;

/**
 * A factory that builds {@link DocumentSetCover} objects out of the documents of a
 * {@link PersistentDocumentStore}, working on {@link SetCoverDocument} objects.
 * <p>
 * While a set cover is being built, progress is reported to every registered {@link ProgressObserver}.
 * @author Mus'ab Husaini
 */
public class SetCoverFactory extends PersistentDocumentStoreFactory<DocumentSetCover>
        implements DocumentStoreController, ProgressObservablePrimitive {

    /**
     * The weight coverage used when none has been set explicitly.
     */
    public static final double DEFAULT_WEIGHT_COVERAGE = 1.0;

    private PersistentDocumentStore store;
    private TokenizingOptions tokenizingOptions;
    private double weightCoverage;

    private final Set<ProgressObserver> progressObservers;

    /**
     * Creates an instance of the {@link SetCoverFactory} with the default weight coverage
     * and no registered progress observers.
     */
    public SetCoverFactory() {
        this.weightCoverage = DEFAULT_WEIGHT_COVERAGE;
        this.progressObservers = Sets.newHashSet();
    }

    /**
     * Builds the given set cover from the documents of the given store.
     * <p>
     * Every store document is copied into a {@link SetCoverDocument} and merged against the documents
     * already in the set cover; the resulting list replaces the set cover's documents. Observers are
     * notified with the message {@code "create"} after each store document has been processed.
     * @param setcover the {@link DocumentSetCover} to fill; its current documents seed the merge.
     * @param store the {@link PersistentDocumentStore} providing the documents; must not be {@code null}.
     * @param tokenizingOptions the {@link TokenizingOptions} to use; defaults are used when {@code null}.
     * @param weightCoverage the weight coverage passed on to {@code adjustCoverage}.
     * @return the set cover after coverage adjustment, with the tokenizing options recorded on it.
     */
    private DocumentSetCover createSpecific(DocumentSetCover setcover, PersistentDocumentStore store,
            TokenizingOptions tokenizingOptions, double weightCoverage) {
        Validate.notNull(store, CannedMessages.NULL_ARGUMENT, "store");

        if (tokenizingOptions == null) {
            tokenizingOptions = new TokenizingOptions();
        }

        // create a dummy set cover to keep the extra stuff in.
        DocumentSetCover dummySetCover = new DocumentSetCover(store);

        List<PersistentDocument> corpusDocuments = Lists.newArrayList(store.getDocuments());
        List<SetCoverDocument> setCoverDocuments = Lists.newArrayList(setcover.getAllDocuments());

        // the store's documents are materialised once above; reuse that list for the count
        // instead of iterating the store a second time.
        int storeSize = corpusDocuments.size();
        int processed = 0;

        // for each store document.
        for (PersistentDocument document : corpusDocuments) {
            // create a copy of the current document as a set cover document.
            SetCoverDocument workingDocument = (SetCoverDocument) new SetCoverDocument(document)
                    .setTokenizingOptions(tokenizingOptions).setStore(dummySetCover);

            // merge it against the set cover; what comes back is whichever document is left over.
            SetCoverDocument leftover = mergeIntoCover(workingDocument, setCoverDocuments);
            setCoverDocuments.add(leftover);

            // if the document was completely consumed, then we mark it as uncovered.
            if (leftover.getTotalTokenWeight() == 0) {
                leftover.setCovered(false);
            }

            // derive progress from an integer counter rather than summing 1.0 / storeSize, so that
            // rounding errors do not accumulate and the final notification is exactly 1.0.
            processed++;
            this.notifyProgress((double) processed / storeSize, "create");
        }

        setcover.setDocuments(setCoverDocuments);

        // get rid of the dummy.
        dummySetCover.setBaseStore(null);

        return setcover.adjustCoverage(weightCoverage).setTokenizingOptions(tokenizingOptions);
    }

    /**
     * Merges a working document against each document of the set cover list, in order.
     * <p>
     * At every position, the document that yields the larger merged weight stays in (or is put into)
     * the list and the other one becomes the working document. The loop stops early once the working
     * document has no token weight left.
     * @param workingDocument the document to merge into the set cover.
     * @param setCoverDocuments the current set cover documents; entries may be replaced in place.
     * @return the working document remaining after the last merge; this may be a document that was
     * swapped out of {@code setCoverDocuments}.
     */
    private static SetCoverDocument mergeIntoCover(SetCoverDocument workingDocument,
            List<SetCoverDocument> setCoverDocuments) {
        for (int index = 0; index < setCoverDocuments.size(); index++) {
            SetCoverDocument workingSCDocument = setCoverDocuments.get(index);

            // get merge weights on both directions.
            double forwardMerge = workingSCDocument.getMergedWeight(workingDocument);
            double backwardMerge = workingDocument.getMergedWeight(workingSCDocument);

            // if we get more weight on the backward merge, swap the documents.
            if (forwardMerge < backwardMerge) {
                // replace by position: the index is already known, and an equality-based
                // indexOf/remove could hit a different element that merely compares equal.
                setCoverDocuments.set(index, workingDocument);

                SetCoverDocument tmpSCDocument = workingDocument;
                workingDocument = workingSCDocument;
                workingSCDocument = tmpSCDocument;
            }

            // perform the merge.
            workingSCDocument.merge(workingDocument);

            // if the entire document has been consumed, then we are done with it.
            if (workingDocument.getTotalTokenWeight() == 0) {
                break;
            }
        }

        return workingDocument;
    }

    /**
     * Creates or updates a set cover according to the options set on this factory.
     * <p>
     * For an existing set cover whose tokenizing options equal those of this factory, no rebuild is
     * performed: the set cover is returned as is when the weight coverage also matches, or only its
     * coverage is adjusted otherwise. In all other cases the set cover is cleared and rebuilt.
     * @param setcover the existing {@link DocumentSetCover} to update, or {@code null} to create a new one.
     * @return the resulting {@link DocumentSetCover}.
     * @throws IllegalFactoryOptionsException when no store has been set on this factory.
     */
    @Override
    protected DocumentSetCover createPrivate(DocumentSetCover setcover) throws IllegalFactoryOptionsException {

        // explicit check instead of throwing and catching a NullPointerException; the wrapped
        // exception carries the same type and message as before.
        if (this.getStore() == null) {
            throw new IllegalFactoryOptionsException(
                    new NullPointerException(String.format(CannedMessages.NULL_ARGUMENT, "this.store")));
        }

        boolean freshlyCreated = (setcover == null);
        if (freshlyCreated) {
            // create a set cover based on this store.
            setcover = new DocumentSetCover(this.getStore());
        }

        // the shortcuts below assume a set cover that has already been built; one created just above
        // has not been, so it must always go through the build even if its options compare equal.
        if (!freshlyCreated
                && ObjectUtils.equals(this.getTokenizingOptions(), setcover.getTokenizingOptions())) {
            if (ObjectUtils.equals(this.getWeightCoverage(),
                    ObjectUtils.defaultIfNull(setcover.getWeightCoverage(), DEFAULT_WEIGHT_COVERAGE))) {
                // in this case, nothing to do here.
                return setcover;
            } else {
                // in this case, we just need to adjust coverage.
                return setcover.adjustCoverage(this.getWeightCoverage());
            }
        }

        // clear set cover if it already exists, we'll start afresh.
        setcover.clear();
        this.createSpecific(setcover, this.getStore(), this.getTokenizingOptions(), this.getWeightCoverage());

        return setcover;
    }

    /**
     * Gets the store from which the set cover will be created.
     * @return the {@link PersistentDocumentStore} object from which the set cover will be created.
     */
    public PersistentDocumentStore getStore() {
        return this.store;
    }

    /**
     * Sets the store from which the set cover is to be created.
     * @param store the {@link PersistentDocumentStore} object from which the set cover is to be created.
     * @return the {@code this} object.
     */
    public SetCoverFactory setStore(PersistentDocumentStore store) {
        this.store = store;
        return this;
    }

    /**
     * Gets the tokenizing options that will be used to tokenize the content of the documents.
     * @return the {@link TokenizingOptions} object containing the tokenizing options;
     * may be {@code null} when none have been set.
     */
    public TokenizingOptions getTokenizingOptions() {
        return this.tokenizingOptions;
    }

    /**
     * Sets the tokenizing options to be used for tokenizing the content of the documents.
     * @param tokenizingOptions the {@link TokenizingOptions} object containing the tokenizing options.
     * @return the {@code this} object.
     */
    public SetCoverFactory setTokenizingOptions(TokenizingOptions tokenizingOptions) {
        this.tokenizingOptions = tokenizingOptions;
        return this;
    }

    /**
     * Gets the token weight coverage to be maintained in the final set cover.
     * @return the token weight coverage; must be in [0.0, 1.0].
     */
    public double getWeightCoverage() {
        return this.weightCoverage;
    }

    /**
     * Sets the desired token weight coverage for the set cover.
     * @param weightCoverage the desired token weight coverage; must be in [0.0, 1.0].
     * @return the {@code this} object.
     * @throws IllegalArgumentException when {@code weightCoverage} is outside [0.0, 1.0].
     */
    public SetCoverFactory setWeightCoverage(double weightCoverage) {
        Validate.isTrue(weightCoverage >= 0.0 && weightCoverage <= 1.0,
                "weight coverage must be in the interval [0.0, 1.0].");

        this.weightCoverage = weightCoverage;
        return this;
    }

    /**
     * Registers an observer to be notified of progress.
     * @param observer the {@link ProgressObserver} to register.
     * @return the {@code this} object.
     */
    @Override
    public ProgressObservablePrimitive addProgessObserver(ProgressObserver observer) {
        this.progressObservers.add(observer);
        return this;
    }

    /**
     * Unregisters a previously registered observer.
     * @param observer the {@link ProgressObserver} to remove.
     * @return {@code true} if the observer was registered and has been removed; {@code false} otherwise.
     */
    @Override
    public boolean removeProgressObserver(ProgressObserver observer) {
        return this.progressObservers.remove(observer);
    }

    /**
     * Passes the given progress value and message on to every registered observer.
     * @param progress the progress value to report.
     * @param message the message to report along with the progress.
     */
    @Override
    public void notifyProgress(double progress, String message) {
        // iterate over a snapshot so that an observer may unregister itself (or register another one)
        // from within observe() without causing a ConcurrentModificationException.
        for (ProgressObserver progressObserver : Lists.newArrayList(this.progressObservers)) {
            progressObserver.observe(progress, message);
        }
    }
}