org.dkpro.tc.fstore.simple.DenseFeatureStore.java Source code

Java tutorial

Introduction

Here is the source code for org.dkpro.tc.fstore.simple.DenseFeatureStore.java

Source

/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package org.dkpro.tc.fstore.simple;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;

import org.dkpro.tc.api.exception.TextClassificationException;
import org.dkpro.tc.api.features.Feature;
import org.dkpro.tc.api.features.FeatureStore;
import org.dkpro.tc.api.features.Instance;

import it.unimi.dsi.fastutil.objects.ObjectArrayList;

/**
 * In-memory {@link FeatureStore} that keeps every {@link Instance} in insertion order and
 * requires all instances to expose exactly the same set of feature names.
 * <p>
 * The set of feature names is fixed by the first instance that is added successfully; every
 * later instance is validated against it in {@link #addInstance(Instance)}.
 */
public class DenseFeatureStore implements FeatureStore {
    /** All stored instances, in the order in which they were added. */
    private final ObjectArrayList<Instance> instanceList;

    /** Feature names shared by all instances; {@code null} until the first instance is stored. */
    private TreeSet<String> featureNames;

    /**
     * Creates an empty feature store. The feature names stay undefined ({@code null}) until the
     * first instance has been added.
     */
    public DenseFeatureStore() {
        this.instanceList = new ObjectArrayList<>();
        this.featureNames = null;
    }

    /**
     * Validates the instance and appends it to the store.
     * <p>
     * The first accepted instance defines the feature names of this store. Every instance,
     * including the first one, is rejected if it declares the same feature name more than once,
     * and every later instance is rejected if its feature names differ from the store's names.
     * A rejected instance leaves the store unchanged.
     *
     * @param instance
     *            the instance to add
     * @throws TextClassificationException
     *             if a feature name occurs more than once in the instance, or if the instance's
     *             feature names are not identical to those of the previously added instances
     */
    @Override
    public void addInstance(Instance instance) throws TextClassificationException {
        // Collect the names into a local set first, so that a rejected instance can never leave
        // the store with a half-initialised feature-name set.
        HashSet<String> instanceFeatureNames = new HashSet<String>();
        for (Feature feature : instance.getFeatures()) {
            String name = feature.getName();
            if (!instanceFeatureNames.add(name)) {
                throw new TextClassificationException(
                        "Feature with name '" + name + "' is defined multiple times.");
            }
        }

        if (featureNames == null) {
            // First instance: its (already validated) names become the reference set.
            featureNames = new TreeSet<String>(instanceFeatureNames);
        }
        else if (!instanceFeatureNames.equals(featureNames)) {
            // Only compute the symmetric difference when the sets really differ; it is needed
            // solely for the error message.
            @SuppressWarnings("unchecked")
            String[] symDiff = new ArrayList<String>(
                    CollectionUtils.disjunction(instanceFeatureNames, featureNames))
                            .toArray(new String[] {});
            throw new TextClassificationException(
                    "One or more, but not all of your instances return the following feature(s): "
                            + StringUtils.join(symDiff, " and "));
        }

        instanceList.add(instance);
    }

    /**
     * @param i
     *            zero-based position of the instance in insertion order
     * @return the instance stored at position {@code i}
     */
    @Override
    public Instance getInstance(int i) {
        return instanceList.get(i);
    }

    /**
     * @param i
     *            zero-based position of the instance in insertion order
     * @return the outcomes of the instance stored at position {@code i}
     */
    @Override
    public List<String> getOutcomes(int i) {
        return instanceList.get(i).getOutcomes();
    }

    /**
     * @param i
     *            zero-based position of the instance in insertion order
     * @return the weight of the instance stored at position {@code i}
     */
    @Override
    public Double getWeight(int i) {
        return instanceList.get(i).getWeight();
    }

    /**
     * Collects the outcomes of all stored instances. The set is rebuilt on every call by
     * iterating over all instances.
     *
     * @return a new sorted set containing each distinct outcome once
     */
    @Override
    public SortedSet<String> getUniqueOutcomes() {
        SortedSet<String> uniqueOutcomes = new TreeSet<String>();
        for (Instance ins : instanceList) {
            uniqueOutcomes.addAll(ins.getOutcomes());
        }
        return uniqueOutcomes;
    }

    /**
     * @return the number of instances currently held by this store
     */
    @Override
    public int getNumberOfInstances() {
        return this.instanceList.size();
    }

    /**
     * @return an iterable over the stored instances, backed by this store
     */
    @Override
    public Iterable<Instance> getInstances() {
        return new InstancesIterable(this);
    }

    /**
     * Returns the feature names shared by all instances.
     * <p>
     * The returned set is the store's internal set, not a copy, and it is {@code null} as long
     * as no instance has been added successfully.
     *
     * @return the sorted feature names, or {@code null} if no instance has been added yet
     */
    @Override
    public TreeSet<String> getFeatureNames() {
        return featureNames;
    }

    /**
     * Primarily for debug purposes.
     *
     * @return a string containing the textual representation of the stored instance list
     */
    @Override
    public String toString() {
        return "DenseFeatureStore{" + "instanceList=" + instanceList + '}';
    }

    /**
     * Removes the instance at the given position. The feature names of the store are not
     * affected, even if the store becomes empty.
     *
     * @param i
     *            zero-based position of the instance to remove
     */
    @Override
    public void deleteInstance(int i) {
        instanceList.remove(i);
    }

    /**
     * @return always {@code false}; the feature names are derived from the added instances
     */
    @Override
    public boolean isSettingFeatureNamesAllowed() {
        return false;
    }

    /**
     * Not supported by this store, see {@link #isSettingFeatureNamesAllowed()}.
     *
     * @param featureNames
     *            ignored
     * @throws IllegalStateException
     *             always
     */
    @Override
    public void setFeatureNames(TreeSet<String> featureNames) {
        throw new IllegalStateException("Method not allowed in this feature store");
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean supportsSparseFeatures() {
        return false;
    }
}