de.tudarmstadt.ukp.dkpro.tc.fstore.simple.DenseFeatureStore.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.dkpro.tc.fstore.simple.DenseFeatureStore.java

Source

/*******************************************************************************
 * Copyright 2015
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universitt Darmstadt
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.dkpro.tc.fstore.simple;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;

import de.tudarmstadt.ukp.dkpro.tc.api.exception.TextClassificationException;
import de.tudarmstadt.ukp.dkpro.tc.api.features.Feature;
import de.tudarmstadt.ukp.dkpro.tc.api.features.FeatureStore;
import de.tudarmstadt.ukp.dkpro.tc.api.features.Instance;

/**
 * Data structure that holds instances.
 * 
 * @author zesch
 * 
 */
public class DenseFeatureStore implements FeatureStore {
    private List<List<Object>> instanceList;
    private List<List<String>> outcomeList;
    private List<Double> weightList;
    private List<Integer> sequenceIds;
    private List<Integer> sequencePositions;
    private TreeSet<String> featureNames;

    /**
     * Creates an empty feature store
     */
    public DenseFeatureStore() {
        this.instanceList = new ArrayList<List<Object>>();
        this.outcomeList = new ArrayList<List<String>>();
        this.weightList = new ArrayList<Double>();
        this.sequenceIds = new ArrayList<Integer>();
        this.sequencePositions = new ArrayList<Integer>();
        this.featureNames = null;
    }

    @Override
    public void addInstance(Instance instance) throws TextClassificationException {
        if (featureNames == null) {
            featureNames = new TreeSet<String>();
            for (Feature feature : instance.getFeatures()) {
                String name = feature.getName();
                if (featureNames.contains(name)) {
                    throw new TextClassificationException(
                            "Feature with name '" + name + "' is defined multiple times.");
                }
                featureNames.add(name);
            }
        }

        HashSet<String> instanceFeatureNames = new HashSet<String>();
        for (Feature f : instance.getFeatures()) {
            instanceFeatureNames.add(f.getName());
        }
        @SuppressWarnings("unchecked")
        String[] symDiff = new ArrayList<String>(CollectionUtils.disjunction(instanceFeatureNames, featureNames))
                .toArray(new String[] {});
        if (symDiff.length > 0) {
            throw new TextClassificationException(
                    "One or more, but not all of your instances return the following feature(s): "
                            + StringUtils.join(symDiff, " and "));
        }

        // create map of feature names and offset in set
        Map<String, Integer> sortedFeatureNameMap = new HashMap<String, Integer>();
        int offset = 0;
        Iterator<String> iterator = featureNames.iterator();
        while (iterator.hasNext()) {
            sortedFeatureNameMap.put(iterator.next(), offset);
            offset++;
        }

        Object[] values = new Object[featureNames.size()];
        for (Feature feature : instance.getFeatures()) {
            values[sortedFeatureNameMap.get(feature.getName())] = feature.getValue();
        }
        this.instanceList.add(Arrays.asList(values));
        this.outcomeList.add(instance.getOutcomes());
        this.weightList.add(instance.getWeight());
        this.sequenceIds.add(instance.getSequenceId());
        this.sequencePositions.add(instance.getSequencePosition());
    }

    @Override
    public Instance getInstance(int i) {
        List<Feature> features = new ArrayList<Feature>();

        List<Object> values = instanceList.get(i);

        int offset = 0;
        Iterator<String> sortedNames = getFeatureNames().iterator();
        while (sortedNames.hasNext()) {
            String name = sortedNames.next();
            Feature feature = new Feature(name, values.get(offset));
            features.add(feature);
            offset++;
        }

        Instance instance = new Instance(features, outcomeList.get(i));
        instance.setWeight(weightList.get(i));
        instance.setSequenceId(sequenceIds.get(i));
        instance.setSequencePosition(sequencePositions.get(i));
        return instance;
    }

    @Override
    public List<String> getOutcomes(int i) {
        return this.outcomeList.get(i);
    }

    @Override
    public Double getWeight(int i) {
        return this.weightList.get(i);
    }

    // public List<List<String>> getOutcomeLists()
    // {
    // return outcomeList;
    // }

    @Override
    public SortedSet<String> getUniqueOutcomes() {
        SortedSet<String> uniqueOutcomes = new TreeSet<String>();
        for (List<String> outcomes : outcomeList) {
            uniqueOutcomes.addAll(outcomes);
        }
        return uniqueOutcomes;
    }

    // public void setOutcomeList(List<List<String>> outcomeList)
    // {
    // this.outcomeList = outcomeList;
    // }

    @Override
    public int getNumberOfInstances() {
        return this.instanceList.size();
    }

    @Override
    public Iterable<Instance> getInstances() {
        return new InstancesIterable(this);
    }

    @Override
    public TreeSet<String> getFeatureNames() {
        return featureNames;
    }

    /**
     * Primarily for debug purposes
     *
     * @return all instances, features, mapping, internal state, etc.
     */
    @Override
    public String toString() {
        return "SparseFeatureStore{" + "instanceList=" + instanceList + ", outcomeList=" + outcomeList
                + ", weightList=" + weightList + ", sequenceIds=" + sequenceIds + ", sequencePositions="
                + sequencePositions + '}';
    }

    @Override
    public void deleteInstance(int i) {
        instanceList.remove(i);
    }

    @Override
    public boolean isSettingFeatureNamesAllowed() {
        return false;
    }

    @Override
    public void setFeatureNames(TreeSet<String> featureNames) {
        throw new IllegalStateException("Method not allowed in this feature store");
    }
}