de.tudarmstadt.ukp.dkpro.argumentation.sequence.feature.meta.AbstractSequenceMetaDataFeatureGenerator.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.dkpro.argumentation.sequence.feature.meta.AbstractSequenceMetaDataFeatureGenerator.java

Source

/*
 * Copyright 2015
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.tudarmstadt.ukp.dkpro.argumentation.sequence.feature.meta;

import de.tudarmstadt.ukp.dkpro.tc.api.exception.TextClassificationException;
import de.tudarmstadt.ukp.dkpro.tc.api.features.ClassificationUnitFeatureExtractor;
import de.tudarmstadt.ukp.dkpro.tc.api.features.Feature;
import de.tudarmstadt.ukp.dkpro.tc.api.features.FeatureExtractorResource_ImplBase;
import de.tudarmstadt.ukp.dkpro.tc.api.type.TextClassificationUnit;
import org.apache.commons.codec.binary.Base64;
import org.apache.uima.jcas.JCas;

import java.io.*;
import java.util.Collections;
import java.util.List;

/**
 * Base class for unit-level feature extractors that transport a whole sequence of strings
 * inside a single feature value.
 * <p>
 * Subclasses supply the sequence ({@link #extractSequence(JCas, TextClassificationUnit)}) and
 * the feature name ({@link #getMetaDataFeatureName()}). This class serializes the sequence with
 * Java object serialization, Base64-encodes the bytes and emits the resulting string as the
 * value of one {@link Feature}. {@link #decodeFromString(String)} reverses the encoding.
 *
 * @author Ivan Habernal
 */
public abstract class AbstractSequenceMetaDataFeatureGenerator extends FeatureExtractorResource_ImplBase
        implements ClassificationUnitFeatureExtractor {

    /**
     * Produces the sequence that is stored in the meta data feature.
     *
     * @param jCas               the CAS handed to {@link #extract(JCas, TextClassificationUnit)}
     * @param classificationUnit the unit handed to {@link #extract(JCas, TextClassificationUnit)}
     * @return the sequence to encode; it must be serializable, otherwise {@code extract} fails
     */
    protected abstract List<String> extractSequence(JCas jCas, TextClassificationUnit classificationUnit);

    /**
     * @return the name given to the feature created by
     * {@link #extract(JCas, TextClassificationUnit)}
     */
    protected abstract String getMetaDataFeatureName();

    /**
     * Extracts the sequence for the given unit and wraps its encoded form in a single feature.
     *
     * @param jCas               the CAS passed on to {@link #extractSequence}
     * @param classificationUnit the unit passed on to {@link #extractSequence}
     * @return an immutable one-element list holding the feature named
     * {@link #getMetaDataFeatureName()} whose value is the Base64-encoded sequence
     * @throws TextClassificationException if the sequence cannot be serialized
     */
    @Override
    public List<Feature> extract(JCas jCas, TextClassificationUnit classificationUnit)
            throws TextClassificationException {
        List<String> tags = extractSequence(jCas, classificationUnit);

        // the serialized sequence is Base64-encoded so it fits into a plain string feature value
        String value = encodeToString(tags);
        Feature feature = new Feature(getMetaDataFeatureName(), value);

        return Collections.singletonList(feature);
    }

    /**
     * Serializes an object with Java object serialization and returns the bytes as Base64 text.
     *
     * @param object the object to encode; {@code null} is written as a serialized null reference
     * @return Base64 representation of the serialized object
     * @throws TextClassificationException if serialization fails (e.g. the object is not
     *                                     serializable); the {@link IOException} is the cause
     */
    public static String encodeToString(Object object) throws TextClassificationException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();

        // The object stream must be closed (and thereby flushed) BEFORE the bytes are read from
        // the buffer; closing only the ByteArrayOutputStream has no effect.
        try (ObjectOutputStream objectOutputStream = new ObjectOutputStream(buffer)) {
            objectOutputStream.writeObject(object);
        } catch (IOException e) {
            throw new TextClassificationException(e);
        }

        return Base64.encodeBase64String(buffer.toByteArray());
    }

    /**
     * Decodes a value produced by {@link #encodeToString(Object)} back into a list.
     * <p>
     * Only the container type is verified; the element types are not checked.
     *
     * @param featureValue Base64 text of a serialized list
     * @return the decoded list, or {@code null} if a {@code null} reference was encoded
     * @throws TextClassificationException if the value is {@code null} or empty, cannot be
     *                                     deserialized, or does not contain a {@link List}
     */
    public static List<String> decodeFromString(String featureValue) throws TextClassificationException {
        if (featureValue == null || featureValue.isEmpty()) {
            throw new TextClassificationException(
                    "MetaData feature value is empty. Maybe " + "you forgot to add the feature generator "
                            + AbstractSequenceMetaDataFeatureGenerator.class.getName());
        }

        byte[] bytes = Base64.decodeBase64(featureValue);

        // NOTE(review): native Java deserialization must only be applied to trusted values,
        // i.e. ones produced by encodeToString(); confirm no external data reaches this method.
        Object decoded;
        try (ObjectInputStream objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            decoded = objectInputStream.readObject();
        } catch (IOException | ClassNotFoundException e) {
            throw new TextClassificationException(e);
        }

        // Report a foreign payload through the declared exception instead of letting an
        // unchecked ClassCastException escape; an encoded null is still returned as null.
        if (decoded != null && !(decoded instanceof List)) {
            throw new TextClassificationException(
                    "MetaData feature value does not contain a list but " + decoded.getClass().getName());
        }

        @SuppressWarnings("unchecked")
        List<String> sequence = (List<String>) decoded;
        return sequence;
    }
}