edu.cuny.qc.speech.AuToBI.featureextractor.XValSpectrumPADFeatureExtractor.java Source code

Java tutorial

Introduction

Here is the source code for edu.cuny.qc.speech.AuToBI.featureextractor.XValSpectrumPADFeatureExtractor.java

Source

/*  XValSpectrumPADFeatureExtractor.java
    
Copyright 2009-2014 Andrew Rosenberg
    
  This file is part of the AuToBI prosodic analysis package.
    
  AuToBI is free software: you can redistribute it and/or modify
  it under the terms of the Apache License (see boilerplate below)
    
 ***********************************************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You should have received a copy of the Apache 2.0 License along with AuToBI.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 ***********************************************************************************************************************
 */
package edu.cuny.qc.speech.AuToBI.featureextractor;

import edu.cuny.qc.speech.AuToBI.classifier.AuToBIClassifier;
import edu.cuny.qc.speech.AuToBI.classifier.WekaClassifier;
import edu.cuny.qc.speech.AuToBI.core.*;
import edu.cuny.qc.speech.AuToBI.util.PartitionUtils;
import weka.classifiers.trees.J48;

import java.util.List;
import java.util.ArrayList;

/**
 * The XValSpectrumPADFeatureExtractor is used to generate cross-validated predictions of spectral pitch accent
 * detection (PAD) hypotheses.  These xval hypotheses are used in training correction classifiers.
 * <p/>
 * For every fold a J48 classifier is trained on the remaining folds and its prediction, the confidence of that
 * prediction, and the confidence assigned to the "ACCENTED" class are stored on each word of the held-out fold.
 * <p/>
 * This has been deprecated because it does not conform to the AuToBI v1.4 feature naming conventions.
 */
@SuppressWarnings("unchecked")
@Deprecated
public class XValSpectrumPADFeatureExtractor extends FeatureExtractor {
    public static final String moniker = "barkPred,barkPredConf,barkPredConfAcc";

    // The class label whose confidence is reported in the "accented" confidence feature.
    private static final String ACCENTED_VALUE = "ACCENTED";
    // The attribute used to store fold assignment information on each word.
    private static final String FOLD_ASSIGNMENT_FEATURE = "FOLD_ASSIGNMENT";

    private final int low; // the bottom of the frequency region
    private final int high; // the top of the frequency region
    private final int num_folds; // the number of folds used in the hypothesis generation.
    private final FeatureSet fs; // a description of the featureset used in the prediction

    // Names of the three attributes written by extractFeatures.  They are computed once so that the names registered
    // in the constructor and the names written during extraction cannot diverge.
    private final String prediction_feature;
    private final String confidence_feature;
    private final String accented_confidence_feature;

    /**
     * Constructs a new XValSpectrumPADFeatureExtractor for a specific spectral region.
     * <p/>
     * Registers the three generated feature names as extracted features, and the required features and class
     * attribute of {@code fs} as required features.
     *
     * @param low       the bottom of the spectral region.
     * @param high      the top of the spectral region.
     * @param num_folds the number of folds to be used in the generation
     * @param fs        a feature set used in the xval experiments
     */
    public XValSpectrumPADFeatureExtractor(int low, int high, int num_folds, FeatureSet fs) {
        this.low = low;
        this.high = high;
        this.num_folds = num_folds;
        this.fs = fs;

        String region = "bark_" + low + "_" + high;
        this.prediction_feature = "nominal_" + region + "__prediction";
        this.confidence_feature = region + "__prediction_confidence";
        this.accented_confidence_feature = region + "__prediction_confidence_accented";

        extracted_features.add(prediction_feature);
        extracted_features.add(confidence_feature);
        extracted_features.add(accented_confidence_feature);

        required_features.addAll(fs.getRequiredFeatures());
        required_features.add(fs.getClassAttribute());
    }

    /**
     * Generates cross validated pitch accent detection hypotheses using energy information drawn from the assigned
     * spectral region.
     * <p/>
     * The regions are assigned to stratified folds; for each fold a classifier is trained on the other folds and used
     * to label the words of that fold.
     *
     * @param regions The regions to extract features from.  The elements are treated as {@link Word}s.
     * @throws FeatureExtractorException if fold assignment, data splitting, training or prediction fails.  The
     *                                   underlying exception is available through {@link Throwable#getCause()}.
     */
    public void extractFeatures(List regions) throws FeatureExtractorException {
        // The parameter is a raw List; cast once and use the typed view everywhere below.
        List<Word> words = (List<Word>) regions;

        // Construct a feature set.  It is only consulted for its class attribute below.
        // NOTE(review): fs.getClassAttribute() is presumably equivalent -- confirm before simplifying.
        FeatureSet feature_set = fs.newInstance();
        feature_set.setDataPoints(words);
        try {
            PartitionUtils.assignStratifiedFoldNum(words, FOLD_ASSIGNMENT_FEATURE, num_folds,
                    feature_set.getClassAttribute());
        } catch (AuToBIException e) {
            throw wrapFailure(e);
        }

        // Train n-fold cross validated prediction features.
        for (int fold_num = 0; fold_num < num_folds; ++fold_num) {
            AuToBIClassifier classifier = new WekaClassifier(new J48());

            FeatureSet training_fs = fs.newInstance();

            List<Word> training_regions = new ArrayList<Word>();
            List<Word> testing_regions = new ArrayList<Word>();
            try {
                PartitionUtils.splitData(words, training_regions, testing_regions, fold_num,
                        FOLD_ASSIGNMENT_FEATURE);
            } catch (AuToBIException e) {
                // This should never happen.  If there is a problem with splitData it would have thrown
                // an exception during the fold assignment method assignStratifiedFoldNum
                throw wrapFailure(e);
            }
            training_fs.setDataPoints(training_regions);
            training_fs.constructFeatures();
            try {
                classifier.train(training_fs);
            } catch (Exception e) {
                throw wrapFailure(e);
            }

            // Label the held-out words with the hypotheses of the classifier that never saw them.
            for (Word w : testing_regions) {
                try {
                    Distribution result = classifier.distributionForInstance(w);

                    w.setAttribute(prediction_feature, result.getKeyWithMaximumValue());
                    w.setAttribute(confidence_feature, result.get(result.getKeyWithMaximumValue()));
                    w.setAttribute(accented_confidence_feature, result.get(ACCENTED_VALUE));
                } catch (Exception e) {
                    throw wrapFailure(e);
                }
            }
        }
    }

    /**
     * Converts a lower-level failure into a FeatureExtractorException without discarding it.
     * <p/>
     * The message is copied as before, and the original exception is attached as the cause so that its type and
     * stack trace remain available to whoever handles or logs the failure.
     *
     * @param cause the exception that interrupted feature extraction
     * @return a FeatureExtractorException carrying the same message and chained to {@code cause}
     */
    private static FeatureExtractorException wrapFailure(Exception cause) {
        FeatureExtractorException wrapped = new FeatureExtractorException(cause.getMessage());
        wrapped.initCause(cause);
        return wrapped;
    }
}