pl.edu.icm.cermine.tools.classification.general.FeatureVectorScalerImpl.java Source code

Java tutorial

Introduction

Here is the source code for pl.edu.icm.cermine.tools.classification.general.FeatureVectorScalerImpl.java

Source

/**
 * This file is part of CERMINE project.
 * Copyright (c) 2011-2013 ICM-UW
 *
 * CERMINE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CERMINE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with CERMINE. If not, see <http://www.gnu.org/licenses/>.
 */

package pl.edu.icm.cermine.tools.classification.general;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.commons.io.IOUtils;
import pl.edu.icm.cermine.tools.classification.features.FeatureVector;

public class FeatureVectorScalerImpl implements FeatureVectorScaler {
    protected FeatureLimits[] limits;
    protected double scaledLowerBound;
    protected double scaledUpperBound;
    protected ScalingStrategy strategy;

    public FeatureVectorScalerImpl(int size, double lowerBound, double upperBound) {
        this.scaledLowerBound = lowerBound;
        this.scaledUpperBound = upperBound;
        limits = new FeatureLimits[size];
        //set default limits to: max = -inf, min = +inf
        for (int idx = 0; idx < size; ++idx) {
            limits[idx] = new FeatureLimits(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY);
        }
        strategy = new LinearScaling();
    }

    public void setStrategy(ScalingStrategy strategy) {
        this.strategy = strategy;
    }

    @Override
    public FeatureVector scaleFeatureVector(FeatureVector fv) {
        for (FeatureLimits l : limits) {
            assert l.getMin() != Double.POSITIVE_INFINITY && l.getMax() != Double.NEGATIVE_INFINITY;
        }
        return strategy.scaleFeatureVector(scaledLowerBound, scaledUpperBound, limits, fv);
    }

    public void setFeatureLimits(List<FeatureLimits> featureLimits) {
        this.limits = featureLimits.toArray(new FeatureLimits[featureLimits.size()]);
    }

    @Override
    public <A extends Enum<A>> void calculateFeatureLimits(List<TrainingSample<A>> trainingElements) {
        for (TrainingSample<A> trainingElem : trainingElements) {
            FeatureVector fv = trainingElem.getFeatureVector();
            List<String> names = fv.getFeatureNames();

            int featureIdx = 0;
            for (String name : names) {
                double val = fv.getValue(name);
                if (val > limits[featureIdx].max) {
                    limits[featureIdx].setMax(val);
                }
                if (val < limits[featureIdx].min) {
                    limits[featureIdx].setMin(val);
                }
                ++featureIdx;
            }
        }
        for (FeatureLimits limit : limits) {
            if (Double.isInfinite(limit.getMin()) || Double.isInfinite(limit.getMax())) {
                throw new RuntimeException("Feature limit is not calculated properly!");
            }
        }
    }

    public FeatureLimits[] getLimits() {
        return limits;
    }

    @Override
    public void saveRangeFile(String path) throws IOException {
        BufferedWriter fp_save = null;
        try {
            Formatter formatter = new Formatter(new StringBuilder());
            fp_save = new BufferedWriter(new FileWriter(path));

            double lower = 0.0;
            double upper = 1.0;

            formatter.format("x\n");
            formatter.format("%.16g %.16g\n", lower, upper);
            for (int i = 0; i < limits.length; ++i) {
                formatter.format("%d %.16g %.16g\n", i, limits[i].getMin(), limits[i].getMax());
            }

            fp_save.write(formatter.toString());
        } finally {
            if (fp_save != null) {
                fp_save.close();
            }
        }
    }

    public static FeatureVectorScalerImpl fromRangeReader(BufferedReader rangeFile) throws IOException {
        try {
            double feature_min, feature_max;
            if (rangeFile.read() == 'x') {
                rangeFile.readLine(); // pass the '\n' after 'x'
                String line = rangeFile.readLine();
                if (line == null) {
                    line = "";
                }
                StringTokenizer st = new StringTokenizer(line);
                double scaledLowerBound = Double.parseDouble(st.nextToken());
                double scaledUpperBound = Double.parseDouble(st.nextToken());
                if (scaledLowerBound != 0 || scaledUpperBound != 1) {
                    throw new RuntimeException("Feature lower bound and upper bound must"
                            + "be set in range file to resepctively 0 and 1");
                }
                String restore_line = null;
                List<FeatureLimits> limits = new ArrayList<FeatureLimits>();
                while ((restore_line = rangeFile.readLine()) != null) {
                    StringTokenizer st2 = new StringTokenizer(restore_line);
                    st2.nextToken(); //discard feature index
                    feature_min = Double.parseDouble(st2.nextToken());
                    feature_max = Double.parseDouble(st2.nextToken());
                    FeatureLimits newLimit = new FeatureLimits(feature_min, feature_max);
                    limits.add(newLimit);
                }

                FeatureVectorScalerImpl scaler = new FeatureVectorScalerImpl(limits.size(), scaledLowerBound,
                        scaledUpperBound);
                scaler.setStrategy(new LinearScaling());
                scaler.setFeatureLimits(limits);

                return scaler;
            } else {
                throw new RuntimeException("y scaling not supported");
            }
        } finally {
            IOUtils.closeQuietly(rangeFile);
        }
    }
}