ml.shifu.core.di.builtin.binning.MunroPatBinning.java Source code

Java tutorial

Introduction

Here is the source code for ml.shifu.core.di.builtin.binning.MunroPatBinning.java

Source

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.core.di.builtin.binning;

import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Numeric binning that feeds parsed double values into a
 * {@link MunroPatEstimator} and derives bin left boundaries from the
 * quantiles the estimator reports.
 *
 * <p>NOTE(review): thread-safety is not established by this class; it simply
 * delegates to the estimator and to {@code AbstractBinning} - confirm before
 * sharing an instance between threads.
 */
public class MunroPatBinning extends AbstractBinning<Double> {

    /** Two adjacent candidate boundaries closer than this are treated as the same bin. */
    private static final double MERGE_EPSILON = 1e-10;

    /** Collects the numeric values and produces the quantile candidates. */
    private final MunroPatEstimator<Double> estimator;

    /**
     * Creates a binning with the given number of bins and missing-value markers.
     *
     * @param binningNum     passed to both the superclass and the estimator
     * @param missingValList passed to the superclass unchanged; may be {@code null}
     *                       (the one-argument constructor passes {@code null})
     */
    public MunroPatBinning(int binningNum, List<String> missingValList) {
        super(binningNum, missingValList);
        estimator = new MunroPatEstimator<Double>(binningNum);
    }

    /**
     * Creates a binning with the given number of bins and no missing-value list.
     *
     * @param binningNum passed to both the superclass and the estimator
     */
    public MunroPatBinning(int binningNum) {
        this(binningNum, null);
    }

    /**
     * Parses {@code val} as a double and adds it to the estimator. A value
     * that cannot be parsed (including {@code null} and blank strings, which
     * are trimmed to the empty string first) increments the invalid-value
     * counter instead of being added.
     *
     * <p>NOTE(review): {@link Double#parseDouble(String)} also accepts
     * "NaN" and "Infinity"; such values are forwarded to the estimator as-is -
     * confirm the estimator tolerates them.
     *
     * @param val raw textual value, may be {@code null}
     */
    @Override
    public void addData(String val) {
        String fval = StringUtils.trimToEmpty(val);
        try {
            Double dval = Double.parseDouble(fval);
            estimator.add(dval);
        } catch (NumberFormatException e) {
            // Not a number: count it as invalid rather than failing the whole pass.
            super.incInvalidValCnt();
        }
    }

    /**
     * Turns a list of quantile candidates into bin left boundaries: adjacent
     * candidates that differ by no more than {@link #MERGE_EPSILON} are
     * collapsed, the first (minimum) boundary is replaced by negative
     * infinity, and, when more than two boundaries remain, the last (maximum)
     * one is dropped so the final bin is open-ended.
     *
     * <p>The input list is never modified and the result is always a new,
     * growable list.
     *
     * @param bins quantile candidates; {@code null} or empty yields a
     *             single-element list containing {@code NaN}
     * @return the merged left boundaries
     */
    private List<Double> binMerge(List<Double> bins) {
        List<Double> newBins = new ArrayList<Double>();
        if (bins == null || bins.isEmpty()) {
            // Do not touch the caller's list: it may be the estimator's own
            // state or an unmodifiable view.
            newBins.add(Double.NaN);
            return newBins;
        }

        double previous = bins.get(0);
        newBins.add(previous);

        // Keep a candidate only when it is distinguishable from the one
        // immediately before it in the input.
        for (int i = 1; i < bins.size(); i++) {
            double candidate = bins.get(i);
            if (Math.abs(previous - candidate) > MERGE_EPSILON) {
                newBins.add(candidate);
            }
            previous = candidate;
        }

        if (newBins.size() == 1) {
            // Special case: only one distinct candidate. Keep it as the second
            // boundary behind the open lower bound.
            double only = newBins.get(0);
            newBins.set(0, Double.NEGATIVE_INFINITY);
            newBins.add(only);
        } else {
            newBins.set(0, Double.NEGATIVE_INFINITY);
            if (newBins.size() > 2) {
                // Remove the max so the last bin becomes an open interval.
                newBins.remove(newBins.size() - 1);
            }
        }
        return newBins;
    }

    /**
     * Returns the merged bin left boundaries computed from the estimator's
     * current quantiles.
     *
     * @return a new list of left boundaries, starting with negative infinity
     *         unless no quantiles are available (then a single {@code NaN})
     */
    @Override
    public List<Double> getBinLeftBoundaries() {
        return binMerge(estimator.getQuantiles());
    }

    /**
     * Returns the estimator's quantiles exactly as reported, without merging
     * or boundary adjustment.
     *
     * @return the list returned by the estimator's {@code getQuantiles()}
     */
    public List<Double> getUnMergedDataBin() {
        return estimator.getQuantiles();
    }

}