Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.core.di.builtin.binning;

import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Base class for binning implementations.
 * <p>
 * It keeps the bookkeeping shared by all binning strategies: the number of missing and invalid
 * values seen, the set of strings that count as "missing", and the expected number of bins. It
 * also provides merging of two binning states and a simple separator-based string
 * serialization ({@link #objToString()} / {@link #stringToObj(String)}).
 *
 * @param <T> the type of a bin boundary or bin category returned by {@link #getBinLeftBoundaries()}
 */
public abstract class AbstractBinning<T> {

    /**
     * Special characters for object serialization.
     * FIELD_SEPARATOR separates the top-level fields and SETLIST_SEPARATOR separates the
     * elements of the missing-value set. PAIR_SEPARATOR is not used by this class itself.
     */
    public static final char FIELD_SEPARATOR = '\u0001';
    public static final char SETLIST_SEPARATOR = '\u0002';
    public static final char PAIR_SEPARATOR = '\u0003';

    /**
     * Missing data count and invalid data count
     */
    protected int missingValCnt = 0;
    protected int invalidValCnt = 0;

    /**
     * Expected missing value set. The default missing value set only contains the empty string "".
     * It stays {@code null} after the no-arg constructor until it is needed.
     */
    protected Set<String> missingValSet;

    /**
     * The expected bin number
     */
    protected int expectedBinningNum;

    /**
     * Empty constructor: it is only meant for creating an instance that is filled in afterwards
     * (by merging or by {@link #stringToObj(String)}). The missing-value set is left unset here.
     */
    protected AbstractBinning() {
    }

    /**
     * Constructor with expected bin number. Only the empty string is treated as missing.
     *
     * @param binningNum the expected number of bins
     */
    public AbstractBinning(int binningNum) {
        this(binningNum, null);
    }

    /**
     * Constructor with expected bin number and expected missing values.
     *
     * @param binningNum     the expected number of bins
     * @param missingValList extra values to treat as missing, may be {@code null} or empty; each
     *                       entry is trimmed ({@code null} entries become the empty string)
     */
    public AbstractBinning(int binningNum, List<String> missingValList) {
        this.expectedBinningNum = binningNum;
        this.missingValSet = newDefaultMissingValSet();

        if (missingValList != null && !missingValList.isEmpty()) {
            for (String missingVal : missingValList) {
                missingValSet.add(StringUtils.trimToEmpty(missingVal));
            }
        }
    }

    /**
     * Get missing value count
     *
     * @return the number of missing values counted so far
     */
    public int getMissingValCnt() {
        return missingValCnt;
    }

    /**
     * Get invalid value count
     *
     * @return the number of invalid values counted so far
     */
    public int getInvalidValCnt() {
        return invalidValCnt;
    }

    /**
     * Add data into bin generator
     *
     * @param val the raw value to add
     */
    public abstract void addData(String val);

    /**
     * Generate the bin boundary or bin category
     *
     * @return the list of bin boundaries or bin categories
     */
    public abstract List<T> getBinLeftBoundaries();

    /**
     * Check whether a value is a missing value or not. The value is looked up as-is (it is not
     * trimmed here).
     *
     * @param val the value to check
     * @return {@code true} if the value is in the missing-value set
     */
    protected boolean isMissingVal(String val) {
        // An instance built with the no-arg constructor has no set yet; fall back to the
        // default set (same default mergeBin uses) instead of failing with an NPE.
        if (missingValSet == null) {
            missingValSet = newDefaultMissingValSet();
        }
        return missingValSet.contains(val);
    }

    /**
     * Increase the missing value count
     */
    protected void incMissingValCnt() {
        missingValCnt++;
    }

    /**
     * Increase the invalid value count
     */
    protected void incInvalidValCnt() {
        invalidValCnt++;
    }

    /**
     * Merge another binning info into this one. For the expected bin number the max value of the
     * two is used; the missing/invalid counts are summed and the missing-value sets are united.
     *
     * @param another the binning info to merge into this one, must not be {@code null}
     */
    public void mergeBin(AbstractBinning<?> another) {
        this.expectedBinningNum = Math.max(this.expectedBinningNum, another.expectedBinningNum);
        this.missingValCnt += another.missingValCnt;
        this.invalidValCnt += another.invalidValCnt;

        if (missingValSet == null) {
            missingValSet = newDefaultMissingValSet();
        }

        // The other side may come from the no-arg constructor and have no set at all.
        if (another.missingValSet != null) {
            missingValSet.addAll(another.missingValSet);
        }
    }

    /**
     * Restore the state of this object from a string in the format written by
     * {@link #objToString()}: missing count, invalid count, expected bin number and the
     * missing-value set, separated by {@link #FIELD_SEPARATOR}. Fields after the fourth one are
     * ignored here. Any previous content of the missing-value set is discarded.
     *
     * @param objValStr the string representation of the object
     * @throws IllegalArgumentException if the string is {@code null}, has fewer than four fields,
     *                                  or one of the numeric fields cannot be parsed
     *                                  ({@link NumberFormatException})
     */
    protected void stringToObj(String objValStr) {
        if (objValStr == null) {
            throw new IllegalArgumentException("The string to convert is null");
        }

        // limit -1 keeps trailing empty fields, so an empty missing-value field is not dropped
        String[] objStrArr = objValStr.split(Character.toString(FIELD_SEPARATOR), -1);
        if (objStrArr.length < 4) {
            throw new IllegalArgumentException("The size of argument is incorrect");
        }

        missingValCnt = Integer.parseInt(StringUtils.trim(objStrArr[0]));
        invalidValCnt = Integer.parseInt(StringUtils.trim(objStrArr[1]));
        expectedBinningNum = Integer.parseInt(StringUtils.trim(objStrArr[2]));

        if (missingValSet == null) {
            missingValSet = new HashSet<String>();
        } else {
            missingValSet.clear();
        }

        // An empty field splits into a single "" element, i.e. the empty string is restored
        // as a missing value.
        String[] elements = objStrArr[3].split(Character.toString(SETLIST_SEPARATOR), -1);
        for (String element : elements) {
            missingValSet.add(element);
        }
    }

    /**
     * Convert this object to a string: missing count, invalid count, expected bin number and the
     * missing-value set (its elements joined by {@link #SETLIST_SEPARATOR}), all joined by
     * {@link #FIELD_SEPARATOR}.
     *
     * @return the string representation of this object
     */
    public String objToString() {
        List<String> strList = new ArrayList<String>();

        strList.add(Integer.toString(missingValCnt));
        strList.add(Integer.toString(invalidValCnt));
        strList.add(Integer.toString(expectedBinningNum));

        String missingValStr = StringUtils.join(missingValSet, SETLIST_SEPARATOR);
        strList.add(missingValStr);

        return StringUtils.join(strList, FIELD_SEPARATOR);
    }

    /**
     * Create the default missing-value set, which contains only the empty string.
     *
     * @return a new mutable set holding ""
     */
    private static Set<String> newDefaultMissingValSet() {
        Set<String> defaultSet = new HashSet<String>();
        defaultSet.add("");
        return defaultSet;
    }

    /**
     * Construct Binning class object from String
     * @param modelConfig - the @ModelConfig to use
     * @param columnConfig - the @ColumnConfig to create bin
     * @param objValStr - the string present of object
     * @return the Binning object for the ColumnConfig
     */
    /*
    public static AbstractBinning<?> constructBinningFromStr(ModelConfig modelConfig, ColumnConfig columnConfig, String objValStr) {
        AbstractBinning<?> binning;

        if ( columnConfig.isCategorical() ) {
            binning = new CategoricalBinning();
        } else {
            if ( modelConfig.getBinningMethod().equals(BinningMethod.EqualInterval) ) {
                binning = new EqualIntervalBinning();
            } else {
                binning = new EqualPopulationBinning();
            }
        }

        binning.stringToObj(objValStr);

        return binning;
    }*/
}