ml.shifu.shifu.fs.PathFinder.java Source code

Java tutorial

Introduction

Here is the source code for ml.shifu.shifu.fs.PathFinder.java

Source

/*
 * Copyright [2012-2014] PayPal Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.shifu.fs;

import ml.shifu.shifu.container.obj.EvalConfig;
import ml.shifu.shifu.container.obj.ModelConfig;
import ml.shifu.shifu.container.obj.RawSourceData.SourceType;
import ml.shifu.shifu.util.Constants;
import ml.shifu.shifu.util.Environment;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.pig.impl.util.JarManager;

import java.io.File;
import java.util.Map;

/**
 * {@link PathFinder} is used to obtain all files which can be used in our framework. Some are used for training,
 * evaling, performance ...
 *
 * <p>
 * {@link #modelConfig} should be passed as parameter in constructor
 */
public class PathFinder {

    public static final String FEATURE_IMPORTANCE_FILE = "all.fi";
    private static final String CORRELATION_CSV = "correlation.csv";
    private static final String REASON_CODE_PATH = "common/ReasonCodeMapV3.json";
    private static final String SHIFU_JAR_PATH = "lib/*.jar";

    /**
     * {@link PathFinder#modelConfig} is used to get global setting for model config path.
     */
    private ModelConfig modelConfig;

    /**
     * If not specified SHIFU_HOME env, some key configurations like pig path or lib path can be configured here
     */
    private Map<String, Object> otherConfigs;

    /**
     * Constructor with valid parameter modelConfig
     *
     * @param modelConfig
     *            the model config
     * @throws IllegalArgumentException
     *             if {@link #modelConfig} is null.
     */
    public PathFinder(ModelConfig modelConfig) {
        if (modelConfig == null) {
            throw new IllegalArgumentException("modelConfig should not be null.");
        }
        this.modelConfig = modelConfig;
    }

    /**
     * Constructor with valid parameter modelConfig
     *
     * @param modelConfig
     *            - modelConfig to find
     * @param otherConfigs
     *            other configuration parameters
     * @throws IllegalArgumentException
     *             if {@link #modelConfig} is null.
     */
    public PathFinder(ModelConfig modelConfig, Map<String, Object> otherConfigs) {
        this(modelConfig);
        this.otherConfigs = otherConfigs;
    }

    /**
     * Get absolute path with SHIFU_HOME env.
     * - if the path is already absolute path, just return
     * - or assume it is relative path to SHIFU_HOME
     *
     * @param path
     *            the given path
     * @return absolute path the absolute path
     */
    public String getScriptPath(String path) {
        String shifuHome = Environment.getProperty(Environment.SHIFU_HOME);
        if (shifuHome == null || shifuHome.length() == 0) {
            // return relative path which is in shifu-*.jar
            return path;
        } else {
            String pathStr = (new Path(path)).isAbsolute() ? path
                    : new Path(Environment.getProperty(Environment.SHIFU_HOME), path).toString();
            File file = new File(pathStr);
            if (file.exists()) {
                return pathStr;
            } else {
                // return arguly path which is in shifu-*.jar
                return path;
            }
        }
    }

    public String getAbsolutePath(String path) {
        String shifuHome = Environment.getProperty(Environment.SHIFU_HOME);
        if (shifuHome == null || shifuHome.length() == 0) {
            // return absolute path which is in shifu-*.jar
            return path;
        } else {
            return (new Path(path)).isAbsolute() ? path
                    : new Path(Environment.getProperty(Environment.SHIFU_HOME), path).toString();
        }
    }

    /**
     * Get project jar file path name.
     * - Since the Jar Path is only used in pig code compiling, just return local path
     *
     * @return path of SHIFU dependent jars
     */
    public String getJarPath() {
        String shifuHome = Environment.getProperty(Environment.SHIFU_HOME);
        if (shifuHome == null || shifuHome.length() == 0) {
            return new Path(JarManager.findContainingJar(PathFinder.class)).toString();
        } else {
            // very ugly to check if it is outside jar
            try {
                Class.forName("ml.shifu.dtrain.DTrainRequestProcessor");
                return new Path(JarManager.findContainingJar(PathFinder.class)).toString();
            } catch (ClassNotFoundException e) {
                return getAbsolutePath(SHIFU_JAR_PATH);
            }
        }
    }

    /**
     * Get reason code path name.
     * - Reason code file may be used in MapReduce job, so it should have hdfs path
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of reason code file
     */
    public String getReasonCodeMapPath(SourceType sourceType) {
        switch (sourceType) {
        case LOCAL:
            return getAbsolutePath(REASON_CODE_PATH);
        case HDFS:
            return (new Path(getModelSetHdfsPath(), Constants.REASON_CODE_MAP_JSON)).toString();
        default:
            // Others, maybe be we will support S3 in future
            throw new NotImplementedException("Source type - " + sourceType.name() + " is not supported yet!");
        }
    }

    /**
     * Get the file path of ModelConfig
     *
     * @return path to ModelConfig
     */
    public String getModelConfigPath() {
        return getPathBySourceType(Constants.LOCAL_MODEL_CONFIG_JSON, SourceType.LOCAL);
    }

    /**
     * Get the file path of ModelConfig
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path to ModelConfig
     */
    public String getModelConfigPath(SourceType sourceType) {
        return getPathBySourceType(Constants.MODEL_CONFIG_JSON_FILE_NAME, sourceType);
    }

    /**
     * Get the file path of ColumnConfig
     *
     * @return path to ColumnConfig
     */
    public String getColumnConfigPath() {
        return getPathBySourceType(Constants.LOCAL_COLUMN_CONFIG_JSON, SourceType.LOCAL);
    }

    /**
     * Get the file path of ColumnConfig
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of ColumnConfig
     */
    public String getColumnConfigPath(SourceType sourceType) {
        return getPathBySourceType(Constants.COLUMN_CONFIG_JSON_FILE_NAME, sourceType);
    }

    public String getLocalColumnStatsPath() {

        return getPathBySourceType(
                Constants.COLUMN_META_FOLDER_NAME + File.separator + Constants.COLUMN_STATS_CSV_FILE_NAME,
                SourceType.LOCAL);
    }

    /**
     * Get pre-traing stats path.
     *
     * @return path of pre-training stats file
     */
    public String getPreTrainingStatsPath() {
        return getPreTrainingStatsPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Get pre-traing stats path.
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of pre-training stats file
     */
    public String getPreTrainingStatsPath(SourceType sourceType) {
        String preTrainingStatsPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_PRE_TRAIN_STATS_PATH);

        if (StringUtils.isBlank(preTrainingStatsPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.PRE_TRAINING_STATS), sourceType);
        } else {
            return new Path(preTrainingStatsPath).toString();
        }
    }

    public String getStatsSmallBins() {
        return getStatsSmallBins(modelConfig.getDataSet().getSource());
    }

    /**
     * Get stats small bins path
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of stats small-bin file
     */
    public String getStatsSmallBins(SourceType sourceType) {
        return getPathBySourceType(new Path(Constants.TMP, Constants.STATS_SMALL_BINS), sourceType);
    }

    /**
     * Get post train out put path
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of pre-training stats file
     */
    public String getPostTrainOutputPath(SourceType sourceType) {
        String postTrainOutput = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_PRE_TRAIN_STATS_PATH);

        if (StringUtils.isBlank(postTrainOutput)) {
            return getPathBySourceType(new Path(Constants.TMP, "posttrain-output"), sourceType);
        } else {
            return new Path(postTrainOutput).toString();
        }
    }

    /**
     * Get auto type distinct count file path.
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of auto type folder name
     */
    public String getAutoTypeFilePath(SourceType sourceType) {
        String preTrainingStatsPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_AUTO_TYPE_PATH);

        if (StringUtils.isBlank(preTrainingStatsPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.AUTO_TYPE_PATH), sourceType);
        } else {
            return new Path(preTrainingStatsPath).toString();
        }
    }

    /**
     * Get correlation result file path.
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of auto type folder name
     */
    public String getCorrelationPath(SourceType sourceType) {
        String preTrainingStatsPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_CORRELATION_PATH);

        if (StringUtils.isBlank(preTrainingStatsPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.CORRELATION_PATH), sourceType);
        } else {
            return new Path(preTrainingStatsPath).toString();
        }
    }

    /**
     * Get the path for select raw data
     *
     * @return path of selected raw data
     */
    public String getSelectedRawDataPath() {
        return getSelectedRawDataPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Get the path for select raw data
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of selected raw data
     */
    public String getSelectedRawDataPath(SourceType sourceType) {
        String selectedRawDataPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_SELECTED_RAW_DATA_PATH);

        if (StringUtils.isBlank(selectedRawDataPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.SELECTED_RAW_DATA), sourceType);
        } else {
            return new Path(selectedRawDataPath).toString();
        }
    }

    /**
     * Get the path of normalized data
     *
     * @return path of normalized data
     */
    public String getNormalizedDataPath() {
        return getNormalizedDataPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Get the path of normalized cross validation data
     *
     * @return path of normalized cross validation data
     */
    public String getNormalizedValidationDataPath() {
        return getNormalizedValidationDataPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Get the path of normalized data
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of normalized data
     */
    public String getNormalizedDataPath(SourceType sourceType) {
        String normalizedDataPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_NORMALIZED_DATA_PATH);

        if (StringUtils.isBlank(normalizedDataPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.NORMALIZED_DATA), sourceType);
        } else {
            return new Path(normalizedDataPath).toString();
        }
    }

    public String getCleanedDataPath() {
        return getCleanedDataPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Clean and filter raw data set for RF/GBT model inputs
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of normalized data
     */
    public String getCleanedDataPath(SourceType sourceType) {
        String normalizedDataPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_CLEANED_DATA_PATH);

        if (StringUtils.isBlank(normalizedDataPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.CLEANED_DATA), sourceType);
        } else {
            return new Path(normalizedDataPath).toString();
        }
    }

    public String getCleanedValidationDataPath() {
        return getCleanedValidationDataPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Clean and filter raw validation data set for RF/GBT model inputs
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of normalized data
     */
    public String getCleanedValidationDataPath(SourceType sourceType) {
        String normalizedDataPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_CLEANED_VALIDATION_DATA_PATH);

        if (StringUtils.isBlank(normalizedDataPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.CLEANED_VALIDATION_DATA), sourceType);
        } else {
            return new Path(normalizedDataPath).toString();
        }
    }

    /**
     * Get the path of validation normalized data
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of normalized data
     */
    public String getNormalizedValidationDataPath(SourceType sourceType) {
        String normalizedDataPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_NORMALIZED_VALIDATION_DATA_PATH);

        if (StringUtils.isBlank(normalizedDataPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.NORMALIZED_VALIDATION_DATA), sourceType);
        } else {
            return new Path(normalizedDataPath).toString();
        }
    }

    /**
     * Get the path of varselect MSE stats path.
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of var select MSE stats path
     */
    public String getVarSelectMSEOutputPath(SourceType sourceType) {
        String varSelectStatsPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_VARSLECT_STATS_PATH);

        if (StringUtils.isBlank(varSelectStatsPath)) {
            return getPathBySourceType(new Path(Constants.TMP, "varselectStats"), sourceType);
        } else {
            return new Path(varSelectStatsPath).toString();
        }
    }

    /**
     * Get the path of varselect MSE stats path.
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of var select MSE stats path
     */
    public String getUpdatedBinningInfoPath(SourceType sourceType) {
        String preTrainPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_PRE_TRAIN_STATS_PATH);

        if (StringUtils.isBlank(preTrainPath)) {
            return getPathBySourceType(new Path(Constants.TMP, "UpdatedBinningInfo"), sourceType);
        } else {
            return new Path(preTrainPath).toString();
        }
    }

    public String getPSIInfoPath() {
        return this.getPSIInfoPath(modelConfig.getDataSet().getSource());
    }

    public String getPSIInfoPath(SourceType sourceType) {
        String preTrainPath = getPreferPath(modelConfig.getTrain().getCustomPaths(), Constants.KEY_PRE_PSI_PATH);

        if (StringUtils.isBlank(preTrainPath)) {
            return getPathBySourceType(new Path(Constants.TMP, "PSIInfo"), sourceType);
        } else {
            return new Path(preTrainPath).toString();
        }

    }

    /**
     * Get the path of models
     *
     * @return path of models
     */
    public String getModelsPath() {
        return getModelsPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Get the path of models
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of models
     */
    public String getModelsPath(SourceType sourceType) {
        return getPathBySourceType(new Path(Constants.MODELS), sourceType);
    }

    /**
     * Get the path of one bagging model
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of models
     */
    public String getBaggingModelPath(SourceType sourceType) {
        return getPathBySourceType(new Path("onebaggingmodel"), sourceType);
    }

    /**
     * Get the path ofnn binary models
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of models
     */
    public String getNNBinaryModelsPath(SourceType sourceType) {
        return getPathBySourceType(new Path("bmodels"), sourceType);
    }

    public String getValErrorPath(SourceType sourceType) {
        return getPathBySourceType(new Path(Constants.TMP, "valerr"), sourceType);
    }

    /**
     * Get the path of models
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of models
     */
    public String getTmpModelsPath(SourceType sourceType) {
        return getPathBySourceType(new Path(Constants.TMP, Constants.DEFAULT_MODELS_TMP_FOLDER), sourceType);
    }

    public String getVarSelsPath(SourceType sourceType) {

        return getPathBySourceType(new Path(Constants.VarSels), sourceType);
    }

    public String getModelVersion(SourceType sourceType) {
        switch (sourceType) {
        case LOCAL:
            return new Path(this.getModelSetLocalPath(), Constants.BACKUPNAME).toString();
        case HDFS:
            return new Path(this.getModelSetHdfsPath(), Constants.BACKUPNAME).toString();
        default:
            // Others, maybe be we will support S3 in future
            throw new NotImplementedException("Source type - " + sourceType.name() + " is not supported yet!");
        }
    }

    /**
     * Get the path of average score for each bin
     *
     * @return path of bin average score
     */
    public String getBinAvgScorePath() {
        return getBinAvgScorePath(modelConfig.getDataSet().getSource());
    }

    /**
     * Get the path of average score for each bin
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of bin average score
     */
    public String getBinAvgScorePath(SourceType sourceType) {
        String binAvgScorePath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_BIN_AVG_SCORE_PATH);

        if (StringUtils.isBlank(binAvgScorePath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.BIN_AVG_SCORE), sourceType);
        } else {
            return new Path(binAvgScorePath).toString();
        }
    }

    /**
     * Get the path of train score
     *
     * @return path of train score
     */
    public String getTrainScoresPath() {
        return getTrainScoresPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Get the path of train score
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of train score
     */
    public String getTrainScoresPath(SourceType sourceType) {
        String trainScoresPath = getPreferPath(modelConfig.getTrain().getCustomPaths(),
                Constants.KEY_TRAIN_SCORES_PATH);

        if (StringUtils.isBlank(trainScoresPath)) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.TRAIN_SCORES), sourceType);
        } else {
            return new Path(trainScoresPath).toString();
        }
    }

    /**
     * Get the evaluation root path according the source type
     *
     * @param sourceType
     *            - Local/HDFS
     * @return path of evaluation root
     */
    public String getEvalsPath(SourceType sourceType) {
        Path path = new Path(Constants.EVAL_DIR);
        return this.getPathBySourceType(path.toString(), sourceType);
    }

    /**
     * Get evaluation set home directory
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @return path of evaluation set home directory
     */
    public String getEvalSetPath(EvalConfig evalConfig) {
        return getEvalSetPath(evalConfig, evalConfig.getDataSet().getSource());
    }

    /**
     * Get evaluation set home directory, something like Model/eval/evalName)
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @param sourceType
     *            - Local/HDFS
     * @return path of evaluation set home directory
     */
    public String getEvalSetPath(EvalConfig evalConfig, SourceType sourceType) {
        return getEvalSetPath(evalConfig.getName(), sourceType);
    }

    /**
     * Get evaluation set home directory, something like eval name
     *
     * @param evalName
     *            - evalset name to find
     * @param sourceType
     *            - Local/HDFS
     * @return path of evaluation set home directory
     */
    public String getEvalSetPath(String evalName, SourceType sourceType) {
        return new Path(this.getEvalsPath(sourceType), evalName).toString();
    }

    /**
     * Get the path of evaluation normalized data
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @return path of evaluation normalized data
     */
    public String getEvalNormalizedPath(EvalConfig evalConfig) {
        return getEvalNormalizedPath(evalConfig, evalConfig.getDataSet().getSource());
    }

    /**
     * Get the path of evaluation normalized data
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @param sourceType
     *            - Local/HDFS
     * @return path of evaluation normalized data
     */
    public String getEvalNormalizedPath(EvalConfig evalConfig, SourceType sourceType) {
        return getEvalFilePath(evalConfig.getName(), Constants.EVAL_NORMALIZED, sourceType);
    }

    /**
     * Get the path of evaluation score
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @return path of evaluation score
     */
    public String getEvalScorePath(EvalConfig evalConfig) {
        return getEvalScorePath(evalConfig, evalConfig.getDataSet().getSource());
    }

    /**
     * Get the path of evaluation score
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @param metaColumn
     *            - score column
     * @return path of evaluation score
     */
    public String getEvalMetaScorePath(EvalConfig evalConfig, String metaColumn) {
        SourceType sourceType = evalConfig.getDataSet().getSource();

        String scoreMetaPath = getPreferPath(evalConfig.getCustomPaths(), Constants.KEY_SCORE_PATH);
        if (StringUtils.isBlank(scoreMetaPath)) {
            scoreMetaPath = getEvalFilePath(evalConfig.getName(), Constants.EVAL_META_SCORE, sourceType);
        } else {
            scoreMetaPath = new Path(scoreMetaPath, Constants.EVAL_META_SCORE).toString();
        }

        return new Path(scoreMetaPath, metaColumn).toString();
    }

    /**
     * Get the path of evaluation score
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @param sourceType
     *            - Local/HDFS
     * @return path of evaluation score
     */
    public String getEvalScorePath(EvalConfig evalConfig, SourceType sourceType) {
        String scorePath = getPreferPath(evalConfig.getCustomPaths(), Constants.KEY_SCORE_PATH);
        if (StringUtils.isBlank(scorePath)) {
            return getEvalFilePath(evalConfig.getName(), Constants.EVAL_SCORE, sourceType);
        } else {
            return new Path(scorePath).toString();
        }
    }

    public String getEvalConfusionPath(EvalConfig evalConfig, SourceType sourceType) {
        String scorePath = getPreferPath(evalConfig.getCustomPaths(), Constants.KEY_SCORE_PATH);
        if (StringUtils.isBlank(scorePath)) {
            return getEvalFilePath(evalConfig.getName(), Constants.EVAL_SCORE, sourceType);
        } else {
            return new Path(scorePath).toString();
        }
    }

    /**
     * Get the header path of evaluation score
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @return path of evaluation score header
     */
    public String getEvalScoreHeaderPath(EvalConfig evalConfig) {
        return getEvalScoreHeaderPath(evalConfig, evalConfig.getDataSet().getSource());
    }

    /**
     * Get the header path of evaluation score
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @param sourceType
     *            - Local/HDFS
     * @return path of evaluation score header
     */
    public String getEvalScoreHeaderPath(EvalConfig evalConfig, SourceType sourceType) {
        String scorePath = getEvalScorePath(evalConfig, sourceType);
        return new Path(scorePath, Constants.PIG_HEADER).toString();
    }

    /**
     * Get the path of evaluation set performance for EvalMetaScore column
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @param metaColumn
     *            meta column
     * @return eval meta path
     */
    public String getEvalMetaPerformancePath(EvalConfig evalConfig, String metaColumn) {
        String evalPerformancePath = getPreferPath(evalConfig.getCustomPaths(), Constants.KEY_PERFORMANCE_PATH);

        if (StringUtils.isBlank(evalPerformancePath)) {
            String evalMetaPerfPath = getEvalFilePath(evalConfig.getName(), Constants.EVAL_META_SCORE,
                    evalConfig.getDataSet().getSource());
            return new Path(evalMetaPerfPath, metaColumn + Constants.EVAL_PERFORMANCE).toString();
        } else {
            return new Path(evalPerformancePath, metaColumn + Constants.EVAL_PERFORMANCE).toString();
        }
    }

    /**
     * Get the path of evaluation set performance
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @return path of evaluation set performance
     */
    public String getEvalPerformancePath(EvalConfig evalConfig) {
        return getEvalPerformancePath(evalConfig, evalConfig.getDataSet().getSource());
    }

    /**
     * Get the path of evaluation set performance
     *
     * @param evalConfig
     *            - EvalConfig to find
     * @param sourceType
     *            - Local/HDFS
     * @return - path of evaluation set performance
     */
    public String getEvalPerformancePath(EvalConfig evalConfig, SourceType sourceType) {
        String evalPerformancePath = getPreferPath(evalConfig.getCustomPaths(), Constants.KEY_PERFORMANCE_PATH);
        if (StringUtils.isBlank(evalPerformancePath)) {
            return getEvalFilePath(evalConfig.getName(), Constants.EVAL_PERFORMANCE, sourceType);
        } else {
            return new Path(evalPerformancePath, Constants.EVAL_PERFORMANCE).toString();
        }
    }

    public String getEvalMatrixPath(EvalConfig evalConfig, SourceType sourceType) {
        String evalMatrixPath = getPreferPath(evalConfig.getCustomPaths(), Constants.KEY_CONFUSION_MATRIX_PATH);
        if (StringUtils.isBlank(evalMatrixPath)) {
            return getEvalFilePath(evalConfig.getName(), Constants.EVAL_MATRIX, sourceType);
        } else {
            return new Path(evalMatrixPath, Constants.EVAL_MATRIX).toString();
        }
    }

    /**
     * Get the file path for specified name under evaluation set home directory
     *
     * @param evalName
     *            - evalset name to find
     * @param specifiedFileName
     *            - the specified file name
     * @param sourceType
     *            - Local/HDFS
     * @return path of specified file
     */
    public String getEvalFilePath(String evalName, String specifiedFileName, SourceType sourceType) {
        return (new Path(this.getEvalSetPath(evalName, sourceType), specifiedFileName)).toString();
    }

    public String getEvalLocalMultiMatrixFile(String evalName) {
        return (new Path(this.getEvalSetPath(evalName, SourceType.LOCAL), "multi-eval-confustion-matrix.csv"))
                .toString();
    }

    public String getModelSetPath(SourceType sourceType) {
        switch (sourceType) {
        case LOCAL:
            return this.getModelSetLocalPath().toString();
        case HDFS:
            return this.getModelSetHdfsPath().toString();
        default:
            // Others, maybe be we will support S3 in future
            throw new NotImplementedException("Source type - " + sourceType.name() + " is not supported yet!");
        }
    }

    /**
     * Get the local home directory for Model
     *
     * @return - the Path of local home directory
     */
    private Path getModelSetLocalPath() {
        return (otherConfigs != null && otherConfigs.get(Constants.SHIFU_CURRENT_WORKING_DIR) != null)
                ? new Path(otherConfigs.get(Constants.SHIFU_CURRENT_WORKING_DIR).toString())
                : new Path(".");
    }

    /**
     * Get the HDFS home directory for Model
     *
     * @return - the Path of HDFS home directory
     */
    private Path getModelSetHdfsPath() {
        String modelSetPath = this.getPreferPath(modelConfig.getBasic().getCustomPaths(),
                Constants.KEY_HDFS_MODEL_SET_PATH);
        return (StringUtils.isBlank(modelSetPath) ? new Path(Constants.MODEL_SETS, modelConfig.getBasic().getName())
                : new Path(modelSetPath, modelConfig.getBasic().getName()));
    }

    /**
     * Get the relative path to model home directory by source type
     *
     * @param path
     *            - the path to find
     * @param sourceType
     *            - Local/HDFS
     * @return the relative path to the model home directory
     */
    public String getPathBySourceType(Path path, SourceType sourceType) {
        switch (sourceType) {
        case LOCAL:
            return new Path(getModelSetLocalPath(), path).toString();
        case HDFS:
            return ShifuFileUtils.getFileSystemBySourceType(sourceType)
                    .makeQualified(new Path(getModelSetHdfsPath(), path)).toString();
        default:
            // Others, maybe be we will support S3 in future
            throw new NotImplementedException("Source type - " + sourceType.name() + " is not supported yet!");
        }
    }

    /**
     * Get the relative path to model home directory by source type
     *
     * @param path
     *            - the path to find
     * @param sourceType
     *            - Local/HDFS
     * @return the relative path to the model home directory
     */
    public String getPathBySourceType(String path, SourceType sourceType) {
        return getPathBySourceType(new Path(path), sourceType);
    }

    /**
     * Return local correlation csv path
     *
     * @return the local correlation csv path
     */
    public String getLocalCorrelationCsvPath() {
        return getPathBySourceType(CORRELATION_CSV, SourceType.LOCAL);
    }

    /**
     * Get the prefer path for files. Prefer path means:
     * - if the user set the path in customPaths, try to use it
     * - or return null
     *
     * @param customPaths
     *            - map of customer setting path
     * @param key
     *            - path key in ModelConfig
     * @return - path for files
     */
    private String getPreferPath(Map<String, String> customPaths, String key) {
        if (customPaths == null || customPaths.size() == 0) {
            return null;
        }

        return customPaths.get(key);
    }

    public String getLocalFeatureImportancePath() {
        return new Path(getLocalFeatureImportanceFolder(), FEATURE_IMPORTANCE_FILE).toString();
    }

    public String getLocalFeatureImportanceFolder() {
        return getPathBySourceType(new Path("featureImportance"), SourceType.LOCAL);
    }

    /**
     * @return the otherConfigs
     */
    public Map<String, Object> getOtherConfigs() {
        return otherConfigs;
    }

    /**
     * Get the train data path for assemble model
     *
     * @return - train data path for assemble model
     */
    public String getSubModelsAssembleTrainData() {
        return getPathBySourceType(new Path(Constants.TMP, "AssembleTrainData"),
                this.modelConfig.getDataSet().getSource());
    }

    /**
     * Get the eval data path for assemble model
     *
     * @param evalName
     *            - evalset name
     * @param sourceType
     *            - Local/HDFS
     * @return - eval data path for assemble model
     */
    public String getSubModelsAssembleEvalData(String evalName, SourceType sourceType) {
        return getPathBySourceType(new Path(Constants.TMP, evalName + "AssembleEvalData"), sourceType);
    }

    /**
     * Get the shuffle data path
     *
     * @return - the shuffle data path
     */
    public String getShuffleDataPath() {
        return getShuffleDataPath(modelConfig.getDataSet().getSource());
    }

    /**
     * Get the shuffle data path according SourceType
     *
     * @param sourceType
     *            - Local/HDFS
     * @return - the shuffle data path
     */
    private String getShuffleDataPath(SourceType sourceType) {
        return getPathBySourceType(new Path(Constants.TMP, Constants.SHUFFLED_DATA_PATH), sourceType);
    }

    /**
     * Get the backup ColumnConfig
     *
     * @return - the ColumnConfig.json path for backup
     */
    public String getBackupColumnConfig() {
        return getPathBySourceType(new Path(Constants.TMP, Constants.COLUMN_CONFIG_JSON_FILE_NAME),
                SourceType.LOCAL);
    }

    /**
     * Get the backup ColumnConfig
     * @param postTimeStamp - timestamp to back ColumnConfig file name
     * @return - the ColumnConfig.json path for backup
     */
    public String getBackupColumnConfig(String postTimeStamp) {
        return getPathBySourceType(
                new Path(Constants.TMP, Constants.COLUMN_CONFIG_JSON_FILE_NAME + "." + postTimeStamp),
                SourceType.LOCAL);
    }

    /**
     * Get the varsel auto filter history to let user have the opportunity to change
     *
     * @return - the varsel.history path for variable auto filter
     */
    public String getVarSelHistory() {
        return getPathBySourceType(new Path(Constants.VAR_SELECT, Constants.VAR_SEL_HISTORY), SourceType.LOCAL);
    }

    /**
     * Get the correlation export path
     *
     * @return - the correlation path for export
     */
    public String getCorrExportPath() {
        return getPathBySourceType(new Path(Constants.TMP, Constants.CORR_EXPORT_PATH), SourceType.LOCAL);
    }

    /**
     * Get the ColumnConfig.json history by index
     *
     * @return - ColumnConfig.json path for variable selection history
     */
    public String getVarSelDir() {
        return getPathBySourceType(new Path(Constants.VAR_SELECT), SourceType.LOCAL);
    }

    /**
     * Get the ColumnConfig.json history by index
     *
     * @param index
     *            - the iteration of variable selection
     * @return - ColumnConfig.json path for variable selection history
     */
    public String getVarSelColumnConfig(int index) {
        return getPathBySourceType(
                new Path(Constants.VAR_SELECT, Constants.COLUMN_CONFIG_JSON_FILE_NAME + "." + index),
                SourceType.LOCAL);
    }

    /**
     * Get the SE history by index
     *
     * @param index
     *            - the iteration of variable selection
     * @return - se path for variable selection history
     */
    public String getVarSelMSEHistPath(int index) {
        return getPathBySourceType(
                new Path(Constants.VAR_SELECT, Constants.SHIFU_VARSELECT_SE_OUTPUT_NAME + "." + index),
                SourceType.LOCAL);
    }

    /**
     * Get the columnconfig.psi path
     * @return - columnconfig.psi path for storing unit stats
     */
    public String getColumnConfigUnitStatsPath() {
        return getPathBySourceType(new Path(Constants.TMP, Constants.CC_UNIT_STATS_PATH), SourceType.LOCAL);
    }

    public String getEncodeDataPath(EvalConfig evalConfig) {
        if (evalConfig == null) {
            return getPathBySourceType(new Path(Constants.TMP, Constants.TRAIN_DATA_ENCODE_PATH), SourceType.HDFS);
        } else {
            return getPathBySourceType(
                    new Path(Constants.TMP,
                            Constants.EVAL_DATA_ENCODE_PREFIX + StringUtils.capitalize(evalConfig.getName())),
                    SourceType.HDFS);
        }
    }
}