com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForUtils.java Source code

Introduction

Here is the source code for com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForUtils.java
Source

/**
 * (C) Copyright IBM Corp. 2010, 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
*/

package com.ibm.bi.dml.runtime.controlprogram.parfor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

import scala.Tuple2;

import com.ibm.bi.dml.api.DMLScript;
import com.ibm.bi.dml.conf.ConfigurationManager;
import com.ibm.bi.dml.parser.Expression.DataType;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.controlprogram.LocalVariableMap;
import com.ibm.bi.dml.runtime.controlprogram.ParForProgramBlock;
import com.ibm.bi.dml.runtime.controlprogram.caching.CacheStatistics;
import com.ibm.bi.dml.runtime.controlprogram.caching.CacheableData;
import com.ibm.bi.dml.runtime.controlprogram.caching.MatrixObject;
import com.ibm.bi.dml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import com.ibm.bi.dml.runtime.controlprogram.parfor.stat.Stat;
import com.ibm.bi.dml.runtime.instructions.cp.Data;
import com.ibm.bi.dml.runtime.util.LocalFileUtils;
import com.ibm.bi.dml.utils.Statistics;

/**
 * Common functionalities for parfor workers in MR jobs. Used by worker wrappers in
 * mappers (base RemoteParFor) and reducers (fused data partitioning and parfor)
 * 
 */
public class RemoteParForUtils {

    /**
     * 
     * @param reporter
     * @param deltaTasks
     * @param deltaIterations
     */
    public static void incrementParForMRCounters(Reporter reporter, long deltaTasks, long deltaIterations) {
        //report parfor counters
        if (deltaTasks > 0)
            reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_NUMTASKS.toString(),
                    deltaTasks);
        if (deltaIterations > 0)
            reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_NUMITERS.toString(),
                    deltaIterations);

        JobConf job = ConfigurationManager.getCachedJobConf();
        if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode(job)) {
            //report cache statistics
            reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JITCOMPILE.toString(),
                    Statistics.getJITCompileTime());
            reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JVMGC_COUNT.toString(),
                    Statistics.getJVMgcCount());
            reporter.incrCounter(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME, Stat.PARFOR_JVMGC_TIME.toString(),
                    Statistics.getJVMgcTime());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_HITS_MEM.toString(), CacheStatistics.getMemHits());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString(), CacheStatistics.getFSBuffHits());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_HITS_FS.toString(), CacheStatistics.getFSHits());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_HITS_HDFS.toString(), CacheStatistics.getHDFSHits());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString(), CacheStatistics.getFSBuffWrites());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_WRITES_FS.toString(), CacheStatistics.getFSWrites());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_WRITES_HDFS.toString(), CacheStatistics.getHDFSWrites());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_TIME_ACQR.toString(), CacheStatistics.getAcquireRTime());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_TIME_ACQM.toString(), CacheStatistics.getAcquireMTime());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_TIME_RLS.toString(), CacheStatistics.getReleaseTime());
            reporter.incrCounter(CacheableData.CACHING_COUNTER_GROUP_NAME,
                    CacheStatistics.Stat.CACHE_TIME_EXP.toString(), CacheStatistics.getExportTime());

            //reset cache statistics to prevent overlapping reporting
            CacheStatistics.reset();
        }
    }

    /**
     * 
     * @param workerID
     * @param vars
     * @param resultVars
     * @param out
     * @throws DMLRuntimeException
     * @throws IOException
     */
    public static void exportResultVariables(long workerID, LocalVariableMap vars, ArrayList<String> resultVars,
            OutputCollector<Writable, Writable> out) throws DMLRuntimeException, IOException {
        exportResultVariables(workerID, vars, resultVars, null, out);
    }

    /**
     * For remote MR parfor workers.
     * 
     * @param workerID
     * @param vars
     * @param resultVars
     * @param rvarFnames
     * @param out
     * @throws DMLRuntimeException
     * @throws IOException
     */
    public static void exportResultVariables(long workerID, LocalVariableMap vars, ArrayList<String> resultVars,
            HashMap<String, String> rvarFnames, OutputCollector<Writable, Writable> out)
            throws DMLRuntimeException, IOException {
        //create key and value for reuse
        LongWritable okey = new LongWritable(workerID);
        Text ovalue = new Text();

        //foreach result variables probe if export necessary
        for (String rvar : resultVars) {
            Data dat = vars.get(rvar);

            //export output variable to HDFS (see RunMRJobs)
            if (dat != null && dat.getDataType() == DataType.MATRIX) {
                MatrixObject mo = (MatrixObject) dat;
                if (mo.isDirty()) {
                    if (ParForProgramBlock.ALLOW_REUSE_MR_PAR_WORKER && rvarFnames != null) {
                        String fname = rvarFnames.get(rvar);
                        if (fname != null)
                            mo.setFileName(fname);

                        //export result var (iff actually modified in parfor)
                        mo.exportData(); //note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)      
                        rvarFnames.put(rvar, mo.getFileName());
                    } else {
                        //export result var (iff actually modified in parfor)
                        mo.exportData(); //note: this is equivalent to doing it in close (currently not required because 1 Task=1Map tasks, hence only one map invocation)
                    }

                    //pass output vars (scalars by value, matrix by ref) to result
                    //(only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
                    String datStr = ProgramConverter.serializeDataObject(rvar, mo);
                    ovalue.set(datStr);
                    out.collect(okey, ovalue);
                }
            }
        }
    }

    /**
     * For remote Spark parfor workers. This is a simplified version compared to MR.
     * 
     * @param workerID
     * @param vars
     * @param resultVars
     * @param rvarFnames
     * @throws DMLRuntimeException
     * @throws IOException
     */
    public static ArrayList<String> exportResultVariables(long workerID, LocalVariableMap vars,
            ArrayList<String> resultVars) throws DMLRuntimeException, IOException {
        ArrayList<String> ret = new ArrayList<String>();

        //foreach result variables probe if export necessary
        for (String rvar : resultVars) {
            Data dat = vars.get(rvar);

            //export output variable to HDFS (see RunMRJobs)
            if (dat != null && dat.getDataType() == DataType.MATRIX) {
                MatrixObject mo = (MatrixObject) dat;
                if (mo.isDirty()) {
                    //export result var (iff actually modified in parfor)
                    mo.exportData();

                    //pass output vars (scalars by value, matrix by ref) to result
                    //(only if actually exported, hence in check for dirty, otherwise potential problems in result merge)
                    ret.add(ProgramConverter.serializeDataObject(rvar, mo));
                }
            }
        }

        return ret;
    }

    /**
     * Cleanup all temporary files created by this SystemML process
     * instance.
     * 
     */
    public static void cleanupWorkingDirectories() {
        //use the given job configuration for infrastructure analysis (see configure);
        //this is important for robustness w/ misconfigured classpath which also contains
        //core-default.xml and hence hides the actual cluster configuration; otherwise
        //there is missing cleanup of working directories 
        JobConf job = ConfigurationManager.getCachedJobConf();

        if (!InfrastructureAnalyzer.isLocalMode(job)) {
            //delete cache files
            CacheableData.cleanupCacheDir();
            //disable caching (prevent dynamic eviction)
            CacheableData.disableCaching();
            //cleanup working dir (e.g., of CP_FILE instructions)
            LocalFileUtils.cleanupWorkingDirectory();
        }
    }

    /**
     * 
     * @param out
     * @return
     * @throws DMLRuntimeException
     * @throws IOException
     */
    public static LocalVariableMap[] getResults(List<Tuple2<Long, String>> out, Log LOG)
            throws DMLRuntimeException {
        HashMap<Long, LocalVariableMap> tmp = new HashMap<Long, LocalVariableMap>();

        int countAll = 0;
        for (Tuple2<Long, String> entry : out) {
            Long key = entry._1();
            String val = entry._2();
            if (!tmp.containsKey(key))
                tmp.put(key, new LocalVariableMap());
            Object[] dat = ProgramConverter.parseDataObject(val);
            tmp.get(key).put((String) dat[0], (Data) dat[1]);
            countAll++;
        }

        if (LOG != null) {
            LOG.debug("Num remote worker results (before deduplication): " + countAll);
            LOG.debug("Num remote worker results: " + tmp.size());
        }

        //create return array
        return tmp.values().toArray(new LocalVariableMap[0]);
    }
}