Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.backend.hadoop.executionengine.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.pig.FuncSpec;
import org.apache.pig.PigConfiguration;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.builtin.PartitionSkewedKeys;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.io.ReadToEndLoader;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.util.Pair;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.Utils;

/**
 * A class of utility static methods to be used in the hadoop map reduce backend
 */
public class MapRedUtil {

    private static Log log = LogFactory.getLog(MapRedUtil.class);

    private static final TupleFactory tf = TupleFactory.getInstance();

    public static final String FILE_SYSTEM_NAME = "fs.default.name";

    /**
     * Loads the key distribution sampler file
     *
     * @param keyDistFile the name for the distribution file
     * @param totalReducers gets set to the total number of reducers as found in the dist file
     * @param keyType Type of the key to be stored in the return map. It currently treats Tuple as a special case.
     */
    @SuppressWarnings("unchecked")
    public static <E> Map<E, Pair<Integer, Integer>> loadPartitionFileFromLocalCache(
            String keyDistFile, Integer[] totalReducers, byte keyType, Configuration mapConf)
            throws IOException {

        Map<E, Pair<Integer, Integer>> reducerMap = new HashMap<E, Pair<Integer, Integer>>();

        // use local file system to get the keyDistFile
        Configuration conf = new Configuration(false);

        if (mapConf.get("yarn.resourcemanager.principal") != null) {
            conf.set("yarn.resourcemanager.principal", mapConf.get("yarn.resourcemanager.principal"));
        }

        if (PigMapReduce.sJobConfInternal.get().get("fs.file.impl") != null)
            conf.set("fs.file.impl", PigMapReduce.sJobConfInternal.get().get("fs.file.impl"));
        if (PigMapReduce.sJobConfInternal.get().get("fs.hdfs.impl") != null)
            conf.set("fs.hdfs.impl", PigMapReduce.sJobConfInternal.get().get("fs.hdfs.impl"));

        copyTmpFileConfigurationValues(PigMapReduce.sJobConfInternal.get(), conf);

        conf.set(MapRedUtil.FILE_SYSTEM_NAME, "file:///");

        ReadToEndLoader loader = new ReadToEndLoader(
                Utils.getTmpFileStorageObject(PigMapReduce.sJobConfInternal.get()),
                conf, keyDistFile, 0);
        DataBag partitionList;
        Tuple t = loader.getNext();
        if (t == null) {
            // this could happen if the input directory for sampling is empty
            log.warn("Empty dist file: " + keyDistFile);
            return reducerMap;
        }
        // The keydist file is structured as (key, min, max)
        // min, max being the index of the reducers
        Map<String, Object> distMap = (Map<String, Object>) t.get(0);
        partitionList = (DataBag) distMap.get(PartitionSkewedKeys.PARTITION_LIST);
        totalReducers[0] = Integer.valueOf("" + distMap.get(PartitionSkewedKeys.TOTAL_REDUCERS));
        Iterator<Tuple> it = partitionList.iterator();
        while (it.hasNext()) {
            Tuple idxTuple = it.next();
            Integer maxIndex = (Integer) idxTuple.get(idxTuple.size() - 1);
            Integer minIndex = (Integer) idxTuple.get(idxTuple.size() - 2);
            // Used to replace the maxIndex with the number of reducers
            if (maxIndex < minIndex) {
                maxIndex = totalReducers[0] + maxIndex;
            }
            E keyT;
            // if the join is on more than 1 key
            if (idxTuple.size() > 3) {
                // remove the last 2 fields of the tuple, i.e: minIndex and maxIndex and store
                // it in the reducer map
                Tuple keyTuple = tf.newTuple();
                for (int i = 0; i < idxTuple.size() - 2; i++) {
                    keyTuple.append(idxTuple.get(i));
                }
                keyT = (E) keyTuple;
            } else {
                if (keyType == DataType.TUPLE) {
                    keyT = (E) tf.newTuple(1);
                    ((Tuple) keyT).set(0, idxTuple.get(0));
                } else {
                    keyT = (E) idxTuple.get(0);
                }
            }
            // number of reducers
            Integer cnt = maxIndex - minIndex;
            reducerMap.put(keyT, new Pair(minIndex, cnt)); // 1 is added to account for the 0 index
        }
        return reducerMap;
    }

    public static void copyTmpFileConfigurationValues(Configuration fromConf, Configuration toConf) {
        // Currently these are used only by loaders (and not storers), so we do not need to copy
        // mapred properties that are required by {@link SequenceFileInterStorage}
        if (fromConf.getBoolean(PigConfiguration.PIG_ENABLE_TEMP_FILE_COMPRESSION, false)) {
            toConf.setBoolean(PigConfiguration.PIG_ENABLE_TEMP_FILE_COMPRESSION, true);
            if (fromConf.get(PigConfiguration.PIG_TEMP_FILE_COMPRESSION_CODEC) != null) {
                toConf.set(PigConfiguration.PIG_TEMP_FILE_COMPRESSION_CODEC,
                        fromConf.get(PigConfiguration.PIG_TEMP_FILE_COMPRESSION_CODEC));
            }
            if (fromConf.get(PigConfiguration.PIG_TEMP_FILE_COMPRESSION_STORAGE) != null) {
                toConf.set(PigConfiguration.PIG_TEMP_FILE_COMPRESSION_STORAGE,
                        fromConf.get(PigConfiguration.PIG_TEMP_FILE_COMPRESSION_STORAGE));
            }
        }
    }

    public static void setupUDFContext(Configuration job) throws IOException {
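        // Make the job configuration available through the thread-local UDFContext; the
        // deserialization below only runs on the backend, where the UDF conf has not been
        // populated yet (the front-end already holds it in memory).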
        UDFContext udfc = UDFContext.getUDFContext();
        udfc.addJobConf(job);
        // don't deserialize in front-end
        if (udfc.isUDFConfEmpty()) {
            udfc.deserialize();
        }
    }

    /**
     * Sets up output and log dir paths for a single-store streaming job
     *
     * @param st - POStore of the current job
     * @param pigContext
     * @param conf
     * @throws IOException
     */
    public static void setupStreamingDirsConfSingle(POStore st, PigContext pigContext,
            Configuration conf) throws IOException {
        // set out filespecs
        String outputPathString = st.getSFile().getFileName();
        if (HadoopShims.hasFileSystemImpl(new Path(outputPathString), conf)) {
            conf.set("pig.streaming.log.dir",
                    new Path(outputPathString, JobControlCompiler.LOG_DIR).toString());
        } else {
            String tmpLocationStr = FileLocalizer.getTemporaryPath(pigContext).toString();
            Path tmpLocation = new Path(tmpLocationStr);
            conf.set("pig.streaming.log.dir",
                    new Path(tmpLocation, JobControlCompiler.LOG_DIR).toString());
        }
        conf.set("pig.streaming.task.output.dir", outputPathString);
    }

    /**
     * Sets up output and log dir paths for a multi-store streaming job
     *
     * @param pigContext
     * @param conf
     * @throws IOException
     */
    public static void setupStreamingDirsConfMulti(PigContext pigContext, Configuration conf)
            throws IOException {
        String tmpLocationStr = FileLocalizer.getTemporaryPath(pigContext).toString();
        Path tmpLocation = new Path(tmpLocationStr);
        conf.set("pig.streaming.log.dir",
                new Path(tmpLocation, JobControlCompiler.LOG_DIR).toString());
        conf.set("pig.streaming.task.output.dir", tmpLocation.toString());
    }

    public static FileSpec checkLeafIsStore(PhysicalPlan plan, PigContext pigContext)
            throws ExecException {
        try {
            PhysicalOperator leaf = plan.getLeaves().get(0);
            FileSpec spec = null;
            if (!(leaf instanceof POStore)) {
                String scope = leaf.getOperatorKey().getScope();
                POStore str = new POStore(new OperatorKey(scope,
                        NodeIdGenerator.getGenerator().getNextNodeId(scope)));
                spec = new FileSpec(FileLocalizer.getTemporaryPath(pigContext).toString(),
                        new FuncSpec(Utils.getTmpFileCompressorName(pigContext)));
                str.setSFile(spec);
                plan.addAsLeaf(str);
            } else {
                spec = ((POStore) leaf).getSFile();
            }
            return spec;
        } catch (Exception e) {
            int errCode = 2045;
            String msg = "Internal error. "
                    + "Not able to check if the leaf node is a store operator.";
            throw new ExecException(msg, errCode, PigException.BUG, e);
        }
    }

    /**
     * Get all files recursively from the given list of files
     *
     * @param files a list of FileStatus
     * @param conf the configuration object
     * @return the list of fileStatus that contains all the files in the given
     *         list and, recursively, all the files inside the directories in
     *         the given list
     * @throws IOException
     */
    public static List<FileStatus> getAllFileRecursively(List<FileStatus> files,
            Configuration conf) throws IOException {
        List<FileStatus> result = new ArrayList<FileStatus>();
        int len = files.size();
        for (int i = 0; i < len; ++i) {
            FileStatus file = files.get(i);
            if (file.isDir()) {
                Path p = file.getPath();
                FileSystem fs = p.getFileSystem(conf);
                addInputPathRecursively(result, fs, p, hiddenFileFilter);
            } else {
                result.add(file);
            }
        }
        log.info("Total input paths to process : " + result.size());
        return result;
    }

    private static void addInputPathRecursively(List<FileStatus> result, FileSystem fs,
            Path path, PathFilter inputFilter) throws IOException {
        for (FileStatus stat : fs.listStatus(path, inputFilter)) {
            if (stat.isDir()) {
                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
            } else {
                result.add(stat);
            }
        }
    }

    private static final PathFilter hiddenFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            String name = p.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };

    public static long getPathLength(FileSystem fs, FileStatus status) throws IOException {
        return getPathLength(fs, status, Long.MAX_VALUE);
    }

    /**
     * Returns the total number of bytes for this file, or if a directory all
     * files in the directory.
     *
     * @param fs FileSystem
     * @param status FileStatus
     * @param max Maximum value of total length that will trigger exit. Many
     *        times we're only interested in whether the total length of files is
     *        greater than X or not. In such a case, we can exit the function early
     *        as soon as the max is reached.
     * @return the total length in bytes
     * @throws IOException
     */
    public static long getPathLength(FileSystem fs, FileStatus status, long max)
            throws IOException {
        if (!status.isDir()) {
            return status.getLen();
        } else {
            FileStatus[] children = fs.listStatus(status.getPath(), hiddenFileFilter);
            long size = 0;
            for (FileStatus child : children) {
                size += getPathLength(fs, child, max);
                if (size > max) return size;
            }
            return size;
        }
    }

    /* The following code is for split combination: see PIG-1518 */

    private static Comparator<Node> nodeComparator = new Comparator<Node>() {
        @Override
        public int compare(Node o1, Node o2) {
            long cmp = o1.length - o2.length;
            return cmp == 0 ? 0 : cmp < 0 ? -1 : 1;
        }
    };

    private static final class ComparableSplit implements Comparable<ComparableSplit> {
        private InputSplit rawInputSplit;
        private HashSet<Node> nodes;
        // id used as a tie-breaker when two splits are of equal size.
        private long id;

        ComparableSplit(InputSplit split, long id) {
            rawInputSplit = split;
            nodes = new HashSet<Node>();
            this.id = id;
        }

        void add(Node node) {
            nodes.add(node);
        }

        void removeFromNodes() {
            for (Node node : nodes)
                node.remove(this);
        }

        public InputSplit getSplit() {
            return rawInputSplit;
        }

        @Override
        public boolean equals(Object other) {
            if (other == null || !(other instanceof ComparableSplit))
                return false;
            return (compareTo((ComparableSplit) other) == 0);
        }

        @Override
        public int hashCode() {
            return 41;
        }

        @Override
        public int compareTo(ComparableSplit other) {
            try {
                long cmp = rawInputSplit.getLength() - other.rawInputSplit.getLength();
                // in descending order
                return cmp == 0 ? (id == other.id ? 0 : id < other.id ? -1 : 1) : cmp < 0 ? 1 : -1;
            } catch (IOException e) {
                throw new RuntimeException(e);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
    }

    private static class DummySplit extends InputSplit {
        private long length;

        @Override
        public String[] getLocations() {
            return null;
        }

        @Override
        public long getLength() {
            return length;
        }

        public void setLength(long length) {
            this.length = length;
        }
    }

    private static class Node {
        private long length = 0;
        private ArrayList<ComparableSplit> splits;
        private boolean sorted;

        Node() throws IOException, InterruptedException {
            length = 0;
            splits = new ArrayList<ComparableSplit>();
            sorted = false;
        }

        void add(ComparableSplit split) throws IOException, InterruptedException {
            splits.add(split);
            length++;
        }

        void remove(ComparableSplit split) {
            if (!sorted)
                sort();
            int index = Collections.binarySearch(splits, split);
            if (index >= 0) {
                splits.remove(index);
                length--;
            }
        }

        void sort() {
            if (!sorted) {
                Collections.sort(splits);
                sorted = true;
            }
        }

        ArrayList<ComparableSplit> getSplits() {
            return splits;
        }

        public long getLength() {
            return length;
        }
    }

    public static List<List<InputSplit>> getCombinePigSplits(List<InputSplit> oneInputSplits,
            long maxCombinedSplitSize, Configuration conf)
            throws IOException, InterruptedException {
        ArrayList<Node> nodes = new ArrayList<Node>();
        HashMap<String, Node> nodeMap = new HashMap<String, Node>();
        List<List<InputSplit>> result = new ArrayList<List<InputSplit>>();
        List<Long> resultLengths = new ArrayList<Long>();
        long comparableSplitId = 0;

        int size = 0, nSplits = oneInputSplits.size();
        InputSplit lastSplit = null;
        int emptyCnt = 0;
        for (InputSplit split : oneInputSplits) {
            if (split.getLength() == 0) {
                emptyCnt++;
                continue;
            }
            if (split.getLength() >= maxCombinedSplitSize) {
                comparableSplitId++;
                ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
                combinedSplits.add(split);
                result.add(combinedSplits);
                resultLengths.add(split.getLength());
            } else {
                String[] locations = split.getLocations();
                if (locations.length == 0) {
                    // This split is missing blocks, or the split returned bad locations.
                    // Don't try to combine.
                    comparableSplitId++;
                    ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
                    combinedSplits.add(split);
                    result.add(combinedSplits);
                    resultLengths.add(split.getLength());
                } else {
                    ComparableSplit csplit = new ComparableSplit(split, comparableSplitId++);
                    // sort the locations to stabilize the number of maps: PIG-1757
                    Arrays.sort(locations);
                    HashSet<String> locationSeen = new HashSet<String>();
                    for (String location : locations) {
                        if (!locationSeen.contains(location)) {
                            Node node = nodeMap.get(location);
                            if (node == null) {
                                node = new Node();
                                nodes.add(node);
                                nodeMap.put(location, node);
                            }
                            node.add(csplit);
                            csplit.add(node);
                            locationSeen.add(location);
                        }
                    }
                    lastSplit = split;
                    size++;
                }
            }
        }

        /* verification code: debug purpose
        {
            ArrayList<ComparableSplit> leftoverSplits = new ArrayList<ComparableSplit>();
            HashSet<InputSplit> seen = new HashSet<InputSplit>();
            for (Node node : nodes) {
                if (node.getLength() > 0) {
                    ArrayList<ComparableSplit> splits = node.getSplits();
                    for (ComparableSplit split : splits) {
                        if (!seen.contains(split.getSplit())) {
                            // remove duplicates. The set has to be on the raw input split not the
                            // comparable input split as the latter overrides the compareTo method
                            // so its equality semantics is changed and not what we want here
                            seen.add(split.getSplit());
                            leftoverSplits.add(split);
                        }
                    }
                }
            }

            int combinedSplitLen = 0;
            for (PigSplit split : result)
                combinedSplitLen += split.getNumPaths();
            if (combinedSplitLen + leftoverSplits.size() != nSplits - emptyCnt) {
                throw new AssertionError("number of combined splits {" + combinedSplitLen + "+"
                        + leftoverSplits.size() + "-" + size
                        + "} does not match the number of original splits [" + nSplits + "].");
            }
        }
        */

        if (nSplits > 0 && emptyCnt == nSplits) {
            // if all splits are empty, add a single empty split as currently an empty directory is
            // not properly handled somewhere
            ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            combinedSplits.add(oneInputSplits.get(0));
            result.add(combinedSplits);
        }
        else if (size == 1) {
            ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            combinedSplits.add(lastSplit);
            result.add(combinedSplits);
        }
        else if (size > 1) {
            // combine small splits
            Collections.sort(nodes, nodeComparator);
            DummySplit dummy = new DummySplit();
            // dummy is used to search for next split of suitable size to be combined
            ComparableSplit dummyComparableSplit = new ComparableSplit(dummy, -1);
            for (Node node : nodes) {
                // sort the splits on this node in descending order
                node.sort();
                long totalSize = 0;
                ArrayList<ComparableSplit> splits = node.getSplits();
                int idx;
                int lenSplits;
                ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
                ArrayList<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();
                while (!splits.isEmpty()) {
                    combinedSplits.add(splits.get(0).getSplit());
                    combinedComparableSplits.add(splits.get(0));
                    int startIdx = 1;
                    lenSplits = splits.size();
                    totalSize += splits.get(0).getSplit().getLength();
                    long spaceLeft = maxCombinedSplitSize - totalSize;
                    dummy.setLength(spaceLeft);
                    idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                            dummyComparableSplit);
                    idx = -idx - 1 + startIdx;
                    while (idx < lenSplits) {
                        long thisLen = splits.get(idx).getSplit().getLength();
                        combinedSplits.add(splits.get(idx).getSplit());
                        combinedComparableSplits.add(splits.get(idx));
                        totalSize += thisLen;
                        spaceLeft -= thisLen;
                        if (spaceLeft <= 0)
                            break;
                        // find next combinable chunk
                        startIdx = idx + 1;
                        if (startIdx >= lenSplits)
                            break;
                        dummy.setLength(spaceLeft);
                        idx =
                                Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                                        dummyComparableSplit);
                        idx = -idx - 1 + startIdx;
                    }
                    if (totalSize > maxCombinedSplitSize / 2) {
                        result.add(combinedSplits);
                        resultLengths.add(totalSize);
                        removeSplits(combinedComparableSplits);
                        totalSize = 0;
                        combinedSplits = new ArrayList<InputSplit>();
                        combinedComparableSplits.clear();
                        splits = node.getSplits();
                    } else {
                        if (combinedSplits.size() != lenSplits)
                            throw new AssertionError("Combined split logic error!");
                        break;
                    }
                }
            }
            // handle leftovers
            ArrayList<ComparableSplit> leftoverSplits = new ArrayList<ComparableSplit>();
            HashSet<InputSplit> seen = new HashSet<InputSplit>();
            for (Node node : nodes) {
                for (ComparableSplit split : node.getSplits()) {
                    if (!seen.contains(split.getSplit())) {
                        // remove duplicates. The set has to be on the raw input split not the
                        // comparable input split as the latter overrides the compareTo method
                        // so its equality semantics is changed and not what we want here
                        seen.add(split.getSplit());
                        leftoverSplits.add(split);
                    }
                }
            }

            /* verification code
            int combinedSplitLen = 0;
            for (PigSplit split : result)
                combinedSplitLen += split.getNumPaths();
            if (combinedSplitLen + leftoverSplits.size() != nSplits - emptyCnt)
                throw new AssertionError("number of combined splits [" + combinedSplitLen + "+"
                        + leftoverSplits.size() + "] does not match the number of original splits ["
                        + nSplits + "].");
            */
            if (!leftoverSplits.isEmpty()) {
                long totalSize = 0;
                ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
                ArrayList<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();

                int splitLen = leftoverSplits.size();
                for (int i = 0; i < splitLen; i++) {
                    ComparableSplit split = leftoverSplits.get(i);
                    long thisLen = split.getSplit().getLength();
                    if (totalSize + thisLen >= maxCombinedSplitSize) {
                        removeSplits(combinedComparableSplits);
                        result.add(combinedSplits);
                        resultLengths.add(totalSize);
                        combinedSplits = new ArrayList<InputSplit>();
                        combinedComparableSplits.clear();
                        totalSize = 0;
                    }
                    combinedSplits.add(split.getSplit());
                    combinedComparableSplits.add(split);
                    totalSize += split.getSplit().getLength();
                    if (i == splitLen - 1) {
                        // last piece: it could be very small, try to see if it can be squeezed into
                        // any existing splits
                        for (int j = 0; j < result.size(); j++) {
                            if (resultLengths.get(j) + totalSize <= maxCombinedSplitSize) {
                                List<InputSplit> isList = result.get(j);
                                for (InputSplit csplit : combinedSplits) {
                                    isList.add(csplit);
                                }
                                removeSplits(combinedComparableSplits);
                                combinedSplits.clear();
                                break;
                            }
                        }
                        if (!combinedSplits.isEmpty()) {
                            // last piece cannot be squeezed in, create a new combined split for them.
                            removeSplits(combinedComparableSplits);
                            result.add(combinedSplits);
                        }
                    }
                }
            }
        }

        /* verification code
        int combinedSplitLen = 0;
        for (PigSplit split : result)
            combinedSplitLen += split.getNumPaths();
        if (combinedSplitLen != nSplits - emptyCnt)
            throw new AssertionError("number of combined splits [" + combinedSplitLen
                    + "] does not match the number of original splits [" + nSplits + "].");

        long totalLen = 0;
        for (PigSplit split : result)
            totalLen += split.getLength();

        long origTotalLen = 0;
        for (InputSplit split : oneInputSplits)
            origTotalLen += split.getLength();
        if (totalLen != origTotalLen)
            throw new AssertionError("The total length [" + totalLen
                    + "] does not match the original [" + origTotalLen + "]");
        */
        log.info("Total input paths (combined) to process : " + result.size());
        return result;
    }

    private static void removeSplits(List<ComparableSplit> splits) {
        for (ComparableSplit split : splits)
            split.removeFromNodes();
    }

    public String inputSplitToString(InputSplit[] splits) throws IOException, InterruptedException {
        // debugging purpose only
        StringBuilder st = new StringBuilder();
        st.append("Number of splits :" + splits.length + "\n");
        long len = 0;
        for (InputSplit split : splits)
            len += split.getLength();
        st.append("Total Length = " + len + "\n");
        for (int i = 0; i < splits.length; i++) {
            st.append("Input split[" + i + "]:\n Length = " + splits[i].getLength()
                    + "\n Locations:\n");
            for (String location : splits[i].getLocations())
                st.append(" " + location + "\n");
            st.append("\n-----------------------\n");
        }
        return st.toString();
    }

    /* verification code: debug purpose only
    public String inputSplitToString(ArrayList<ComparableSplit> splits) throws IOException, InterruptedException {
        StringBuilder st = new StringBuilder();
        st.append("Number of splits :" + splits.size() + "\n");
        long len = 0;
        for (ComparableSplit split : splits)
            len += split.getSplit().getLength();
        st.append("Total Length = " + len + "\n");
        for (int i = 0; i < splits.size(); i++) {
            st.append("Input split[" + i + "]:\n Length = " + splits.get(i).getSplit().getLength()
                    + "\n Locations:\n");
            for (String location : splits.get(i).getSplit().getLocations())
                st.append(" " + location + "\n");
            st.append("\n-----------------------\n");
        }
        return st.toString();
    }
    */
}
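A rough usage sketch (not part of the Apache Pig source above): the driver class below shows one way the public file helpers of MapRedUtil might be called. The class name MapRedUtilDemo, the input path /tmp/pig-demo, and the 1 GB early-exit cap are illustrative assumptions, not values taken from Pig.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil;

public class MapRedUtilDemo {
    public static void main(String[] args) throws IOException {
        // Hypothetical input directory; replace with a path that exists in your cluster or local FS.
        Path input = new Path("/tmp/pig-demo");
        Configuration conf = new Configuration();
        FileSystem fs = input.getFileSystem(conf);

        // Expand any directories into the full list of non-hidden files they contain.
        List<FileStatus> files =
                MapRedUtil.getAllFileRecursively(Arrays.asList(fs.listStatus(input)), conf);

        // Total bytes under the path, returning early once the (assumed) 1 GB cap is exceeded.
        long size = MapRedUtil.getPathLength(fs, fs.getFileStatus(input), 1L << 30);

        System.out.println(files.size() + " files, ~" + size + " bytes");
    }
}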