Source code for org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.WeightedRangePartitioner.java

Java tutorial

Introduction

Below is the full source code of the Apache Pig class WeightedRangePartitioner, a Hadoop partitioner that assigns keys to reduce partitions based on sampled quantile boundaries.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.pig.ExecType;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.HDataType;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.InternalMap;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.builtin.FindQuantiles;
import org.apache.pig.impl.io.NullableBigDecimalWritable;
import org.apache.pig.impl.io.NullableBigIntegerWritable;
import org.apache.pig.impl.io.NullableBooleanWritable;
import org.apache.pig.impl.io.NullableBytesWritable;
import org.apache.pig.impl.io.NullableDateTimeWritable;
import org.apache.pig.impl.io.NullableDoubleWritable;
import org.apache.pig.impl.io.NullableFloatWritable;
import org.apache.pig.impl.io.NullableIntWritable;
import org.apache.pig.impl.io.NullableLongWritable;
import org.apache.pig.impl.io.NullableText;
import org.apache.pig.impl.io.NullableTuple;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.io.ReadToEndLoader;
import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.Utils;

/**
 * Partitions keys into ranges using quantile boundaries computed by a
 * preceding sampling job (see {@link FindQuantiles}). Keys that are so
 * frequent that they span more than one partition ("skewed" keys) are spread
 * across their candidate partitions according to a sampled probability vector.
 */
public class WeightedRangePartitioner extends Partitioner<PigNullableWritable, Writable> implements Configurable {

    // Sorted boundary keys splitting the key space into numPartitions ranges;
    // populated from the quantiles file in setConf().
    PigNullableWritable[] quantiles;

    // Sort comparator from the job context; lazily initialized in
    // getPartition() because the job context may not be set up in setConf().
    RawComparator<PigNullableWritable> comparator;

    PigContext pigContext;

    /**
     * Maps each skewed key to a generator that picks one of the key's
     * candidate partitions with the sampled probabilities.
     * NOTE(review): shared static mutable state — assumes one partitioner
     * per task JVM; confirm before enabling JVM reuse.
     */
    public static final Map<PigNullableWritable, DiscreteProbabilitySampleGenerator> weightedParts = new HashMap<PigNullableWritable, DiscreteProbabilitySampleGenerator>();

    private static final Log log = LogFactory.getLog(WeightedRangePartitioner.class);

    Configuration job;

    /**
     * Returns the partition for {@code key}: skewed keys are routed through
     * their probability generator, all others are binary-searched against the
     * quantile boundaries.
     */
    @SuppressWarnings("unchecked")
    @Override
    public int getPartition(PigNullableWritable key, Writable value, int numPartitions) {
        if (comparator == null) {
            comparator = (RawComparator<PigNullableWritable>) PigMapReduce.sJobContext.getSortComparator();
        }

        if (!weightedParts.containsKey(key)) {
            int index = Arrays.binarySearch(quantiles, key, comparator);
            if (index < 0) {
                // Not an exact boundary hit: binarySearch returned
                // (-(insertion point) - 1); recover the insertion point.
                index = -index - 1;
            } else {
                // Exact boundary hit goes to the partition after the boundary.
                index = index + 1;
            }
            // Clamp: keys beyond the last boundary land in the last partition.
            return Math.min(index, numPartitions - 1);
        }

        // Skewed key: spread its records over several partitions.
        DiscreteProbabilitySampleGenerator gen = weightedParts.get(key);
        return gen.getNext();
    }

    /**
     * Loads the quantiles file produced by the sampling job and populates
     * {@link #quantiles} and {@link #weightedParts}.
     *
     * @throws RuntimeException if no quantiles file is configured, the
     *         serialized PigContext cannot be read, or the file cannot be
     *         loaded/parsed
     */
    @SuppressWarnings("unchecked")
    @Override
    public void setConf(Configuration configuration) {
        job = configuration;

        try {
            pigContext = (PigContext) ObjectSerializer.deserialize(job.get("pig.pigContext"));
        } catch (IOException e1) {
            // Fail fast with the real cause: continuing with a null
            // pigContext would only produce an obscure NPE a few lines below.
            log.error("Failed to deserialize pig.pigContext", e1);
            throw new RuntimeException("Failed to deserialize pig.pigContext", e1);
        }

        String quantilesFile = configuration.get("pig.quantilesFile", "");

        if (quantilesFile.length() == 0) {
            throw new RuntimeException(this.getClass().getSimpleName() + " used but no quantiles found");
        }

        try {
            // Use the local file system to read the quantiles file, carrying
            // over only the file-system-related settings that are present.
            Configuration conf;
            if (pigContext.getExecType() == ExecType.MAPREDUCE) {
                conf = new Configuration(true);
            } else {
                conf = new Configuration(false);
            }
            copyIfSet(configuration, conf, "fs.file.impl");
            copyIfSet(configuration, conf, "fs.hdfs.impl");
            if (configuration.getBoolean("pig.tmpfilecompression", false)) {
                conf.setBoolean("pig.tmpfilecompression", true);
                copyIfSet(configuration, conf, "pig.tmpfilecompression.codec");
            }
            conf.set(MapRedUtil.FILE_SYSTEM_NAME, "file:///");

            ReadToEndLoader loader = new ReadToEndLoader(Utils.getTmpFileStorageObject(conf), conf, quantilesFile,
                    0);
            DataBag quantilesList;
            Tuple t = loader.getNext();
            if (t != null) {
                // The quantiles file holds a single tuple whose first field
                // is a map containing:
                //   QUANTILES_LIST -> bag of boundary keys (reduce parallelism many)
                //   WEIGHTED_PARTS -> map of skewed key -> probability vector
                Map<String, Object> quantileMap = (Map<String, Object>) t.get(0);
                quantilesList = (DataBag) quantileMap.get(FindQuantiles.QUANTILES_LIST);
                InternalMap weightedPartsData = (InternalMap) quantileMap.get(FindQuantiles.WEIGHTED_PARTS);
                convertToArray(quantilesList);
                for (Entry<Object, Object> ent : weightedPartsData.entrySet()) {
                    Tuple key = (Tuple) ent.getKey(); // sample item which repeats
                    float[] probVec = getProbVec((Tuple) ent.getValue());
                    weightedParts.put(getPigNullableWritable(key), new DiscreteProbabilitySampleGenerator(probVec));
                }
            }
            // else - the quantiles file is empty - unless we have a bug, the
            // input must also be empty in which case we don't need to put
            // anything in weightedParts since getPartition() should never get
            // called. If the quantiles file is empty due to either a bug or
            // a transient failure situation on the dfs, then weightedParts will
            // not be populated and the job will fail in getPartition()
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Copies {@code key} from {@code src} to {@code dst} only if it is set. */
    private static void copyIfSet(Configuration src, Configuration dst, String key) {
        String value = src.get(key);
        if (value != null) {
            dst.set(key, value);
        }
    }

    /**
     * Converts a tuple of Float fields into a primitive probability vector.
     *
     * @param values tuple whose fields are all Floats
     * @return the tuple's fields as a float[]
     * @throws ExecException if a field cannot be read
     */
    private float[] getProbVec(Tuple values) throws ExecException {
        int size = values.size();
        float[] probVec = new float[size];
        for (int i = 0; i < size; i++) {
            probVec[i] = (Float) values.get(i);
        }
        return probVec;
    }

    /**
     * Wraps a sampled tuple in the PigNullableWritable type that matches the
     * reduce key: the whole tuple when a user comparator is in play or there
     * are multiple sort columns, otherwise its single field.
     */
    private PigNullableWritable getPigNullableWritable(Tuple t) {
        try {
            // user comparators work with tuples - so if user comparator
            // is being used OR if there are more than 1 sort cols, use
            // NullableTuple
            if ("true".equals(job.get("pig.usercomparator")) || t.size() > 1) {
                return new NullableTuple(t);
            } else {
                Object o = t.get(0);
                String kts = job.get("pig.reduce.key.type");
                if (kts == null) {
                    throw new RuntimeException("Didn't get reduce key type " + "from config file.");
                }
                return HDataType.getWritableComparableTypes(o, Byte.valueOf(kts));
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Materializes the bag of boundary keys into {@link #quantiles}, typed to
     * match the concrete key class so the sort comparator applies cleanly.
     *
     * @throws RuntimeException if the key class is not a recognized
     *         PigNullableWritable subtype
     */
    private void convertToArray(DataBag quantilesListAsBag) {
        ArrayList<PigNullableWritable> quantilesList = getList(quantilesListAsBag);
        if ("true".equals(job.get("pig.usercomparator"))) {
            // User comparators always operate on whole tuples.
            quantiles = quantilesList.toArray(new NullableTuple[0]);
            return;
        }
        // Hoisted once instead of re-evaluating get(0).getClass() per branch.
        Class<?> keyClass = quantilesList.get(0).getClass();
        if (keyClass == NullableTuple.class) {
            quantiles = quantilesList.toArray(new NullableTuple[0]);
        } else if (keyClass == NullableBytesWritable.class) {
            quantiles = quantilesList.toArray(new NullableBytesWritable[0]);
        } else if (keyClass == NullableDoubleWritable.class) {
            quantiles = quantilesList.toArray(new NullableDoubleWritable[0]);
        } else if (keyClass == NullableBigIntegerWritable.class) {
            quantiles = quantilesList.toArray(new NullableBigIntegerWritable[0]);
        } else if (keyClass == NullableBigDecimalWritable.class) {
            quantiles = quantilesList.toArray(new NullableBigDecimalWritable[0]);
        } else if (keyClass == NullableFloatWritable.class) {
            quantiles = quantilesList.toArray(new NullableFloatWritable[0]);
        } else if (keyClass == NullableBooleanWritable.class) {
            quantiles = quantilesList.toArray(new NullableBooleanWritable[0]);
        } else if (keyClass == NullableIntWritable.class) {
            quantiles = quantilesList.toArray(new NullableIntWritable[0]);
        } else if (keyClass == NullableLongWritable.class) {
            quantiles = quantilesList.toArray(new NullableLongWritable[0]);
        } else if (keyClass == NullableDateTimeWritable.class) {
            quantiles = quantilesList.toArray(new NullableDateTimeWritable[0]);
        } else if (keyClass == NullableText.class) {
            quantiles = quantilesList.toArray(new NullableText[0]);
        } else {
            throw new RuntimeException("Unexpected class in " + this.getClass().getSimpleName());
        }
    }

    /**
     * Converts each sampled tuple in the bag into its PigNullableWritable key
     * form via {@link #getPigNullableWritable(Tuple)}.
     */
    private ArrayList<PigNullableWritable> getList(DataBag quantilesListAsBag) {
        ArrayList<PigNullableWritable> list = new ArrayList<PigNullableWritable>();
        for (Tuple tuple : quantilesListAsBag) {
            list.add(getPigNullableWritable(tuple));
        }
        return list;
    }

    /** @return the configuration last passed to {@link #setConf(Configuration)} */
    @Override
    public Configuration getConf() {
        return job;
    }

}