Example usage for org.apache.hadoop.conf.Configuration.getLong

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getLong, collected from open-source projects.

Prototype

public long getLong(String name, long defaultValue) 

Document

Get the value of the name property as a long. If no such property exists, the provided defaultValue is returned.
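
All of the project examples below rely on the same contract: the default is returned only when the property is absent, and a stored value that cannot be parsed as a long results in a NumberFormatException. Here is a minimal standalone sketch of that behavior; the property name demo.input.maxbytes is made up for illustration.

import org.apache.hadoop.conf.Configuration;

public class GetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The property is unset, so getLong falls back to the supplied default.
        long maxBytes = conf.getLong("demo.input.maxbytes", 100 * 1000 * 1000L);
        System.out.println(maxBytes); // prints 100000000

        // Once the property is set, its parsed value takes precedence over the default.
        conf.setLong("demo.input.maxbytes", 42L);
        System.out.println(conf.getLong("demo.input.maxbytes", 100 * 1000 * 1000L)); // prints 42
    }
}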

Usage

From source file: org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java

License: Apache License

private boolean okToRunLocal(org.apache.hadoop.mapreduce.Job job, MapReduceOper mro, List<POLoad> lds)
        throws IOException {
    Configuration conf = job.getConfiguration();
    if (!conf.getBoolean(PigConfiguration.PIG_AUTO_LOCAL_ENABLED, false)) {
        return false;
    }

    long totalInputFileSize = InputSizeReducerEstimator.getTotalInputFileSize(conf, lds, job);
    long inputByteMax = conf.getLong(PigConfiguration.PIG_AUTO_LOCAL_INPUT_MAXBYTES, 100 * 1000 * 1000L);
    log.info("Size of input: " + totalInputFileSize + " bytes. Small job threshold: " + inputByteMax);
    if (totalInputFileSize < 0 || totalInputFileSize > inputByteMax) {
        return false;
    }

    int reducers = conf.getInt(MRConfiguration.REDUCE_TASKS, 1);
    log.info("No of reducers: " + reducers);
    if (reducers > 1) {
        return false;
    }

    return true;
}

From source file: org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.java

License: Apache License

protected List<InputSplit> getPigSplits(List<InputSplit> oneInputSplits, int inputIndex,
        ArrayList<OperatorKey> targetOps, long blockSize, boolean combinable, Configuration conf)
        throws IOException, InterruptedException {
    ArrayList<InputSplit> pigSplits = new ArrayList<InputSplit>();
    if (!combinable) {
        int splitIndex = 0;
        for (InputSplit inputSplit : oneInputSplits) {
            PigSplit pigSplit = new PigSplit(new InputSplit[] { inputSplit }, inputIndex, targetOps,
                    splitIndex++);
            pigSplit.setConf(conf);
            pigSplits.add(pigSplit);
        }
        return pigSplits;
    } else {
        long maxCombinedSplitSize = conf.getLong("pig.maxCombinedSplitSize", 0);
        if (maxCombinedSplitSize == 0)
            // default is the block size
            maxCombinedSplitSize = blockSize;
        List<List<InputSplit>> combinedSplits = MapRedUtil.getCombinePigSplits(oneInputSplits,
                maxCombinedSplitSize, conf);
        for (int i = 0; i < combinedSplits.size(); i++)
            pigSplits.add(createPigSplit(combinedSplits.get(i), inputIndex, targetOps, i, conf));
        return pigSplits;
    }
}

From source file: org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputSplitFormat.java

License: Apache License

protected List<InputSplit> getPigSplits(List<InputSplit> oneInputSplits, int inputIndex,
        ArrayList<OperatorKey> targetOps, long blockSize, boolean combinable, Configuration conf)
        throws IOException, InterruptedException {
    ArrayList<InputSplit> pigSplits = new ArrayList<InputSplit>();
    if (!combinable) {
        int splitIndex = 0;
        for (InputSplit inputSplit : oneInputSplits) {
            PigSplit pigSplit = new PigSplit(new InputSplit[] { inputSplit }, inputIndex, targetOps,
                    splitIndex++);
            pigSplit.setConf(conf);
            pigSplits.add(pigSplit);
        }
        return pigSplits;
    } else {
        long maxCombinedSplitSize = conf.getLong("pig.maxCombinedSplitSize", 0);
        if (maxCombinedSplitSize == 0)
            // default is the block size
            maxCombinedSplitSize = blockSize;
        List<List<InputSplit>> combinedSplits = MapRedUtil.getCombinePigSplits(oneInputSplits,
                maxCombinedSplitSize, conf);
        for (int i = 0; i < combinedSplits.size(); i++)
            pigSplits.add(createPigSplit(combinedSplits.get(i), inputIndex, targetOps, i, conf));
        return pigSplits;
    }
}

From source file: org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.java

License: Apache License

@Override
public Result processInput() throws ExecException {

    // Make sure the reporter is set, because it isn't getting carried
    // across in the serialization (don't know why).  I suspect it's as
    // cheap to call setReporter every time as to check whether I
    // have (hopefully java will inline it).
    if (!initialized) {
        func.setReporter(getReporter());
        func.setPigLogger(pigLogger);
        Configuration jobConf = UDFContext.getUDFContext().getJobConf();
        if (jobConf != null) {
            doTiming = jobConf.getBoolean(TIME_UDFS, false);
            if (doTiming) {
                counterGroup = funcSpec.toString();
                timingFrequency = jobConf.getLong(TIME_UDFS_FREQUENCY, 100L);
            }
        }
        // We initialize here instead of instantiateFunc because this is called
        // when actual processing has begun, whereas a function can be instantiated
        // on the frontend potentially (mainly for optimization)
        Schema tmpS = func.getInputSchema();
        if (tmpS != null) {
            //Currently, getInstanceForSchema returns null if no class was found. This works fine...
            //if it is null, the default will be used. We pass the context because if it happens that
            //the same Schema was generated elsewhere, we do not want to override user expectations
            inputTupleMaker = SchemaTupleFactory.getInstance(tmpS, false, GenContext.UDF);
            if (inputTupleMaker == null) {
                LOG.debug("No SchemaTupleFactory found for Schema [" + tmpS + "], using default TupleFactory");
                usingSchemaTupleFactory = false;
            } else {
                LOG.debug("Using SchemaTupleFactory for Schema: " + tmpS);
                usingSchemaTupleFactory = true;
            }

            //In the future, we could optionally use SchemaTuples for output as well
        }

        if (inputTupleMaker == null) {
            inputTupleMaker = TupleFactory.getInstance();
        }

        initialized = true;
    }

    Result res = new Result();
    if (input == null && (inputs == null || inputs.size() == 0)) {
        res.returnStatus = POStatus.STATUS_EOP;
        return res;
    }

    //Should be removed once the model is clear
    if (getReporter() != null) {
        getReporter().progress();
    }

    if (isInputAttached()) {
        res.result = input;
        res.returnStatus = POStatus.STATUS_OK;
        detachInput();
        return res;
    } else {
        //we decouple this because there may be cases where the size is known and it isn't a schema
        // tuple factory
        boolean knownSize = usingSchemaTupleFactory;
        int knownIndex = 0;
        res.result = inputTupleMaker.newTuple();

        Result temp = null;

        for (PhysicalOperator op : inputs) {
            temp = op.getNext(op.getResultType());
            if (temp.returnStatus != POStatus.STATUS_OK) {
                return temp;
            }

            if (op instanceof POProject && op.getResultType() == DataType.TUPLE) {
                POProject projOp = (POProject) op;
                if (projOp.isProjectToEnd()) {
                    Tuple trslt = (Tuple) temp.result;
                    Tuple rslt = (Tuple) res.result;
                    for (int i = 0; i < trslt.size(); i++) {
                        if (knownSize) {
                            rslt.set(knownIndex++, trslt.get(i));
                        } else {
                            rslt.append(trslt.get(i));
                        }
                    }
                    continue;
                }
            }
            if (knownSize) {
                ((Tuple) res.result).set(knownIndex++, temp.result);
            } else {
                ((Tuple) res.result).append(temp.result);
            }
        }
        res.returnStatus = temp.returnStatus;

        return res;
    }
}

From source file: org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.TezOperDependencyParallelismEstimator.java

License: Apache License

@Override
public int estimateParallelism(TezOperPlan plan, TezOperator tezOper, Configuration conf) throws IOException {

    if (tezOper.isVertexGroup()) {
        return -1;
    }

    // TODO: If map opts and reduce opts are the same, estimate higher parallelism
    // for tasks based on the number of map tasks; otherwise stay conservative as now
    maxTaskCount = conf.getInt(PigReducerEstimator.MAX_REDUCER_COUNT_PARAM,
            PigReducerEstimator.DEFAULT_MAX_REDUCER_COUNT_PARAM);

    bytesPerReducer = conf.getLong(PigReducerEstimator.BYTES_PER_REDUCER_PARAM,
            PigReducerEstimator.DEFAULT_BYTES_PER_REDUCER);

    // If parallelism is set explicitly, respect it
    if (!tezOper.isIntermediateReducer() && tezOper.getRequestedParallelism() != -1) {
        return tezOper.getRequestedParallelism();
    }

    // If we have already estimated parallelism, use that one
    if (tezOper.getEstimatedParallelism() != -1) {
        return tezOper.getEstimatedParallelism();
    }

    List<TezOperator> preds = plan.getPredecessors(tezOper);
    if (preds == null) {
        throw new IOException("Cannot estimate parallelism for source vertex");
    }

    double estimatedParallelism = 0;

    for (Entry<OperatorKey, TezEdgeDescriptor> entry : tezOper.inEdges.entrySet()) {
        TezOperator pred = getPredecessorWithKey(plan, tezOper, entry.getKey().toString());

        // Don't include broadcast edge, broadcast edge is used for
        // replicated join (covered in TezParallelismFactorVisitor.visitFRJoin)
        // and sample/scalar (does not impact parallelism)
        if (entry.getValue().dataMovementType == DataMovementType.SCATTER_GATHER
                || entry.getValue().dataMovementType == DataMovementType.ONE_TO_ONE) {
            double predParallelism = pred.getEffectiveParallelism(pc.defaultParallel);
            if (predParallelism == -1) {
                throw new IOException("Cannot estimate parallelism for " + tezOper.getOperatorKey().toString()
                        + ", effective parallelism for predecessor " + tezOper.getOperatorKey().toString()
                        + " is -1");
            }

            // For cases like Union, we can just limit to the sum of predecessor vertices' parallelism
            boolean applyFactor = !tezOper.isUnion();
            if (!pred.isVertexGroup() && applyFactor) {
                predParallelism = predParallelism * pred.getParallelismFactor(tezOper);
                if (pred.getTotalInputFilesSize() > 0) {
                    // Estimate similar to mapreduce and use the maximum of two
                    int parallelismBySize = (int) Math
                            .ceil((double) pred.getTotalInputFilesSize() / bytesPerReducer);
                    predParallelism = Math.max(predParallelism, parallelismBySize);
                }
            }
            estimatedParallelism += predParallelism;
        }
    }

    int roundedEstimatedParallelism = (int) Math.ceil(estimatedParallelism);

    if (tezOper.isIntermediateReducer() && tezOper.isOverrideIntermediateParallelism()) {
        // Estimated reducers should not be more than the configured limit
        roundedEstimatedParallelism = Math.min(roundedEstimatedParallelism, maxTaskCount);
        int userSpecifiedParallelism = pc.defaultParallel;
        if (tezOper.getRequestedParallelism() != -1) {
            userSpecifiedParallelism = tezOper.getRequestedParallelism();
        }
        int intermediateParallelism = Math.max(userSpecifiedParallelism, roundedEstimatedParallelism);
        if (userSpecifiedParallelism != -1 && (intermediateParallelism > 200
                && intermediateParallelism > (2 * userSpecifiedParallelism))) {
            // Estimated reducers shall not be more than 2x of requested parallelism
            // if greater than 200 and we are overriding user specified values
            intermediateParallelism = 2 * userSpecifiedParallelism;
        }
        roundedEstimatedParallelism = intermediateParallelism;
    } else {
        roundedEstimatedParallelism = Math.min(roundedEstimatedParallelism, maxTaskCount);
    }

    if (roundedEstimatedParallelism == 0) {
        roundedEstimatedParallelism = 1; // We need to produce empty output file
    }

    return roundedEstimatedParallelism;
}

From source file: org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder.java

License: Apache License

private Vertex newVertex(TezOperator tezOp, boolean isMap)
        throws IOException, ClassNotFoundException, InterruptedException {
    ProcessorDescriptor procDesc = ProcessorDescriptor.create(tezOp.getProcessorName());

    // Pass physical plans to vertex as user payload.
    JobConf payloadConf = new JobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), false));

    // We do this so that dag.getCredentials(), job.getCredentials(),
    // job.getConfiguration().getCredentials() all reference the same Credentials object
    // Unfortunately there is no setCredentials() on Job
    payloadConf.setCredentials(dag.getCredentials());
    // We won't actually use this job, but we need it to talk with the Load Store funcs
    @SuppressWarnings("deprecation")
    Job job = new Job(payloadConf);
    payloadConf = (JobConf) job.getConfiguration();

    if (tezOp.sampleOperator != null) {
        payloadConf.set(PigProcessor.SAMPLE_VERTEX, tezOp.sampleOperator.getOperatorKey().toString());
    }

    if (tezOp.sortOperator != null) {
        payloadConf.set(PigProcessor.SORT_VERTEX, tezOp.sortOperator.getOperatorKey().toString());
    }

    String tmp;
    long maxCombinedSplitSize = 0;
    if (!tezOp.combineSmallSplits()
            || pc.getProperties().getProperty(PigConfiguration.PIG_SPLIT_COMBINATION, "true").equals("false"))
        payloadConf.setBoolean(PigConfiguration.PIG_NO_SPLIT_COMBINATION, true);
    else if ((tmp = pc.getProperties().getProperty(PigConfiguration.PIG_MAX_COMBINED_SPLIT_SIZE,
            null)) != null) {
        try {
            maxCombinedSplitSize = Long.parseLong(tmp);
        } catch (NumberFormatException e) {
            log.warn(
                    "Invalid numeric format for pig.maxCombinedSplitSize; use the default maximum combined split size");
        }
    }
    if (maxCombinedSplitSize > 0)
        payloadConf.setLong("pig.maxCombinedSplitSize", maxCombinedSplitSize);

    payloadConf.set("pig.inputs", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInp()));
    payloadConf.set("pig.inpSignatures",
            ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpSignatureLists()));
    payloadConf.set("pig.inpLimits", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpLimits()));
    // Process stores
    LinkedList<POStore> stores = processStores(tezOp, payloadConf, job);

    payloadConf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    payloadConf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
    payloadConf.set("exectype", "TEZ");
    payloadConf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    payloadConf.setClass(MRConfiguration.INPUTFORMAT_CLASS, PigInputFormat.class, InputFormat.class);

    // Set parent plan for all operators in the Tez plan.
    new PhyPlanSetter(tezOp.plan).visit();

    // Set the endOfAllInput flag on the physical plan if certain operators that
    // use this property (such as STREAM) are present in the plan.
    EndOfAllInputSetter.EndOfAllInputChecker checker = new EndOfAllInputSetter.EndOfAllInputChecker(tezOp.plan);
    checker.visit();
    if (checker.isEndOfAllInputPresent()) {
        payloadConf.set(JobControlCompiler.END_OF_INP_IN_MAP, "true");
    }

    // Configure the classes for incoming shuffles to this TezOp
    // TODO: Refactor out resetting input keys, PIG-3957
    List<PhysicalOperator> roots = tezOp.plan.getRoots();
    if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
        POPackage pack = (POPackage) roots.get(0);

        List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
        if (succsList != null) {
            succsList = new ArrayList<PhysicalOperator>(succsList);
        }
        byte keyType = pack.getPkgr().getKeyType();
        tezOp.plan.remove(pack);
        payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
        setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);
        POShuffleTezLoad newPack;
        newPack = new POShuffleTezLoad(pack);
        if (tezOp.isSkewedJoin()) {
            newPack.setSkewedJoins(true);
        }
        tezOp.plan.add(newPack);

        // Set input keys for POShuffleTezLoad. This is used to identify
        // the inputs that are attached to the POShuffleTezLoad in the
        // backend.
        Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (tezOp.sampleOperator != null && tezOp.sampleOperator == pred) {
                // skip sample vertex input
            } else {
                String inputKey = pred.getOperatorKey().toString();
                if (pred.isVertexGroup()) {
                    pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
                }
                LinkedList<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(pred.plan,
                        POLocalRearrangeTez.class);
                for (POLocalRearrangeTez lr : lrs) {
                    if (lr.isConnectedToPackage()
                            && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                        localRearrangeMap.put((int) lr.getIndex(), inputKey);
                    }
                }
            }
        }
        for (Map.Entry<Integer, String> entry : localRearrangeMap.entrySet()) {
            newPack.addInputKey(entry.getValue());
        }

        if (succsList != null) {
            for (PhysicalOperator succs : succsList) {
                tezOp.plan.connect(newPack, succs);
            }
        }

        setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
    } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
        POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
        // TODO Need to fix multiple input key mapping
        TezOperator identityInOutPred = null;
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!pred.isSampleAggregation()) {
                identityInOutPred = pred;
                break;
            }
        }
        identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
    } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
        POValueInputTez valueInput = (POValueInputTez) roots.get(0);

        LinkedList<String> scalarInputs = new LinkedList<String>();
        for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class)) {
            if (userFunc.getFunc() instanceof ReadScalarsTez) {
                scalarInputs.add(((ReadScalarsTez) userFunc.getFunc()).getTezInputs()[0]);
            }
        }
        // Make sure we don't find the scalar
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!scalarInputs.contains(pred.getOperatorKey().toString())) {
                valueInput.setInputKey(pred.getOperatorKey().toString());
                break;
            }
        }
    }
    JobControlCompiler.setOutputFormat(job);

    // Set parent plan in all operators. Currently the parent plan is really
    // used only when POStream or POSplit are present in the plan.
    new PhyPlanSetter(tezOp.plan).visit();

    // Serialize the execution plan
    payloadConf.set(PigProcessor.PLAN, ObjectSerializer.serialize(tezOp.plan));

    UDFContext.getUDFContext().serialize(payloadConf);

    MRToTezHelper.processMRSettings(payloadConf, globalConf);

    if (!pc.inIllustrator) {
        for (POStore store : stores) {
            // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
            store.setInputs(null);
            store.setParentPlan(null);
        }
        // We put them in the reduce because PigOutputCommitter checks the
        // ID of the task to see if it's a map, and if not, calls the reduce
        // committers.
        payloadConf.set(JobControlCompiler.PIG_MAP_STORES,
                ObjectSerializer.serialize(new ArrayList<POStore>()));
        payloadConf.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(stores));
    }

    if (tezOp.isNeedEstimateParallelism()) {
        payloadConf.setBoolean(PigProcessor.ESTIMATE_PARALLELISM, true);
        log.info("Estimate quantile for sample aggregation vertex " + tezOp.getOperatorKey().toString());
    }

    // Take our assembled configuration and create a vertex
    UserPayload userPayload = TezUtils.createUserPayloadFromConf(payloadConf);
    procDesc.setUserPayload(userPayload);

    Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, tezOp.getVertexParallelism(),
            isMap ? MRHelpers.getResourceForMRMapper(globalConf)
                    : MRHelpers.getResourceForMRReducer(globalConf));

    Map<String, String> taskEnv = new HashMap<String, String>();
    MRHelpers.updateEnvBasedOnMRTaskEnv(globalConf, taskEnv, isMap);
    vertex.setTaskEnvironment(taskEnv);

    // All these classes are @InterfaceAudience.Private in Hadoop. Switch to Tez methods in TEZ-1012
    // set the timestamps, public/private visibility of the archives and files
    ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(globalConf);
    // get DelegationToken for each cached file
    ClientDistributedCacheManager.getDelegationTokens(globalConf, job.getCredentials());
    MRApps.setupDistributedCache(globalConf, localResources);
    vertex.addTaskLocalFiles(localResources);

    vertex.setTaskLaunchCmdOpts(isMap ? MRHelpers.getJavaOptsForMRMapper(globalConf)
            : MRHelpers.getJavaOptsForMRReducer(globalConf));

    log.info("For vertex - " + tezOp.getOperatorKey().toString() + ": parallelism="
            + tezOp.getVertexParallelism() + ", memory=" + vertex.getTaskResource().getMemory() + ", java opts="
            + vertex.getTaskLaunchCmdOpts());

    // Right now there can only be one of each of these. Will need to be
    // more generic when there can be more.
    for (POLoad ld : tezOp.getLoaderInfo().getLoads()) {

        // TODO: These should get the globalConf, or a merged version that
        // keeps settings like pig.maxCombinedSplitSize
        vertex.setLocationHint(
                VertexLocationHint.create(tezOp.getLoaderInfo().getInputSplitInfo().getTaskLocationHints()));
        vertex.addDataSource(ld.getOperatorKey().toString(), DataSourceDescriptor.create(
                InputDescriptor.create(MRInput.class.getName())
                        .setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                                .setConfigurationBytes(TezUtils.createByteStringFromConf(payloadConf))
                                .setSplits(tezOp.getLoaderInfo().getInputSplitInfo().getSplitsProto()).build()
                                .toByteString().asReadOnlyByteBuffer())),
                InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
                dag.getCredentials()));
    }

    for (POStore store : stores) {

        ArrayList<POStore> emptyList = new ArrayList<POStore>();
        ArrayList<POStore> singleStore = new ArrayList<POStore>();
        singleStore.add(store);

        Configuration outputPayLoad = new Configuration(payloadConf);
        outputPayLoad.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(emptyList));
        outputPayLoad.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(singleStore));

        OutputDescriptor storeOutDescriptor = OutputDescriptor.create(MROutput.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(outputPayLoad));
        if (tezOp.getVertexGroupStores() != null) {
            OperatorKey vertexGroupKey = tezOp.getVertexGroupStores().get(store.getOperatorKey());
            if (vertexGroupKey != null) {
                getPlan().getOperator(vertexGroupKey).getVertexGroupInfo()
                        .setStoreOutputDescriptor(storeOutDescriptor);
                continue;
            }
        }
        vertex.addDataSink(store.getOperatorKey().toString(), new DataSinkDescriptor(storeOutDescriptor,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), dag.getCredentials()));
    }

    // LoadFunc and StoreFunc add delegation tokens to Job Credentials in
    // setLocation and setStoreLocation respectively (e.g. HBaseStorage).
    // InputFormat adds delegation tokens in getSplits and OutputFormat in
    // checkOutputSpecs (e.g. FileInputFormat and FileOutputFormat).
    if (stores.size() > 0) {
        new PigOutputFormat().checkOutputSpecs(job);
    }

    // Set the right VertexManagerPlugin
    if (tezOp.getEstimatedParallelism() != -1) {
        if (tezOp.isGlobalSort() || tezOp.isSkewedJoin()) {
            // Set VertexManagerPlugin to PartitionerDefinedVertexManager, which is able
            // to decrease/increase parallelism of sorting vertex dynamically
            // based on the numQuantiles calculated by sample aggregation vertex
            vertex.setVertexManagerPlugin(
                    VertexManagerPluginDescriptor.create(PartitionerDefinedVertexManager.class.getName()));
            log.info("Set VertexManagerPlugin to PartitionerDefinedParallelismVertexManager for vertex "
                    + tezOp.getOperatorKey().toString());
        } else {
            boolean containScatterGather = false;
            boolean containCustomPartitioner = false;
            for (TezEdgeDescriptor edge : tezOp.inEdges.values()) {
                if (edge.dataMovementType == DataMovementType.SCATTER_GATHER) {
                    containScatterGather = true;
                }
                if (edge.partitionerClass != null) {
                    containCustomPartitioner = true;
                }
            }
            if (containScatterGather && !containCustomPartitioner) {
                // Use auto-parallelism feature of ShuffleVertexManager to dynamically
                // reduce the parallelism of the vertex
                VertexManagerPluginDescriptor vmPluginDescriptor = VertexManagerPluginDescriptor
                        .create(ShuffleVertexManager.class.getName());
                Configuration vmPluginConf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
                vmPluginConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
                        true);
                if (vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                        InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) != InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) {
                    vmPluginConf.setLong(
                            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
                            vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                                    InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
                }
                vmPluginDescriptor.setUserPayload(TezUtils.createUserPayloadFromConf(vmPluginConf));
                vertex.setVertexManagerPlugin(vmPluginDescriptor);
                log.info("Set auto parallelism for vertex " + tezOp.getOperatorKey().toString());
            }
        }
    }

    // Reset udfcontext jobconf. It is not supposed to be set in the front end
    UDFContext.getUDFContext().addJobConf(null);
    return vertex;
}

From source file: org.apache.pig.CounterBasedErrorHandler.java

License: Apache License

public CounterBasedErrorHandler() {
    Configuration conf = UDFContext.getUDFContext().getJobConf();
    this.minErrors = conf.getLong(PigConfiguration.PIG_ERROR_HANDLING_MIN_ERROR_RECORDS, 0);
    this.errorThreshold = conf.getFloat(PigConfiguration.PIG_ERROR_HANDLING_THRESHOLD_PERCENT, 0.0f);
}

From source file: org.apache.pig.impl.util.SpillableMemoryManager.java

License: Apache License

public void configure(Configuration conf) {

    spillFileSizeThreshold = conf.getLong("pig.spill.size.threshold", spillFileSizeThreshold);
    gcActivationSize = conf.getLong("pig.spill.gc.activation.size", gcActivationSize);
    float memoryThresholdFraction = conf.getFloat(PigConfiguration.PIG_SPILL_MEMORY_USAGE_THRESHOLD_FRACTION,
            MEMORY_THRESHOLD_FRACTION_DEFAULT);
    float collectionThresholdFraction = conf.getFloat(PigConfiguration.PIG_SPILL_COLLECTION_THRESHOLD_FRACTION,
            COLLECTION_THRESHOLD_FRACTION_DEFAULT);
    long unusedMemoryThreshold = conf.getLong(PigConfiguration.PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE,
            UNUSED_MEMORY_THRESHOLD_DEFAULT);
    configureMemoryThresholds(memoryThresholdFraction, collectionThresholdFraction, unusedMemoryThreshold);
}

From source file: org.apache.pig.test.Util.java

License: Apache License

private static void assertConfLong(Configuration conf, String param, long expected) {
    assertEquals("Unexpected value found in configs for " + param, expected, conf.getLong(param, -1));
}

From source file: org.apache.rya.indexing.accumulo.ConfigUtils.java

License: Apache License

public static long getWriterMaxLatency(final Configuration conf) {
    return conf.getLong(CLOUDBASE_WRITER_MAX_LATENCY, WRITER_MAX_LATNECY);
}