List of usage examples for org.apache.hadoop.conf Configuration getLong

public long getLong(String name, long defaultValue)

Returns the value of the name property as a long; if no such property is set, defaultValue is returned.
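Every example below follows the same pattern: look up a numeric property on a Hadoop Configuration and fall back to a default when it is unset. A minimal, self-contained sketch of that pattern (the property name example.bytes.threshold is purely illustrative, not a key defined by Hadoop or Pig):

import org.apache.hadoop.conf.Configuration;

public class GetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Property not set yet: getLong returns the supplied default (64 MB here).
        long limit = conf.getLong("example.bytes.threshold", 64L * 1024 * 1024);
        System.out.println("limit = " + limit);

        // After the property is set, getLong parses and returns the stored value instead.
        conf.setLong("example.bytes.threshold", 128L * 1024 * 1024);
        System.out.println("limit = " + conf.getLong("example.bytes.threshold", 0L));
    }
}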
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java
License:Apache License
private boolean okToRunLocal(org.apache.hadoop.mapreduce.Job job, MapReduceOper mro, List<POLoad> lds) throws IOException {
    Configuration conf = job.getConfiguration();
    if (!conf.getBoolean(PigConfiguration.PIG_AUTO_LOCAL_ENABLED, false)) {
        return false;
    }
    long totalInputFileSize = InputSizeReducerEstimator.getTotalInputFileSize(conf, lds, job);
    long inputByteMax = conf.getLong(PigConfiguration.PIG_AUTO_LOCAL_INPUT_MAXBYTES, 100 * 1000 * 1000l);
    log.info("Size of input: " + totalInputFileSize + " bytes. Small job threshold: " + inputByteMax);
    if (totalInputFileSize < 0 || totalInputFileSize > inputByteMax) {
        return false;
    }
    int reducers = conf.getInt(MRConfiguration.REDUCE_TASKS, 1);
    log.info("No of reducers: " + reducers);
    if (reducers > 1) {
        return false;
    }
    return true;
}
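The method above decides whether a job is small enough to run in local mode. A minimal sketch of the client-side settings it reacts to, using the same PigConfiguration keys (the 200 MB threshold is an arbitrary illustrative value):

import org.apache.hadoop.conf.Configuration;
import org.apache.pig.PigConfiguration;

public class AutoLocalSettings {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Opt in to automatic local mode and raise the small-job threshold to 200 MB (illustrative value).
        conf.setBoolean(PigConfiguration.PIG_AUTO_LOCAL_ENABLED, true);
        conf.setLong(PigConfiguration.PIG_AUTO_LOCAL_INPUT_MAXBYTES, 200L * 1000 * 1000);
        System.out.println(conf.getLong(PigConfiguration.PIG_AUTO_LOCAL_INPUT_MAXBYTES, 100L * 1000 * 1000));
    }
}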
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.java
License:Apache License
protected List<InputSplit> getPigSplits(List<InputSplit> oneInputSplits, int inputIndex, ArrayList<OperatorKey> targetOps,
        long blockSize, boolean combinable, Configuration conf) throws IOException, InterruptedException {
    ArrayList<InputSplit> pigSplits = new ArrayList<InputSplit>();
    if (!combinable) {
        int splitIndex = 0;
        for (InputSplit inputSplit : oneInputSplits) {
            PigSplit pigSplit = new PigSplit(new InputSplit[] { inputSplit }, inputIndex, targetOps, splitIndex++);
            pigSplit.setConf(conf);
            pigSplits.add(pigSplit);
        }
        return pigSplits;
    } else {
        long maxCombinedSplitSize = conf.getLong("pig.maxCombinedSplitSize", 0);
        if (maxCombinedSplitSize == 0) // default is the block size
            maxCombinedSplitSize = blockSize;
        List<List<InputSplit>> combinedSplits = MapRedUtil.getCombinePigSplits(oneInputSplits, maxCombinedSplitSize, conf);
        for (int i = 0; i < combinedSplits.size(); i++)
            pigSplits.add(createPigSplit(combinedSplits.get(i), inputIndex, targetOps, i, conf));
        return pigSplits;
    }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputSplitFormat.java
License:Apache License
protected List<InputSplit> getPigSplits(List<InputSplit> oneInputSplits, int inputIndex, ArrayList<OperatorKey> targetOps,
        long blockSize, boolean combinable, Configuration conf) throws IOException, InterruptedException {
    ArrayList<InputSplit> pigSplits = new ArrayList<InputSplit>();
    if (!combinable) {
        int splitIndex = 0;
        for (InputSplit inputSplit : oneInputSplits) {
            PigSplit pigSplit = new PigSplit(new InputSplit[] { inputSplit }, inputIndex, targetOps, splitIndex++);
            pigSplit.setConf(conf);
            pigSplits.add(pigSplit);
        }
        return pigSplits;
    } else {
        long maxCombinedSplitSize = conf.getLong("pig.maxCombinedSplitSize", 0);
        if (maxCombinedSplitSize == 0) // default is the block size
            maxCombinedSplitSize = blockSize;
        List<List<InputSplit>> combinedSplits = MapRedUtil.getCombinePigSplits(oneInputSplits, maxCombinedSplitSize, conf);
        for (int i = 0; i < combinedSplits.size(); i++)
            pigSplits.add(createPigSplit(combinedSplits.get(i), inputIndex, targetOps, i, conf));
        return pigSplits;
    }
}
From source file:org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.java
License:Apache License
@Override
public Result processInput() throws ExecException {
    // Make sure the reporter is set, because it isn't getting carried
    // across in the serialization (don't know why). I suspect it's as
    // cheap to call the setReporter call everytime as to check whether I
    // have (hopefully java will inline it).
    if (!initialized) {
        func.setReporter(getReporter());
        func.setPigLogger(pigLogger);
        Configuration jobConf = UDFContext.getUDFContext().getJobConf();
        if (jobConf != null) {
            doTiming = jobConf.getBoolean(TIME_UDFS, false);
            if (doTiming) {
                counterGroup = funcSpec.toString();
                timingFrequency = jobConf.getLong(TIME_UDFS_FREQUENCY, 100L);
            }
        }
        // We initialize here instead of instantiateFunc because this is called
        // when actual processing has begun, whereas a function can be instantiated
        // on the frontend potentially (mainly for optimization)
        Schema tmpS = func.getInputSchema();
        if (tmpS != null) {
            //Currently, getInstanceForSchema returns null if no class was found. This works fine...
            //if it is null, the default will be used. We pass the context because if it happens that
            //the same Schema was generated elsewhere, we do not want to override user expectations
            inputTupleMaker = SchemaTupleFactory.getInstance(tmpS, false, GenContext.UDF);
            if (inputTupleMaker == null) {
                LOG.debug("No SchemaTupleFactory found for Schema [" + tmpS + "], using default TupleFactory");
                usingSchemaTupleFactory = false;
            } else {
                LOG.debug("Using SchemaTupleFactory for Schema: " + tmpS);
                usingSchemaTupleFactory = true;
            }
            //In the future, we could optionally use SchemaTuples for output as well
        }
        if (inputTupleMaker == null) {
            inputTupleMaker = TupleFactory.getInstance();
        }
        initialized = true;
    }
    Result res = new Result();
    if (input == null && (inputs == null || inputs.size() == 0)) {
        res.returnStatus = POStatus.STATUS_EOP;
        return res;
    }
    //Should be removed once the model is clear
    if (getReporter() != null) {
        getReporter().progress();
    }
    if (isInputAttached()) {
        res.result = input;
        res.returnStatus = POStatus.STATUS_OK;
        detachInput();
        return res;
    } else {
        //we decouple this because there may be cases where the size is known and it isn't a schema
        // tuple factory
        boolean knownSize = usingSchemaTupleFactory;
        int knownIndex = 0;
        res.result = inputTupleMaker.newTuple();
        Result temp = null;
        for (PhysicalOperator op : inputs) {
            temp = op.getNext(op.getResultType());
            if (temp.returnStatus != POStatus.STATUS_OK) {
                return temp;
            }
            if (op instanceof POProject && op.getResultType() == DataType.TUPLE) {
                POProject projOp = (POProject) op;
                if (projOp.isProjectToEnd()) {
                    Tuple trslt = (Tuple) temp.result;
                    Tuple rslt = (Tuple) res.result;
                    for (int i = 0; i < trslt.size(); i++) {
                        if (knownSize) {
                            rslt.set(knownIndex++, trslt.get(i));
                        } else {
                            rslt.append(trslt.get(i));
                        }
                    }
                    continue;
                }
            }
            if (knownSize) {
                ((Tuple) res.result).set(knownIndex++, temp.result);
            } else {
                ((Tuple) res.result).append(temp.result);
            }
        }
        res.returnStatus = temp.returnStatus;
        return res;
    }
}
From source file:org.apache.pig.backend.hadoop.executionengine.tez.plan.optimizer.TezOperDependencyParallelismEstimator.java
License:Apache License
@Override
public int estimateParallelism(TezOperPlan plan, TezOperator tezOper, Configuration conf) throws IOException {
    if (tezOper.isVertexGroup()) {
        return -1;
    }
    // TODO: If map opts and reduce opts are same estimate higher parallelism
    // for tasks based on the count of number of map tasks else be conservative as now
    maxTaskCount = conf.getInt(PigReducerEstimator.MAX_REDUCER_COUNT_PARAM,
            PigReducerEstimator.DEFAULT_MAX_REDUCER_COUNT_PARAM);
    bytesPerReducer = conf.getLong(PigReducerEstimator.BYTES_PER_REDUCER_PARAM,
            PigReducerEstimator.DEFAULT_BYTES_PER_REDUCER);
    // If parallelism is set explicitly, respect it
    if (!tezOper.isIntermediateReducer() && tezOper.getRequestedParallelism() != -1) {
        return tezOper.getRequestedParallelism();
    }
    // If we have already estimated parallelism, use that one
    if (tezOper.getEstimatedParallelism() != -1) {
        return tezOper.getEstimatedParallelism();
    }
    List<TezOperator> preds = plan.getPredecessors(tezOper);
    if (preds == null) {
        throw new IOException("Cannot estimate parallelism for source vertex");
    }
    double estimatedParallelism = 0;
    for (Entry<OperatorKey, TezEdgeDescriptor> entry : tezOper.inEdges.entrySet()) {
        TezOperator pred = getPredecessorWithKey(plan, tezOper, entry.getKey().toString());
        // Don't include broadcast edge, broadcast edge is used for
        // replicated join (covered in TezParallelismFactorVisitor.visitFRJoin)
        // and sample/scalar (does not impact parallelism)
        if (entry.getValue().dataMovementType == DataMovementType.SCATTER_GATHER
                || entry.getValue().dataMovementType == DataMovementType.ONE_TO_ONE) {
            double predParallelism = pred.getEffectiveParallelism(pc.defaultParallel);
            if (predParallelism == -1) {
                throw new IOException("Cannot estimate parallelism for " + tezOper.getOperatorKey().toString()
                        + ", effective parallelism for predecessor " + tezOper.getOperatorKey().toString()
                        + " is -1");
            }
            //For cases like Union we can just limit to sum of pred vertices parallelism
            boolean applyFactor = !tezOper.isUnion();
            if (!pred.isVertexGroup() && applyFactor) {
                predParallelism = predParallelism * pred.getParallelismFactor(tezOper);
                if (pred.getTotalInputFilesSize() > 0) {
                    // Estimate similar to mapreduce and use the maximum of two
                    int parallelismBySize = (int) Math
                            .ceil((double) pred.getTotalInputFilesSize() / bytesPerReducer);
                    predParallelism = Math.max(predParallelism, parallelismBySize);
                }
            }
            estimatedParallelism += predParallelism;
        }
    }
    int roundedEstimatedParallelism = (int) Math.ceil(estimatedParallelism);
    if (tezOper.isIntermediateReducer() && tezOper.isOverrideIntermediateParallelism()) {
        // Estimated reducers should not be more than the configured limit
        roundedEstimatedParallelism = Math.min(roundedEstimatedParallelism, maxTaskCount);
        int userSpecifiedParallelism = pc.defaultParallel;
        if (tezOper.getRequestedParallelism() != -1) {
            userSpecifiedParallelism = tezOper.getRequestedParallelism();
        }
        int intermediateParallelism = Math.max(userSpecifiedParallelism, roundedEstimatedParallelism);
        if (userSpecifiedParallelism != -1
                && (intermediateParallelism > 200 && intermediateParallelism > (2 * userSpecifiedParallelism))) {
            // Estimated reducers shall not be more than 2x of requested parallelism
            // if greater than 200 and we are overriding user specified values
            intermediateParallelism = 2 * userSpecifiedParallelism;
        }
        roundedEstimatedParallelism = intermediateParallelism;
    } else {
        roundedEstimatedParallelism = Math.min(roundedEstimatedParallelism, maxTaskCount);
    }
    if (roundedEstimatedParallelism == 0) {
        roundedEstimatedParallelism = 1; // We need to produce empty output file
    }
    return roundedEstimatedParallelism;
}
From source file:org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder.java
License:Apache License
private Vertex newVertex(TezOperator tezOp, boolean isMap) throws IOException, ClassNotFoundException, InterruptedException {
    ProcessorDescriptor procDesc = ProcessorDescriptor.create(tezOp.getProcessorName());
    // Pass physical plans to vertex as user payload.
    JobConf payloadConf = new JobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), false));
    // We do this so that dag.getCredentials(), job.getCredentials(),
    // job.getConfiguration().getCredentials() all reference the same Credentials object
    // Unfortunately there is no setCredentials() on Job
    payloadConf.setCredentials(dag.getCredentials());
    // We won't actually use this job, but we need it to talk with the Load Store funcs
    @SuppressWarnings("deprecation")
    Job job = new Job(payloadConf);
    payloadConf = (JobConf) job.getConfiguration();
    if (tezOp.sampleOperator != null) {
        payloadConf.set(PigProcessor.SAMPLE_VERTEX, tezOp.sampleOperator.getOperatorKey().toString());
    }
    if (tezOp.sortOperator != null) {
        payloadConf.set(PigProcessor.SORT_VERTEX, tezOp.sortOperator.getOperatorKey().toString());
    }
    String tmp;
    long maxCombinedSplitSize = 0;
    if (!tezOp.combineSmallSplits()
            || pc.getProperties().getProperty(PigConfiguration.PIG_SPLIT_COMBINATION, "true").equals("false"))
        payloadConf.setBoolean(PigConfiguration.PIG_NO_SPLIT_COMBINATION, true);
    else if ((tmp = pc.getProperties().getProperty(PigConfiguration.PIG_MAX_COMBINED_SPLIT_SIZE, null)) != null) {
        try {
            maxCombinedSplitSize = Long.parseLong(tmp);
        } catch (NumberFormatException e) {
            log.warn("Invalid numeric format for pig.maxCombinedSplitSize; use the default maximum combined split size");
        }
    }
    if (maxCombinedSplitSize > 0)
        payloadConf.setLong("pig.maxCombinedSplitSize", maxCombinedSplitSize);
    payloadConf.set("pig.inputs", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInp()));
    payloadConf.set("pig.inpSignatures", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpSignatureLists()));
    payloadConf.set("pig.inpLimits", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpLimits()));
    // Process stores
    LinkedList<POStore> stores = processStores(tezOp, payloadConf, job);
    payloadConf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    payloadConf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
    payloadConf.set("exectype", "TEZ");
    payloadConf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    payloadConf.setClass(MRConfiguration.INPUTFORMAT_CLASS, PigInputFormat.class, InputFormat.class);
    // Set parent plan for all operators in the Tez plan.
    new PhyPlanSetter(tezOp.plan).visit();
    // Set the endOfAllInput flag on the physical plan if certain operators that
    // use this property (such as STREAM) are present in the plan.
    EndOfAllInputSetter.EndOfAllInputChecker checker = new EndOfAllInputSetter.EndOfAllInputChecker(tezOp.plan);
    checker.visit();
    if (checker.isEndOfAllInputPresent()) {
        payloadConf.set(JobControlCompiler.END_OF_INP_IN_MAP, "true");
    }
    // Configure the classes for incoming shuffles to this TezOp
    // TODO: Refactor out resetting input keys, PIG-3957
    List<PhysicalOperator> roots = tezOp.plan.getRoots();
    if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
        POPackage pack = (POPackage) roots.get(0);
        List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
        if (succsList != null) {
            succsList = new ArrayList<PhysicalOperator>(succsList);
        }
        byte keyType = pack.getPkgr().getKeyType();
        tezOp.plan.remove(pack);
        payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
        setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);
        POShuffleTezLoad newPack;
        newPack = new POShuffleTezLoad(pack);
        if (tezOp.isSkewedJoin()) {
            newPack.setSkewedJoins(true);
        }
        tezOp.plan.add(newPack);
        // Set input keys for POShuffleTezLoad. This is used to identify
        // the inputs that are attached to the POShuffleTezLoad in the
        // backend.
        Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (tezOp.sampleOperator != null && tezOp.sampleOperator == pred) {
                // skip sample vertex input
            } else {
                String inputKey = pred.getOperatorKey().toString();
                if (pred.isVertexGroup()) {
                    pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
                }
                LinkedList<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(pred.plan, POLocalRearrangeTez.class);
                for (POLocalRearrangeTez lr : lrs) {
                    if (lr.isConnectedToPackage() && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                        localRearrangeMap.put((int) lr.getIndex(), inputKey);
                    }
                }
            }
        }
        for (Map.Entry<Integer, String> entry : localRearrangeMap.entrySet()) {
            newPack.addInputKey(entry.getValue());
        }
        if (succsList != null) {
            for (PhysicalOperator succs : succsList) {
                tezOp.plan.connect(newPack, succs);
            }
        }
        setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
    } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
        POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
        // TODO Need to fix multiple input key mapping
        TezOperator identityInOutPred = null;
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!pred.isSampleAggregation()) {
                identityInOutPred = pred;
                break;
            }
        }
        identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
    } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
        POValueInputTez valueInput = (POValueInputTez) roots.get(0);
        LinkedList<String> scalarInputs = new LinkedList<String>();
        for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class)) {
            if (userFunc.getFunc() instanceof ReadScalarsTez) {
                scalarInputs.add(((ReadScalarsTez) userFunc.getFunc()).getTezInputs()[0]);
            }
        }
        // Make sure we don't find the scalar
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!scalarInputs.contains(pred.getOperatorKey().toString())) {
                valueInput.setInputKey(pred.getOperatorKey().toString());
                break;
            }
        }
    }
    JobControlCompiler.setOutputFormat(job);
    // set parent plan in all operators. currently the parent plan is really
    // used only when POStream, POSplit are present in the plan
    new PhyPlanSetter(tezOp.plan).visit();
    // Serialize the execution plan
    payloadConf.set(PigProcessor.PLAN, ObjectSerializer.serialize(tezOp.plan));
    UDFContext.getUDFContext().serialize(payloadConf);
    MRToTezHelper.processMRSettings(payloadConf, globalConf);
    if (!pc.inIllustrator) {
        for (POStore store : stores) {
            // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
            store.setInputs(null);
            store.setParentPlan(null);
        }
        // We put them in the reduce because PigOutputCommitter checks the
        // ID of the task to see if it's a map, and if not, calls the reduce
        // committers.
        payloadConf.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(new ArrayList<POStore>()));
        payloadConf.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(stores));
    }
    if (tezOp.isNeedEstimateParallelism()) {
        payloadConf.setBoolean(PigProcessor.ESTIMATE_PARALLELISM, true);
        log.info("Estimate quantile for sample aggregation vertex " + tezOp.getOperatorKey().toString());
    }
    // Take our assembled configuration and create a vertex
    UserPayload userPayload = TezUtils.createUserPayloadFromConf(payloadConf);
    procDesc.setUserPayload(userPayload);
    Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, tezOp.getVertexParallelism(),
            isMap ? MRHelpers.getResourceForMRMapper(globalConf) : MRHelpers.getResourceForMRReducer(globalConf));
    Map<String, String> taskEnv = new HashMap<String, String>();
    MRHelpers.updateEnvBasedOnMRTaskEnv(globalConf, taskEnv, isMap);
    vertex.setTaskEnvironment(taskEnv);
    // All these classes are @InterfaceAudience.Private in Hadoop. Switch to Tez methods in TEZ-1012
    // set the timestamps, public/private visibility of the archives and files
    ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(globalConf);
    // get DelegationToken for each cached file
    ClientDistributedCacheManager.getDelegationTokens(globalConf, job.getCredentials());
    MRApps.setupDistributedCache(globalConf, localResources);
    vertex.addTaskLocalFiles(localResources);
    vertex.setTaskLaunchCmdOpts(isMap ? MRHelpers.getJavaOptsForMRMapper(globalConf)
            : MRHelpers.getJavaOptsForMRReducer(globalConf));
    log.info("For vertex - " + tezOp.getOperatorKey().toString() + ": parallelism=" + tezOp.getVertexParallelism()
            + ", memory=" + vertex.getTaskResource().getMemory() + ", java opts=" + vertex.getTaskLaunchCmdOpts());
    // Right now there can only be one of each of these. Will need to be
    // more generic when there can be more.
    for (POLoad ld : tezOp.getLoaderInfo().getLoads()) {
        // TODO: These should get the globalConf, or a merged version that
        // keeps settings like pig.maxCombinedSplitSize
        vertex.setLocationHint(
                VertexLocationHint.create(tezOp.getLoaderInfo().getInputSplitInfo().getTaskLocationHints()));
        vertex.addDataSource(ld.getOperatorKey().toString(), DataSourceDescriptor.create(
                InputDescriptor.create(MRInput.class.getName())
                        .setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                                .setConfigurationBytes(TezUtils.createByteStringFromConf(payloadConf))
                                .setSplits(tezOp.getLoaderInfo().getInputSplitInfo().getSplitsProto()).build()
                                .toByteString().asReadOnlyByteBuffer())),
                InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
                dag.getCredentials()));
    }
    for (POStore store : stores) {
        ArrayList<POStore> emptyList = new ArrayList<POStore>();
        ArrayList<POStore> singleStore = new ArrayList<POStore>();
        singleStore.add(store);
        Configuration outputPayLoad = new Configuration(payloadConf);
        outputPayLoad.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(emptyList));
        outputPayLoad.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(singleStore));
        OutputDescriptor storeOutDescriptor = OutputDescriptor.create(MROutput.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(outputPayLoad));
        if (tezOp.getVertexGroupStores() != null) {
            OperatorKey vertexGroupKey = tezOp.getVertexGroupStores().get(store.getOperatorKey());
            if (vertexGroupKey != null) {
                getPlan().getOperator(vertexGroupKey).getVertexGroupInfo()
                        .setStoreOutputDescriptor(storeOutDescriptor);
                continue;
            }
        }
        vertex.addDataSink(store.getOperatorKey().toString(), new DataSinkDescriptor(storeOutDescriptor,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), dag.getCredentials()));
    }
    // LoadFunc and StoreFunc add delegation tokens to Job Credentials in
    // setLocation and setStoreLocation respectively. For eg: HBaseStorage
    // InputFormat add delegation token in getSplits and OutputFormat in
    // checkOutputSpecs. For eg: FileInputFormat and FileOutputFormat
    if (stores.size() > 0) {
        new PigOutputFormat().checkOutputSpecs(job);
    }
    // Set the right VertexManagerPlugin
    if (tezOp.getEstimatedParallelism() != -1) {
        if (tezOp.isGlobalSort() || tezOp.isSkewedJoin()) {
            // Set VertexManagerPlugin to PartitionerDefinedVertexManager, which is able
            // to decrease/increase parallelism of sorting vertex dynamically
            // based on the numQuantiles calculated by sample aggregation vertex
            vertex.setVertexManagerPlugin(
                    VertexManagerPluginDescriptor.create(PartitionerDefinedVertexManager.class.getName()));
            log.info("Set VertexManagerPlugin to PartitionerDefinedParallelismVertexManager for vertex "
                    + tezOp.getOperatorKey().toString());
        } else {
            boolean containScatterGather = false;
            boolean containCustomPartitioner = false;
            for (TezEdgeDescriptor edge : tezOp.inEdges.values()) {
                if (edge.dataMovementType == DataMovementType.SCATTER_GATHER) {
                    containScatterGather = true;
                }
                if (edge.partitionerClass != null) {
                    containCustomPartitioner = true;
                }
            }
            if (containScatterGather && !containCustomPartitioner) {
                // Use auto-parallelism feature of ShuffleVertexManager to dynamically
                // reduce the parallelism of the vertex
                VertexManagerPluginDescriptor vmPluginDescriptor = VertexManagerPluginDescriptor
                        .create(ShuffleVertexManager.class.getName());
                Configuration vmPluginConf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
                vmPluginConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
                if (vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                        InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) != InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) {
                    vmPluginConf.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
                            vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                                    InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
                }
                vmPluginDescriptor.setUserPayload(TezUtils.createUserPayloadFromConf(vmPluginConf));
                vertex.setVertexManagerPlugin(vmPluginDescriptor);
                log.info("Set auto parallelism for vertex " + tezOp.getOperatorKey().toString());
            }
        }
    }
    // Reset udfcontext jobconf. It is not supposed to be set in the front end
    UDFContext.getUDFContext().addJobConf(null);
    return vertex;
}
From source file:org.apache.pig.CounterBasedErrorHandler.java
License:Apache License
public CounterBasedErrorHandler() {
    Configuration conf = UDFContext.getUDFContext().getJobConf();
    this.minErrors = conf.getLong(PigConfiguration.PIG_ERROR_HANDLING_MIN_ERROR_RECORDS, 0);
    this.errorThreshold = conf.getFloat(PigConfiguration.PIG_ERROR_HANDLING_THRESHOLD_PERCENT, 0.0f);
}
From source file:org.apache.pig.impl.util.SpillableMemoryManager.java
License:Apache License
public void configure(Configuration conf) {
    spillFileSizeThreshold = conf.getLong("pig.spill.size.threshold", spillFileSizeThreshold);
    gcActivationSize = conf.getLong("pig.spill.gc.activation.size", gcActivationSize);
    float memoryThresholdFraction = conf.getFloat(PigConfiguration.PIG_SPILL_MEMORY_USAGE_THRESHOLD_FRACTION,
            MEMORY_THRESHOLD_FRACTION_DEFAULT);
    float collectionThresholdFraction = conf.getFloat(PigConfiguration.PIG_SPILL_COLLECTION_THRESHOLD_FRACTION,
            COLLECTION_THRESHOLD_FRACTION_DEFAULT);
    long unusedMemoryThreshold = conf.getLong(PigConfiguration.PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE,
            UNUSED_MEMORY_THRESHOLD_DEFAULT);
    configureMemoryThresholds(memoryThresholdFraction, collectionThresholdFraction, unusedMemoryThreshold);
}
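Note that configure() passes the current field values as the getLong defaults, so keys left unset keep the existing thresholds. A minimal sketch of supplying the two long-valued keys read above (the chosen sizes are illustrative only):

import org.apache.hadoop.conf.Configuration;

public class SpillThresholdSettings {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Illustrative values; keys left unset would fall back to the manager's current thresholds.
        conf.setLong("pig.spill.size.threshold", 5L * 1024 * 1024);
        conf.setLong("pig.spill.gc.activation.size", 40L * 1024 * 1024);
        System.out.println(conf.getLong("pig.spill.size.threshold", 0L));
        System.out.println(conf.getLong("pig.spill.gc.activation.size", 0L));
    }
}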
From source file:org.apache.pig.test.Util.java
License:Apache License
private static void assertConfLong(Configuration conf, String param, long expected) {
    assertEquals("Unexpected value found in configs for " + param, expected, conf.getLong(param, -1));
}
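A hypothetical call to this test helper, checking a key that appears elsewhere on this page (both the key and the expected value are illustrative):

Configuration conf = new Configuration();
conf.setLong("pig.maxCombinedSplitSize", 256L * 1024 * 1024);
assertConfLong(conf, "pig.maxCombinedSplitSize", 256L * 1024 * 1024);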
From source file:org.apache.rya.indexing.accumulo.ConfigUtils.java
License:Apache License
public static long getWriterMaxLatency(final Configuration conf) {
    return conf.getLong(CLOUDBASE_WRITER_MAX_LATENCY, WRITER_MAX_LATNECY);
}