List of usage examples for org.apache.hadoop.mapreduce.Job#getCredentials()
public Credentials getCredentials()
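Before the project examples below, here is a minimal, self-contained sketch of the basic pattern (illustrative only; the class name and the "my.secret.alias" key are made up): getCredentials() returns the job's Credentials object, to which secret keys and delegation tokens can be added before submission so they are shipped to the tasks without appearing in the job configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.Credentials;

public class GetCredentialsSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "credentials-sketch");
        // Credentials live alongside, not inside, the job configuration.
        Credentials creds = job.getCredentials();
        // Secret keys are distributed to the tasks securely rather than being
        // written into the (world-readable) job configuration.
        creds.addSecretKey(new Text("my.secret.alias"), "secret-value".getBytes("UTF-8"));
        // Delegation tokens are added the same way, e.g.:
        // creds.addToken(new Text("my token"), token);
        // job.submit();
    }
}

Most of the examples that follow use this pattern: addToken() for delegation tokens, addSecretKey() for configuration objects that must stay private, or addAll() to merge a previously saved Credentials object back into the job.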
From source file:org.apache.hcatalog.pig.HCatStorer.java
License:Apache License
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
            .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, false);

    Configuration config = job.getConfiguration();
    config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign);
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { sign });
    String[] userStr = location.split("\\.");

    if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString());
        }
        Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign);
        if (crd != null) {
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        OutputJobInfo outputJobInfo;
        if (userStr.length == 2) {
            outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions);
        } else if (userStr.length == 1) {
            outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions);
        } else {
            throw new FrontendException(
                    "location " + location + " is invalid. It must be of the form [db.]table",
                    PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA));
        if (schema != null) {
            pigSchema = schema;
        }
        if (pigSchema == null) {
            throw new FrontendException("Schema for data cannot be determined.", PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION);
        if (externalLocation != null) {
            outputJobInfo.setLocation(externalLocation);
        }
        try {
            HCatOutputFormat.setOutput(job, outputJobInfo);
        } catch (HCatException he) {
            // Pass the message to the user - essentially something about the table
            // information passed to HCatOutputFormat was not right.
            throw new PigException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
        HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job);
        try {
            doSchemaValidations(pigSchema, hcatTblSchema);
        } catch (HCatException he) {
            throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
        computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema);
        HCatOutputFormat.setSchema(job, computedSchema);
        udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));

        // We will store all the new/changed properties in the job in the UDF context,
        // so that the HCatOutputFormat.setOutput and setSchema methods need not be
        // called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        // Store credentials in a private hash map and not the UDF context to
        // make sure they are not public.
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials());
        udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true);
    }
}
From source file:org.apache.hcatalog.templeton.tool.TempletonControllerJob.java
License:Apache License
/**
 * Enqueue the job and print out the job id for later collection.
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args));
    conf.set("user.name", UserGroupInformation.getCurrentUser().getShortUserName());

    Job job = new Job(conf);
    job.setJarByClass(TempletonControllerJob.class);
    job.setJobName("TempletonControllerJob");
    job.setMapperClass(LaunchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(SingleInputFormat.class);
    NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>();
    job.setOutputFormatClass(of.getClass());
    job.setNumReduceTasks(0);

    JobClient jc = new JobClient(new JobConf(job.getConfiguration()));
    Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
    job.getCredentials().addToken(new Text("mr token"), mrdt);

    job.submit();
    submittedJobId = job.getJobID();
    return 0;
}
From source file:org.apache.hive.hcatalog.mapreduce.Security.java
License:Apache License
void handleSecurity(Job job, OutputJobInfo outputJobInfo, IMetaStoreClient client, Configuration conf,
        boolean harRequested) throws IOException, MetaException, TException, Exception {
    handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested);
}
From source file:org.apache.hive.hcatalog.pig.HCatLoader.java
License:Apache License
@Override
public void setLocation(String location, Job job) throws IOException {
    HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
            .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);

    UDFContext udfContext = UDFContext.getUDFContext();
    Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
    job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature);

    Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location);
    dbName = dbTablePair.first;
    tableName = dbTablePair.second;

    RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO);
    // get partitionFilterString stored in the UDFContext - it would have
    // been stored there by an earlier call to setPartitionFilter
    // call setInput on HCatInputFormat only in the frontend because internally
    // it makes calls to the hcat server - we don't want these to happen in
    // the backend
    // in the hadoop front end mapred.task.id property will not be set in
    // the Configuration
    if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(),
                    emr.nextElement().toString());
        }
        if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            // Combine credentials; credentials from the job take precedence for freshness
            Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());

        InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil
                .deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));

        SpecialCases.addSpecialCasesParametersForHCatLoader(job.getConfiguration(),
                inputJobInfo.getTableInfo());

        // We will store all the new/changed properties in the job in the UDF context,
        // so that the HCatInputFormat.setInput method need not be called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true);

        // Store credentials in a private hash map and not the UDF context to
        // make sure they are not public.
        Credentials crd = new Credentials();
        crd.addAll(job.getCredentials());
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd);
    }

    // Need to also push projections by calling setOutputSchema on
    // HCatInputFormat - we have to get the RequiredFields information
    // from the UDFContext, translate it to a Schema and then pass it.
    // The reason we do this here is because setLocation() is called by
    // Pig runtime at InputFormat.getSplits() and
    // InputFormat.createRecordReader() time - we are not sure when
    // HCatInputFormat needs to know about pruned projections - so doing it
    // here will ensure we communicate to HCatInputFormat about pruned
    // projections at getSplits() and createRecordReader() time
    if (requiredFieldsInfo != null) {
        // convert to hcatschema and pass to HCatInputFormat
        try {
            // push down projections to columnar store works for RCFile and ORCFile
            ArrayList<Integer> list = new ArrayList<Integer>(requiredFieldsInfo.getFields().size());
            for (RequiredField rf : requiredFieldsInfo.getFields()) {
                list.add(rf.getIndex());
            }
            ColumnProjectionUtils.setReadColumns(job.getConfiguration(), list);
            outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
            HCatInputFormat.setOutputSchema(job, outputSchema);
        } catch (Exception e) {
            throw new IOException(e);
        }
    } else {
        // else - this means pig's optimizer never invoked the pushProjection
        // method - so we need all fields and hence we should not call the
        // setOutputSchema on HCatInputFormat
        ColumnProjectionUtils.setReadAllColumns(job.getConfiguration());
        if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            try {
                HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
                outputSchema = hcatTableSchema;
                HCatInputFormat.setOutputSchema(job, outputSchema);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("outputSchema=" + outputSchema);
    }
}
From source file:org.apache.hive.hcatalog.pig.HCatStorer.java
License:Apache License
/**
 * @param location databaseName.tableName
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    Configuration config = job.getConfiguration();
    config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign);
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { sign });
    String[] userStr = location.split("\\.");

    if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString());
        }
        Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign);
        if (crd != null) {
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        OutputJobInfo outputJobInfo;
        if (userStr.length == 2) {
            outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions);
        } else if (userStr.length == 1) {
            outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions);
        } else {
            throw new FrontendException(
                    "location " + location + " is invalid. It must be of the form [db.]table",
                    PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA));
        if (schema != null) {
            pigSchema = schema;
        }
        if (pigSchema == null) {
            throw new FrontendException("Schema for data cannot be determined.", PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION);
        if (externalLocation != null) {
            outputJobInfo.setLocation(externalLocation);
        }
        try {
            HCatOutputFormat.setOutput(job, outputJobInfo);
        } catch (HCatException he) {
            // Pass the message to the user - essentially something about the table
            // information passed to HCatOutputFormat was not right.
            throw new PigException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
        HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
        try {
            doSchemaValidations(pigSchema, hcatTblSchema);
        } catch (HCatException he) {
            throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
        computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema);
        HCatOutputFormat.setSchema(job, computedSchema);
        udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));

        // We will store all the new/changed properties in the job in the UDF context,
        // so that the HCatOutputFormat.setOutput and setSchema methods need not be
        // called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        // Store credentials in a private hash map and not the UDF context to
        // make sure they are not public.
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials());
        udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true);
    }
}
From source file:org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob.java
License:Apache License
/**
 * Enqueue the job and print out the job id for later collection.
 * @see org.apache.hive.hcatalog.templeton.CompleteDelegator
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, TException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Preparing to submit job: " + Arrays.toString(args));
    }
    Configuration conf = getConf();

    conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args));
    String memoryMb = appConf.mapperMemoryMb();
    if (memoryMb != null && memoryMb.length() != 0) {
        conf.set(AppConfig.HADOOP_MAP_MEMORY_MB, memoryMb);
    }
    String amMemoryMB = appConf.amMemoryMb();
    if (amMemoryMB != null && !amMemoryMB.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_MEMORY_MB, amMemoryMB);
    }
    String amJavaOpts = appConf.controllerAMChildOpts();
    if (amJavaOpts != null && !amJavaOpts.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_JAVA_OPTS, amJavaOpts);
    }

    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    conf.set("user.name", user);
    Job job = new Job(conf);
    job.setJarByClass(LaunchMapper.class);
    job.setJobName(TempletonControllerJob.class.getSimpleName());
    job.setMapperClass(LaunchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(SingleInputFormat.class);
    NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>();
    job.setOutputFormatClass(of.getClass());
    job.setNumReduceTasks(0);

    JobClient jc = new JobClient(new JobConf(job.getConfiguration()));

    if (UserGroupInformation.isSecurityEnabled()) {
        Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
        job.getCredentials().addToken(new Text("mr token"), mrdt);
    }
    String metastoreTokenStrForm = addHMSToken(job, user);

    job.submit();

    submittedJobId = job.getJobID();

    if (metastoreTokenStrForm != null) {
        // so that it can be cancelled later from CompleteDelegator
        DelegationTokenCache.getStringFormTokenCache().storeDelegationToken(submittedJobId.toString(),
                metastoreTokenStrForm);
        LOG.debug("Added metastore delegation token for jobId=" + submittedJobId.toString() + " user=" + user);
    }
    return 0;
}
From source file:org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob.java
License:Apache License
private String addHMSToken(Job job, String user) throws IOException, InterruptedException, TException {
    if (!secureMetastoreAccess) {
        return null;
    }
    Token<org.apache.hadoop.hive.thrift.DelegationTokenIdentifier> hiveToken =
            new Token<org.apache.hadoop.hive.thrift.DelegationTokenIdentifier>();
    String metastoreTokenStrForm = buildHcatDelegationToken(user);
    hiveToken.decodeFromUrlString(metastoreTokenStrForm);
    job.getCredentials().addToken(new Text(SecureProxySupport.HCAT_SERVICE), hiveToken);
    return metastoreTokenStrForm;
}
From source file:org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder.java
License:Apache License
private Vertex newVertex(TezOperator tezOp, boolean isMap)
        throws IOException, ClassNotFoundException, InterruptedException {
    ProcessorDescriptor procDesc = ProcessorDescriptor.create(tezOp.getProcessorName());

    // Pass physical plans to vertex as user payload.
    JobConf payloadConf = new JobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), false));

    // We do this so that dag.getCredentials(), job.getCredentials(),
    // job.getConfiguration().getCredentials() all reference the same Credentials object
    // Unfortunately there is no setCredentials() on Job
    payloadConf.setCredentials(dag.getCredentials());
    // We won't actually use this job, but we need it to talk with the Load Store funcs
    @SuppressWarnings("deprecation")
    Job job = new Job(payloadConf);
    payloadConf = (JobConf) job.getConfiguration();

    if (tezOp.sampleOperator != null) {
        payloadConf.set(PigProcessor.SAMPLE_VERTEX, tezOp.sampleOperator.getOperatorKey().toString());
    }

    if (tezOp.sortOperator != null) {
        payloadConf.set(PigProcessor.SORT_VERTEX, tezOp.sortOperator.getOperatorKey().toString());
    }

    String tmp;
    long maxCombinedSplitSize = 0;
    if (!tezOp.combineSmallSplits()
            || pc.getProperties().getProperty(PigConfiguration.PIG_SPLIT_COMBINATION, "true").equals("false"))
        payloadConf.setBoolean(PigConfiguration.PIG_NO_SPLIT_COMBINATION, true);
    else if ((tmp = pc.getProperties().getProperty(PigConfiguration.PIG_MAX_COMBINED_SPLIT_SIZE, null)) != null) {
        try {
            maxCombinedSplitSize = Long.parseLong(tmp);
        } catch (NumberFormatException e) {
            log.warn("Invalid numeric format for pig.maxCombinedSplitSize; use the default maximum combined split size");
        }
    }
    if (maxCombinedSplitSize > 0)
        payloadConf.setLong("pig.maxCombinedSplitSize", maxCombinedSplitSize);

    payloadConf.set("pig.inputs", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInp()));
    payloadConf.set("pig.inpSignatures", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpSignatureLists()));
    payloadConf.set("pig.inpLimits", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpLimits()));

    // Process stores
    LinkedList<POStore> stores = processStores(tezOp, payloadConf, job);

    payloadConf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    payloadConf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
    payloadConf.set("exectype", "TEZ");
    payloadConf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    payloadConf.setClass(MRConfiguration.INPUTFORMAT_CLASS, PigInputFormat.class, InputFormat.class);

    // Set parent plan for all operators in the Tez plan.
    new PhyPlanSetter(tezOp.plan).visit();

    // Set the endOfAllInput flag on the physical plan if certain operators that
    // use this property (such as STREAM) are present in the plan.
    EndOfAllInputSetter.EndOfAllInputChecker checker = new EndOfAllInputSetter.EndOfAllInputChecker(tezOp.plan);
    checker.visit();
    if (checker.isEndOfAllInputPresent()) {
        payloadConf.set(JobControlCompiler.END_OF_INP_IN_MAP, "true");
    }

    // Configure the classes for incoming shuffles to this TezOp
    // TODO: Refactor out resetting input keys, PIG-3957
    List<PhysicalOperator> roots = tezOp.plan.getRoots();
    if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
        POPackage pack = (POPackage) roots.get(0);

        List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
        if (succsList != null) {
            succsList = new ArrayList<PhysicalOperator>(succsList);
        }
        byte keyType = pack.getPkgr().getKeyType();
        tezOp.plan.remove(pack);
        payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
        setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);
        POShuffleTezLoad newPack;
        newPack = new POShuffleTezLoad(pack);
        if (tezOp.isSkewedJoin()) {
            newPack.setSkewedJoins(true);
        }
        tezOp.plan.add(newPack);

        // Set input keys for POShuffleTezLoad. This is used to identify
        // the inputs that are attached to the POShuffleTezLoad in the
        // backend.
        Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (tezOp.sampleOperator != null && tezOp.sampleOperator == pred) {
                // skip sample vertex input
            } else {
                String inputKey = pred.getOperatorKey().toString();
                if (pred.isVertexGroup()) {
                    pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
                }
                LinkedList<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(pred.plan,
                        POLocalRearrangeTez.class);
                for (POLocalRearrangeTez lr : lrs) {
                    if (lr.isConnectedToPackage()
                            && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                        localRearrangeMap.put((int) lr.getIndex(), inputKey);
                    }
                }
            }
        }
        for (Map.Entry<Integer, String> entry : localRearrangeMap.entrySet()) {
            newPack.addInputKey(entry.getValue());
        }

        if (succsList != null) {
            for (PhysicalOperator succs : succsList) {
                tezOp.plan.connect(newPack, succs);
            }
        }

        setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
    } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
        POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
        // TODO Need to fix multiple input key mapping
        TezOperator identityInOutPred = null;
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!pred.isSampleAggregation()) {
                identityInOutPred = pred;
                break;
            }
        }
        identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
    } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
        POValueInputTez valueInput = (POValueInputTez) roots.get(0);

        LinkedList<String> scalarInputs = new LinkedList<String>();
        for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class)) {
            if (userFunc.getFunc() instanceof ReadScalarsTez) {
                scalarInputs.add(((ReadScalarsTez) userFunc.getFunc()).getTezInputs()[0]);
            }
        }
        // Make sure we don't find the scalar
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!scalarInputs.contains(pred.getOperatorKey().toString())) {
                valueInput.setInputKey(pred.getOperatorKey().toString());
                break;
            }
        }
    }
    JobControlCompiler.setOutputFormat(job);

    // set parent plan in all operators. currently the parent plan is really
    // used only when POStream, POSplit are present in the plan
    new PhyPlanSetter(tezOp.plan).visit();

    // Serialize the execution plan
    payloadConf.set(PigProcessor.PLAN, ObjectSerializer.serialize(tezOp.plan));

    UDFContext.getUDFContext().serialize(payloadConf);

    MRToTezHelper.processMRSettings(payloadConf, globalConf);

    if (!pc.inIllustrator) {
        for (POStore store : stores) {
            // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
            store.setInputs(null);
            store.setParentPlan(null);
        }
        // We put them in the reduce because PigOutputCommitter checks the
        // ID of the task to see if it's a map, and if not, calls the reduce
        // committers.
        payloadConf.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(new ArrayList<POStore>()));
        payloadConf.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(stores));
    }

    if (tezOp.isNeedEstimateParallelism()) {
        payloadConf.setBoolean(PigProcessor.ESTIMATE_PARALLELISM, true);
        log.info("Estimate quantile for sample aggregation vertex " + tezOp.getOperatorKey().toString());
    }

    // Take our assembled configuration and create a vertex
    UserPayload userPayload = TezUtils.createUserPayloadFromConf(payloadConf);
    procDesc.setUserPayload(userPayload);

    Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, tezOp.getVertexParallelism(),
            isMap ? MRHelpers.getResourceForMRMapper(globalConf) : MRHelpers.getResourceForMRReducer(globalConf));

    Map<String, String> taskEnv = new HashMap<String, String>();
    MRHelpers.updateEnvBasedOnMRTaskEnv(globalConf, taskEnv, isMap);
    vertex.setTaskEnvironment(taskEnv);

    // All these classes are @InterfaceAudience.Private in Hadoop. Switch to Tez methods in TEZ-1012
    // set the timestamps, public/private visibility of the archives and files
    ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(globalConf);
    // get DelegationToken for each cached file
    ClientDistributedCacheManager.getDelegationTokens(globalConf, job.getCredentials());
    MRApps.setupDistributedCache(globalConf, localResources);
    vertex.addTaskLocalFiles(localResources);

    vertex.setTaskLaunchCmdOpts(isMap ? MRHelpers.getJavaOptsForMRMapper(globalConf)
            : MRHelpers.getJavaOptsForMRReducer(globalConf));

    log.info("For vertex - " + tezOp.getOperatorKey().toString() + ": parallelism="
            + tezOp.getVertexParallelism() + ", memory=" + vertex.getTaskResource().getMemory()
            + ", java opts=" + vertex.getTaskLaunchCmdOpts());

    // Right now there can only be one of each of these. Will need to be
    // more generic when there can be more.
    for (POLoad ld : tezOp.getLoaderInfo().getLoads()) {
        // TODO: These should get the globalConf, or a merged version that
        // keeps settings like pig.maxCombinedSplitSize
        vertex.setLocationHint(
                VertexLocationHint.create(tezOp.getLoaderInfo().getInputSplitInfo().getTaskLocationHints()));
        vertex.addDataSource(ld.getOperatorKey().toString(), DataSourceDescriptor.create(
                InputDescriptor.create(MRInput.class.getName())
                        .setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                                .setConfigurationBytes(TezUtils.createByteStringFromConf(payloadConf))
                                .setSplits(tezOp.getLoaderInfo().getInputSplitInfo().getSplitsProto()).build()
                                .toByteString().asReadOnlyByteBuffer())),
                InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
                dag.getCredentials()));
    }

    for (POStore store : stores) {
        ArrayList<POStore> emptyList = new ArrayList<POStore>();
        ArrayList<POStore> singleStore = new ArrayList<POStore>();
        singleStore.add(store);
        Configuration outputPayLoad = new Configuration(payloadConf);
        outputPayLoad.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(emptyList));
        outputPayLoad.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(singleStore));
        OutputDescriptor storeOutDescriptor = OutputDescriptor.create(MROutput.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(outputPayLoad));
        if (tezOp.getVertexGroupStores() != null) {
            OperatorKey vertexGroupKey = tezOp.getVertexGroupStores().get(store.getOperatorKey());
            if (vertexGroupKey != null) {
                getPlan().getOperator(vertexGroupKey).getVertexGroupInfo()
                        .setStoreOutputDescriptor(storeOutDescriptor);
                continue;
            }
        }
        vertex.addDataSink(store.getOperatorKey().toString(), new DataSinkDescriptor(storeOutDescriptor,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), dag.getCredentials()));
    }

    // LoadFunc and StoreFunc add delegation tokens to Job Credentials in
    // setLocation and setStoreLocation respectively. For eg: HBaseStorage
    // InputFormat add delegation token in getSplits and OutputFormat in
    // checkOutputSpecs. For eg: FileInputFormat and FileOutputFormat
    if (stores.size() > 0) {
        new PigOutputFormat().checkOutputSpecs(job);
    }

    // Set the right VertexManagerPlugin
    if (tezOp.getEstimatedParallelism() != -1) {
        if (tezOp.isGlobalSort() || tezOp.isSkewedJoin()) {
            // Set VertexManagerPlugin to PartitionerDefinedVertexManager, which is able
            // to decrease/increase parallelism of sorting vertex dynamically
            // based on the numQuantiles calculated by sample aggregation vertex
            vertex.setVertexManagerPlugin(
                    VertexManagerPluginDescriptor.create(PartitionerDefinedVertexManager.class.getName()));
            log.info("Set VertexManagerPlugin to PartitionerDefinedParallelismVertexManager for vertex "
                    + tezOp.getOperatorKey().toString());
        } else {
            boolean containScatterGather = false;
            boolean containCustomPartitioner = false;
            for (TezEdgeDescriptor edge : tezOp.inEdges.values()) {
                if (edge.dataMovementType == DataMovementType.SCATTER_GATHER) {
                    containScatterGather = true;
                }
                if (edge.partitionerClass != null) {
                    containCustomPartitioner = true;
                }
            }
            if (containScatterGather && !containCustomPartitioner) {
                // Use auto-parallelism feature of ShuffleVertexManager to dynamically
                // reduce the parallelism of the vertex
                VertexManagerPluginDescriptor vmPluginDescriptor = VertexManagerPluginDescriptor
                        .create(ShuffleVertexManager.class.getName());
                Configuration vmPluginConf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
                vmPluginConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
                if (vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                        InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) != InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) {
                    vmPluginConf.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
                            vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                                    InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
                }
                vmPluginDescriptor.setUserPayload(TezUtils.createUserPayloadFromConf(vmPluginConf));
                vertex.setVertexManagerPlugin(vmPluginDescriptor);
                log.info("Set auto parallelism for vertex " + tezOp.getOperatorKey().toString());
            }
        }
    }
    // Reset udfcontext jobconf. It is not supposed to be set in the front end
    UDFContext.getUDFContext().addJobConf(null);
    return vertex;
}
From source file:org.apache.sqoop.job.mr.MRConfigurationUtils.java
License:Apache License
/**
 * Persist Connector configuration object for link.
 *
 * @param job MapReduce job object
 * @param obj Configuration object
 */
public static void setConnectorLinkConfig(Direction type, Job job, Object obj) {
    switch (type) {
    case FROM:
        job.getConfiguration().set(MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK, obj.getClass().getName());
        job.getCredentials().addSecretKey(MR_JOB_CONFIG_FROM_CONNECTOR_LINK_KEY,
                ConfigUtils.toJson(obj).getBytes());
        break;

    case TO:
        job.getConfiguration().set(MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK, obj.getClass().getName());
        job.getCredentials().addSecretKey(MR_JOB_CONFIG_TO_CONNECTOR_LINK_KEY,
                ConfigUtils.toJson(obj).getBytes());
        break;
    }
}
From source file:org.apache.sqoop.job.mr.MRConfigurationUtils.java
License:Apache License
/**
 * Persist Connector configuration objects for job.
 *
 * @param job MapReduce job object
 * @param obj Configuration object
 */
public static void setConnectorJobConfig(Direction type, Job job, Object obj) {
    switch (type) {
    case FROM:
        job.getConfiguration().set(MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB, obj.getClass().getName());
        job.getCredentials().addSecretKey(MR_JOB_CONFIG_FROM_JOB_CONFIG_KEY,
                ConfigUtils.toJson(obj).getBytes());
        break;

    case TO:
        job.getConfiguration().set(MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB, obj.getClass().getName());
        job.getCredentials().addSecretKey(MR_JOB_CONFIG_TO_JOB_CONFIG_KEY,
                ConfigUtils.toJson(obj).getBytes());
        break;
    }
}