Example usage for org.apache.hadoop.mapreduce Job getCredentials

List of usage examples for org.apache.hadoop.mapreduce Job getCredentials

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getCredentials.

Prototype

public Credentials getCredentials() 
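
Before the usage examples, here is a minimal, hypothetical sketch of the common pattern: obtain the job's Credentials object and attach a delegation token or secret key before submitting the job. The method name, token alias, and secret key name below are placeholders, not taken from the examples.

// Assumed imports: org.apache.hadoop.io.Text, org.apache.hadoop.mapreduce.Job,
// org.apache.hadoop.security.Credentials, org.apache.hadoop.security.token.Token,
// org.apache.hadoop.security.token.TokenIdentifier.
public static void attachCredentials(Job job, Token<? extends TokenIdentifier> serviceToken) {
    Credentials creds = job.getCredentials();

    // Attach a delegation token under an alias so tasks can look it up at runtime.
    // "my-service" is a placeholder alias.
    creds.addToken(new Text("my-service"), serviceToken);

    // Secret keys are stored the same way and shipped with the job's credentials.
    creds.addSecretKey(new Text("my.secret.key"), "secret-value".getBytes());
}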

Usage

From source file:org.apache.hcatalog.pig.HCatStorer.java

License:Apache License

@Override
public void setStoreLocation(String location, Job job) throws IOException {
    HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
            .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, false);

    Configuration config = job.getConfiguration();
    config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign);
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { sign });
    String[] userStr = location.split("\\.");

    if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString());
        }
        Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign);
        if (crd != null) {
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        OutputJobInfo outputJobInfo;
        if (userStr.length == 2) {
            outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions);
        } else if (userStr.length == 1) {
            outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions);
        } else {
            throw new FrontendException(
                    "location " + location + " is invalid. It must be of the form [db.]table",
                    PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA));
        if (schema != null) {
            pigSchema = schema;
        }
        if (pigSchema == null) {
            throw new FrontendException("Schema for data cannot be determined.",
                    PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        String externalLocation = (String) udfProps
                .getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION);
        if (externalLocation != null) {
            outputJobInfo.setLocation(externalLocation);
        }
        try {
            HCatOutputFormat.setOutput(job, outputJobInfo);
        } catch (HCatException he) {
            // pass the message to the user - essentially something about the table
            // information passed to HCatOutputFormat was not right
            throw new PigException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
        HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job);
        try {
            doSchemaValidations(pigSchema, hcatTblSchema);
        } catch (HCatException he) {
            throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
        computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema);
        HCatOutputFormat.setSchema(job, computedSchema);
        udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));

        // We will store all the new/changed properties in the job in the
        // udf context, so that the HCatOutputFormat.setOutput and setSchema
        // methods need not be called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        //Store credentials in a private hash map and not the udf context to
        // make sure they are not public.
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials());
        udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true);
    }
}

From source file:org.apache.hcatalog.templeton.tool.TempletonControllerJob.java

License:Apache License

/**
 * Enqueue the job and print out the job id for later collection.
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args));
    conf.set("user.name", UserGroupInformation.getCurrentUser().getShortUserName());
    Job job = new Job(conf);
    job.setJarByClass(TempletonControllerJob.class);
    job.setJobName("TempletonControllerJob");
    job.setMapperClass(LaunchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(SingleInputFormat.class);
    NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>();
    job.setOutputFormatClass(of.getClass());
    job.setNumReduceTasks(0);

    JobClient jc = new JobClient(new JobConf(job.getConfiguration()));

    Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
    job.getCredentials().addToken(new Text("mr token"), mrdt);
    job.submit();

    submittedJobId = job.getJobID();

    return 0;
}

From source file:org.apache.hive.hcatalog.mapreduce.Security.java

License:Apache License

void handleSecurity(Job job, OutputJobInfo outputJobInfo, IMetaStoreClient client, Configuration conf,
        boolean harRequested) throws IOException, MetaException, TException, Exception {
    handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested);
}

From source file:org.apache.hive.hcatalog.pig.HCatLoader.java

License:Apache License

@Override
public void setLocation(String location, Job job) throws IOException {
    HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
            .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);

    UDFContext udfContext = UDFContext.getUDFContext();
    Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
    job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature);
    Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location);
    dbName = dbTablePair.first;
    tableName = dbTablePair.second;

    RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO);
    // get partitionFilterString stored in the UDFContext - it would have
    // been stored there by an earlier call to setPartitionFilter
    // call setInput on HCatInputFormat only in the frontend because internally
    // it makes calls to the hcat server - we don't want these to happen in
    // the backend
    // in the hadoop front end mapred.task.id property will not be set in
    // the Configuration
    if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(),
                    emr.nextElement().toString());
        }
        if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            // Combine credentials; the credentials from the job take precedence for freshness
            Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());

        InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil
                .deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));

        SpecialCases.addSpecialCasesParametersForHCatLoader(job.getConfiguration(),
                inputJobInfo.getTableInfo());

        // We will store all the new/changed properties in the job in the
        // udf context, so that the HCatInputFormat.setInput method need not
        // be called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true);

        //Store credentials in a private hash map and not the udf context to
        // make sure they are not public.
        Credentials crd = new Credentials();
        crd.addAll(job.getCredentials());
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd);
    }

    // Need to also push projections by calling setOutputSchema on
    // HCatInputFormat - we have to get the RequiredFields information
    // from the UdfContext, translate it to an Schema and then pass it
    // The reason we do this here is because setLocation() is called by
    // Pig runtime at InputFormat.getSplits() and
    // InputFormat.createRecordReader() time - we are not sure when
    // HCatInputFormat needs to know about pruned projections - so doing it
    // here will ensure we communicate to HCatInputFormat about pruned
    // projections at getSplits() and createRecordReader() time

    if (requiredFieldsInfo != null) {
        // convert to hcatschema and pass to HCatInputFormat
        try {
            // push down projections to the columnar store; works for RCFile and ORCFile
            ArrayList<Integer> list = new ArrayList<Integer>(requiredFieldsInfo.getFields().size());
            for (RequiredField rf : requiredFieldsInfo.getFields()) {
                list.add(rf.getIndex());
            }
            ColumnProjectionUtils.setReadColumns(job.getConfiguration(), list);
            outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
            HCatInputFormat.setOutputSchema(job, outputSchema);
        } catch (Exception e) {
            throw new IOException(e);
        }
    } else {
        // else - this means pig's optimizer never invoked the pushProjection
        // method - so we need all fields and hence we should not call
        // setOutputSchema on HCatInputFormat
        ColumnProjectionUtils.setReadAllColumns(job.getConfiguration());
        if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            try {
                HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
                outputSchema = hcatTableSchema;
                HCatInputFormat.setOutputSchema(job, outputSchema);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("outputSchema=" + outputSchema);
    }

}

From source file:org.apache.hive.hcatalog.pig.HCatStorer.java

License:Apache License

/**
 * @param location databaseName.tableName
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    Configuration config = job.getConfiguration();
    config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign);
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { sign });
    String[] userStr = location.split("\\.");

    if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString());
        }
        Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign);
        if (crd != null) {
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        OutputJobInfo outputJobInfo;
        if (userStr.length == 2) {
            outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions);
        } else if (userStr.length == 1) {
            outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions);
        } else {
            throw new FrontendException(
                    "location " + location + " is invalid. It must be of the form [db.]table",
                    PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA));
        if (schema != null) {
            pigSchema = schema;
        }
        if (pigSchema == null) {
            throw new FrontendException("Schema for data cannot be determined.",
                    PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        String externalLocation = (String) udfProps
                .getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION);
        if (externalLocation != null) {
            outputJobInfo.setLocation(externalLocation);
        }
        try {
            HCatOutputFormat.setOutput(job, outputJobInfo);
        } catch (HCatException he) {
            // pass the message to the user - essentially something about the table
            // information passed to HCatOutputFormat was not right
            throw new PigException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
        HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
        try {
            doSchemaValidations(pigSchema, hcatTblSchema);
        } catch (HCatException he) {
            throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
        computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema);
        HCatOutputFormat.setSchema(job, computedSchema);
        udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));

        // We will store all the new/changed properties in the job in the
        // udf context, so that the HCatOutputFormat.setOutput and setSchema
        // methods need not be called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        //Store credentials in a private hash map and not the udf context to
        // make sure they are not public.
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials());
        udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true);
    }
}

From source file:org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob.java

License:Apache License

/**
 * Enqueue the job and print out the job id for later collection.
 * @see org.apache.hive.hcatalog.templeton.CompleteDelegator
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, TException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Preparing to submit job: " + Arrays.toString(args));
    }
    Configuration conf = getConf();

    conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args));
    String memoryMb = appConf.mapperMemoryMb();
    if (memoryMb != null && memoryMb.length() != 0) {
        conf.set(AppConfig.HADOOP_MAP_MEMORY_MB, memoryMb);
    }
    String amMemoryMB = appConf.amMemoryMb();
    if (amMemoryMB != null && !amMemoryMB.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_MEMORY_MB, amMemoryMB);
    }
    String amJavaOpts = appConf.controllerAMChildOpts();
    if (amJavaOpts != null && !amJavaOpts.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_JAVA_OPTS, amJavaOpts);
    }

    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    conf.set("user.name", user);
    Job job = new Job(conf);
    job.setJarByClass(LaunchMapper.class);
    job.setJobName(TempletonControllerJob.class.getSimpleName());
    job.setMapperClass(LaunchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(SingleInputFormat.class);

    NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>();
    job.setOutputFormatClass(of.getClass());
    job.setNumReduceTasks(0);

    JobClient jc = new JobClient(new JobConf(job.getConfiguration()));

    if (UserGroupInformation.isSecurityEnabled()) {
        Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
        job.getCredentials().addToken(new Text("mr token"), mrdt);
    }
    String metastoreTokenStrForm = addHMSToken(job, user);

    job.submit();

    submittedJobId = job.getJobID();
    if (metastoreTokenStrForm != null) {
        //so that it can be cancelled later from CompleteDelegator
        DelegationTokenCache.getStringFormTokenCache().storeDelegationToken(submittedJobId.toString(),
                metastoreTokenStrForm);
        LOG.debug("Added metastore delegation token for jobId=" + submittedJobId.toString() + " user=" + user);
    }
    return 0;
}

From source file:org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob.java

License:Apache License

private String addHMSToken(Job job, String user) throws IOException, InterruptedException, TException {
    if (!secureMetastoreAccess) {
        return null;
    }
    Token<org.apache.hadoop.hive.thrift.DelegationTokenIdentifier> hiveToken = new Token<org.apache.hadoop.hive.thrift.DelegationTokenIdentifier>();
    String metastoreTokenStrForm = buildHcatDelegationToken(user);
    hiveToken.decodeFromUrlString(metastoreTokenStrForm);
    job.getCredentials().addToken(new Text(SecureProxySupport.HCAT_SERVICE), hiveToken);
    return metastoreTokenStrForm;
}

From source file:org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder.java

License:Apache License

private Vertex newVertex(TezOperator tezOp, boolean isMap)
        throws IOException, ClassNotFoundException, InterruptedException {
    ProcessorDescriptor procDesc = ProcessorDescriptor.create(tezOp.getProcessorName());

    // Pass physical plans to vertex as user payload.
    JobConf payloadConf = new JobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), false));

    // We do this so that dag.getCredentials(), job.getCredentials(),
    // job.getConfiguration().getCredentials() all reference the same Credentials object
    // Unfortunately there is no setCredentials() on Job
    payloadConf.setCredentials(dag.getCredentials());
    // We won't actually use this job, but we need it to talk with the Load Store funcs
    @SuppressWarnings("deprecation")
    Job job = new Job(payloadConf);
    payloadConf = (JobConf) job.getConfiguration();

    if (tezOp.sampleOperator != null) {
        payloadConf.set(PigProcessor.SAMPLE_VERTEX, tezOp.sampleOperator.getOperatorKey().toString());
    }

    if (tezOp.sortOperator != null) {
        payloadConf.set(PigProcessor.SORT_VERTEX, tezOp.sortOperator.getOperatorKey().toString());
    }

    String tmp;
    long maxCombinedSplitSize = 0;
    if (!tezOp.combineSmallSplits()
            || pc.getProperties().getProperty(PigConfiguration.PIG_SPLIT_COMBINATION, "true").equals("false"))
        payloadConf.setBoolean(PigConfiguration.PIG_NO_SPLIT_COMBINATION, true);
    else if ((tmp = pc.getProperties().getProperty(PigConfiguration.PIG_MAX_COMBINED_SPLIT_SIZE,
            null)) != null) {
        try {
            maxCombinedSplitSize = Long.parseLong(tmp);
        } catch (NumberFormatException e) {
            log.warn(
                    "Invalid numeric format for pig.maxCombinedSplitSize; use the default maximum combined split size");
        }
    }
    if (maxCombinedSplitSize > 0)
        payloadConf.setLong("pig.maxCombinedSplitSize", maxCombinedSplitSize);

    payloadConf.set("pig.inputs", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInp()));
    payloadConf.set("pig.inpSignatures",
            ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpSignatureLists()));
    payloadConf.set("pig.inpLimits", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpLimits()));
    // Process stores
    LinkedList<POStore> stores = processStores(tezOp, payloadConf, job);

    payloadConf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    payloadConf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
    payloadConf.set("exectype", "TEZ");
    payloadConf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    payloadConf.setClass(MRConfiguration.INPUTFORMAT_CLASS, PigInputFormat.class, InputFormat.class);

    // Set parent plan for all operators in the Tez plan.
    new PhyPlanSetter(tezOp.plan).visit();

    // Set the endOfAllInput flag on the physical plan if certain operators that
    // use this property (such as STREAM) are present in the plan.
    EndOfAllInputSetter.EndOfAllInputChecker checker = new EndOfAllInputSetter.EndOfAllInputChecker(tezOp.plan);
    checker.visit();
    if (checker.isEndOfAllInputPresent()) {
        payloadConf.set(JobControlCompiler.END_OF_INP_IN_MAP, "true");
    }

    // Configure the classes for incoming shuffles to this TezOp
    // TODO: Refactor out resetting input keys, PIG-3957
    List<PhysicalOperator> roots = tezOp.plan.getRoots();
    if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
        POPackage pack = (POPackage) roots.get(0);

        List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
        if (succsList != null) {
            succsList = new ArrayList<PhysicalOperator>(succsList);
        }
        byte keyType = pack.getPkgr().getKeyType();
        tezOp.plan.remove(pack);
        payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
        setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);
        POShuffleTezLoad newPack;
        newPack = new POShuffleTezLoad(pack);
        if (tezOp.isSkewedJoin()) {
            newPack.setSkewedJoins(true);
        }
        tezOp.plan.add(newPack);

        // Set input keys for POShuffleTezLoad. This is used to identify
        // the inputs that are attached to the POShuffleTezLoad in the
        // backend.
        Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (tezOp.sampleOperator != null && tezOp.sampleOperator == pred) {
                // skip sample vertex input
            } else {
                String inputKey = pred.getOperatorKey().toString();
                if (pred.isVertexGroup()) {
                    pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
                }
                LinkedList<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(pred.plan,
                        POLocalRearrangeTez.class);
                for (POLocalRearrangeTez lr : lrs) {
                    if (lr.isConnectedToPackage()
                            && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                        localRearrangeMap.put((int) lr.getIndex(), inputKey);
                    }
                }
            }
        }
        for (Map.Entry<Integer, String> entry : localRearrangeMap.entrySet()) {
            newPack.addInputKey(entry.getValue());
        }

        if (succsList != null) {
            for (PhysicalOperator succs : succsList) {
                tezOp.plan.connect(newPack, succs);
            }
        }

        setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
    } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
        POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
        // TODO Need to fix multiple input key mapping
        TezOperator identityInOutPred = null;
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!pred.isSampleAggregation()) {
                identityInOutPred = pred;
                break;
            }
        }
        identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
    } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
        POValueInputTez valueInput = (POValueInputTez) roots.get(0);

        LinkedList<String> scalarInputs = new LinkedList<String>();
        for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class)) {
            if (userFunc.getFunc() instanceof ReadScalarsTez) {
                scalarInputs.add(((ReadScalarsTez) userFunc.getFunc()).getTezInputs()[0]);
            }
        }
        // Make sure we don't find the scalar
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!scalarInputs.contains(pred.getOperatorKey().toString())) {
                valueInput.setInputKey(pred.getOperatorKey().toString());
                break;
            }
        }
    }
    JobControlCompiler.setOutputFormat(job);

    // set parent plan in all operators. currently the parent plan is really
    // used only when POStream, POSplit are present in the plan
    new PhyPlanSetter(tezOp.plan).visit();

    // Serialize the execution plan
    payloadConf.set(PigProcessor.PLAN, ObjectSerializer.serialize(tezOp.plan));

    UDFContext.getUDFContext().serialize(payloadConf);

    MRToTezHelper.processMRSettings(payloadConf, globalConf);

    if (!pc.inIllustrator) {
        for (POStore store : stores) {
            // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
            store.setInputs(null);
            store.setParentPlan(null);
        }
        // We put them in the reduce because PigOutputCommitter checks the
        // ID of the task to see if it's a map, and if not, calls the reduce
        // committers.
        payloadConf.set(JobControlCompiler.PIG_MAP_STORES,
                ObjectSerializer.serialize(new ArrayList<POStore>()));
        payloadConf.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(stores));
    }

    if (tezOp.isNeedEstimateParallelism()) {
        payloadConf.setBoolean(PigProcessor.ESTIMATE_PARALLELISM, true);
        log.info("Estimate quantile for sample aggregation vertex " + tezOp.getOperatorKey().toString());
    }

    // Take our assembled configuration and create a vertex
    UserPayload userPayload = TezUtils.createUserPayloadFromConf(payloadConf);
    procDesc.setUserPayload(userPayload);

    Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, tezOp.getVertexParallelism(),
            isMap ? MRHelpers.getResourceForMRMapper(globalConf)
                    : MRHelpers.getResourceForMRReducer(globalConf));

    Map<String, String> taskEnv = new HashMap<String, String>();
    MRHelpers.updateEnvBasedOnMRTaskEnv(globalConf, taskEnv, isMap);
    vertex.setTaskEnvironment(taskEnv);

    // All these classes are @InterfaceAudience.Private in Hadoop. Switch to Tez methods in TEZ-1012
    // set the timestamps, public/private visibility of the archives and files
    ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(globalConf);
    // get DelegationToken for each cached file
    ClientDistributedCacheManager.getDelegationTokens(globalConf, job.getCredentials());
    MRApps.setupDistributedCache(globalConf, localResources);
    vertex.addTaskLocalFiles(localResources);

    vertex.setTaskLaunchCmdOpts(isMap ? MRHelpers.getJavaOptsForMRMapper(globalConf)
            : MRHelpers.getJavaOptsForMRReducer(globalConf));

    log.info("For vertex - " + tezOp.getOperatorKey().toString() + ": parallelism="
            + tezOp.getVertexParallelism() + ", memory=" + vertex.getTaskResource().getMemory() + ", java opts="
            + vertex.getTaskLaunchCmdOpts());

    // Right now there can only be one of each of these. Will need to be
    // more generic when there can be more.
    for (POLoad ld : tezOp.getLoaderInfo().getLoads()) {

        // TODO: These should get the globalConf, or a merged version that
        // keeps settings like pig.maxCombinedSplitSize
        vertex.setLocationHint(
                VertexLocationHint.create(tezOp.getLoaderInfo().getInputSplitInfo().getTaskLocationHints()));
        vertex.addDataSource(ld.getOperatorKey().toString(), DataSourceDescriptor.create(
                InputDescriptor.create(MRInput.class.getName())
                        .setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                                .setConfigurationBytes(TezUtils.createByteStringFromConf(payloadConf))
                                .setSplits(tezOp.getLoaderInfo().getInputSplitInfo().getSplitsProto()).build()
                                .toByteString().asReadOnlyByteBuffer())),
                InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
                dag.getCredentials()));
    }

    for (POStore store : stores) {

        ArrayList<POStore> emptyList = new ArrayList<POStore>();
        ArrayList<POStore> singleStore = new ArrayList<POStore>();
        singleStore.add(store);

        Configuration outputPayLoad = new Configuration(payloadConf);
        outputPayLoad.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(emptyList));
        outputPayLoad.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(singleStore));

        OutputDescriptor storeOutDescriptor = OutputDescriptor.create(MROutput.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(outputPayLoad));
        if (tezOp.getVertexGroupStores() != null) {
            OperatorKey vertexGroupKey = tezOp.getVertexGroupStores().get(store.getOperatorKey());
            if (vertexGroupKey != null) {
                getPlan().getOperator(vertexGroupKey).getVertexGroupInfo()
                        .setStoreOutputDescriptor(storeOutDescriptor);
                continue;
            }
        }
        vertex.addDataSink(store.getOperatorKey().toString(), new DataSinkDescriptor(storeOutDescriptor,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), dag.getCredentials()));
    }

    // LoadFunc and StoreFunc add delegation tokens to the Job Credentials in
    // setLocation and setStoreLocation respectively (e.g. HBaseStorage).
    // InputFormats add delegation tokens in getSplits and OutputFormats in
    // checkOutputSpecs (e.g. FileInputFormat and FileOutputFormat).
    if (stores.size() > 0) {
        new PigOutputFormat().checkOutputSpecs(job);
    }

    // Set the right VertexManagerPlugin
    if (tezOp.getEstimatedParallelism() != -1) {
        if (tezOp.isGlobalSort() || tezOp.isSkewedJoin()) {
            // Set VertexManagerPlugin to PartitionerDefinedVertexManager, which is able
            // to decrease/increase parallelism of sorting vertex dynamically
            // based on the numQuantiles calculated by sample aggregation vertex
            vertex.setVertexManagerPlugin(
                    VertexManagerPluginDescriptor.create(PartitionerDefinedVertexManager.class.getName()));
            log.info("Set VertexManagerPlugin to PartitionerDefinedParallelismVertexManager for vertex "
                    + tezOp.getOperatorKey().toString());
        } else {
            boolean containScatterGather = false;
            boolean containCustomPartitioner = false;
            for (TezEdgeDescriptor edge : tezOp.inEdges.values()) {
                if (edge.dataMovementType == DataMovementType.SCATTER_GATHER) {
                    containScatterGather = true;
                }
                if (edge.partitionerClass != null) {
                    containCustomPartitioner = true;
                }
            }
            if (containScatterGather && !containCustomPartitioner) {
                // Use auto-parallelism feature of ShuffleVertexManager to dynamically
                // reduce the parallelism of the vertex
                VertexManagerPluginDescriptor vmPluginDescriptor = VertexManagerPluginDescriptor
                        .create(ShuffleVertexManager.class.getName());
                Configuration vmPluginConf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
                vmPluginConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
                        true);
                if (vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                        InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) != InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) {
                    vmPluginConf.setLong(
                            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
                            vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                                    InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
                }
                vmPluginDescriptor.setUserPayload(TezUtils.createUserPayloadFromConf(vmPluginConf));
                vertex.setVertexManagerPlugin(vmPluginDescriptor);
                log.info("Set auto parallelism for vertex " + tezOp.getOperatorKey().toString());
            }
        }
    }

    // Reset udfcontext jobconf. It is not supposed to be set in the front end
    UDFContext.getUDFContext().addJobConf(null);
    return vertex;
}

From source file:org.apache.sqoop.job.mr.MRConfigurationUtils.java

License:Apache License

/**
 * Persist Connector configuration object for link.
 *
 * @param job MapReduce job object
 * @param obj Configuration object
 */
public static void setConnectorLinkConfig(Direction type, Job job, Object obj) {
    switch (type) {
    case FROM:
        job.getConfiguration().set(MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK, obj.getClass().getName());
        job.getCredentials().addSecretKey(MR_JOB_CONFIG_FROM_CONNECTOR_LINK_KEY,
                ConfigUtils.toJson(obj).getBytes());
        break;

    case TO:
        job.getConfiguration().set(MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK, obj.getClass().getName());
        job.getCredentials().addSecretKey(MR_JOB_CONFIG_TO_CONNECTOR_LINK_KEY,
                ConfigUtils.toJson(obj).getBytes());
        break;
    }
}

From source file:org.apache.sqoop.job.mr.MRConfigurationUtils.java

License:Apache License

/**
 * Persist Connector configuration objects for job.
 *
 * @param job MapReduce job object
 * @param obj Configuration object
 */
public static void setConnectorJobConfig(Direction type, Job job, Object obj) {
    switch (type) {
    case FROM:
        job.getConfiguration().set(MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB, obj.getClass().getName());
        job.getCredentials().addSecretKey(MR_JOB_CONFIG_FROM_JOB_CONFIG_KEY,
                ConfigUtils.toJson(obj).getBytes());
        break;

    case TO:
        job.getConfiguration().set(MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB, obj.getClass().getName());
        job.getCredentials().addSecretKey(MR_JOB_CONFIG_TO_JOB_CONFIG_KEY, ConfigUtils.toJson(obj).getBytes());
        break;
    }
}