List of usage examples for org.apache.hadoop.mapreduce.Job.getCredentials()
public Credentials getCredentials()
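For orientation, here is a minimal, self-contained sketch of the pattern the examples below share: obtain the job's Credentials and register a Hadoop Token on it so the token is serialized with the job and becomes available to its tasks. This is an illustrative sketch, not taken from any of the projects listed; the token identifier, password, kind, and service names are placeholder values.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

public class CredentialsSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        // The Credentials object travels with the job; tokens and secret keys
        // added here are serialized and handed to the map/reduce tasks.
        Credentials creds = job.getCredentials();

        // Placeholder token: identifier/password bytes and kind/service names
        // are illustrative only, mirroring the test example further below.
        Token<TokenIdentifier> token = new Token<TokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        creds.addToken(token.getService(), token);

        System.out.println("Tokens registered: " + creds.numberOfTokens());
    }
}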
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java
License:Apache License
/**
 * Test org.apache.hadoop.mapreduce.pipes.PipesReducer and the transfer of data: key and value.
 *
 * @throws Exception
 */
@Test
public void testPipesReducer() throws Exception {
    System.err.println("testPipesReducer");
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());

        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        BooleanWritable bw = new BooleanWritable(true);
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));

        DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();
        ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context =
                new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(conf, taskAttemptid, kvit,
                        null, null, writer, null, null, null, BooleanWritable.class, Text.class);

        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer =
                new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.setup(context);

        initStdOut(conf);
        reducer.reduce(bw, texts, context);
        reducer.cleanup(context);
        String stdOut = readStdOut(conf);

        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value :first"));
        assertTrue(stdOut.contains("reduce value :second"));
        assertTrue(stdOut.contains("reduce value :third"));
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}
From source file:org.apache.accumulo.core.client.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p>
 * <b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the configuration as a means to
 * pass the token to MapReduce tasks. This information is BASE64 encoded to provide a charset safe conversion to a
 * string, but this conversion is not intended to be secure. {@link PasswordToken} is one example that is insecure
 * in this way; however, {@link DelegationToken}s, acquired using
 * {@link SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this concern.
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @param principal
 *          a valid Accumulo user name (user must have Table.CREATE permission)
 * @param token
 *          the user's password
 * @since 1.5.0
 */
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
        throws AccumuloSecurityException {
    if (token instanceof KerberosToken) {
        log.info("Received KerberosToken, attempting to fetch DelegationToken");
        try {
            Instance instance = getInstance(job);
            Connector conn = instance.getConnector(principal, token);
            token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
        } catch (Exception e) {
            log.warn("Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo", e);
        }
    }
    // DelegationTokens can be passed securely from user to task without serializing insecurely in the configuration
    if (token instanceof DelegationTokenImpl) {
        DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

        // Convert it into a Hadoop Token
        AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
        Token<AuthenticationTokenIdentifier> hadoopToken = new Token<>(identifier.getBytes(),
                delegationToken.getPassword(), identifier.getKind(), delegationToken.getServiceName());

        // Add the Hadoop Token to the Job so it gets serialized and passed along.
        job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
    }

    InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}
From source file:org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat.java
License:Apache License
/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p>
 * <b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the configuration as a means to
 * pass the token to MapReduce tasks. This information is BASE64 encoded to provide a charset safe conversion to a
 * string, but this conversion is not intended to be secure. {@link PasswordToken} is one example that is insecure
 * in this way; however, {@link DelegationToken}s, acquired using
 * {@link SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this concern.
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @param principal
 *          a valid Accumulo user name (user must have Table.CREATE permission if
 *          {@link #setCreateTables(Job, boolean)} is set to true)
 * @param token
 *          the user's password
 * @since 1.5.0
 */
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
        throws AccumuloSecurityException {
    if (token instanceof KerberosToken) {
        log.info("Received KerberosToken, attempting to fetch DelegationToken");
        try {
            Instance instance = getInstance(job);
            Connector conn = instance.getConnector(principal, token);
            token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
        } catch (Exception e) {
            log.warn("Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo", e);
        }
    }
    // DelegationTokens can be passed securely from user to task without serializing insecurely in the configuration
    if (token instanceof DelegationTokenImpl) {
        DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

        // Convert it into a Hadoop Token
        AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
        Token<AuthenticationTokenIdentifier> hadoopToken = new Token<>(identifier.getBytes(),
                delegationToken.getPassword(), identifier.getKind(), delegationToken.getServiceName());

        // Add the Hadoop Token to the Job so it gets serialized and passed along.
        job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
    }

    OutputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Sets connection information needed to communicate with Accumulo for this job.
 *
 * @param job
 *          Hadoop job instance to be configured
 * @param info
 *          connection information for Accumulo
 * @since 2.0.0
 */
public static void setClientInfo(Job job, ClientInfo info) {
    ClientInfo inputInfo = InputConfigurator.updateToken(job.getCredentials(), info);
    InputConfigurator.setClientInfo(CLASS, job.getConfiguration(), inputInfo);
}
From source file:org.apache.falcon.replication.CustomReplicator.java
License:Apache License
@Override
protected Path createInputFileListing(Job job) throws IOException {
    Path fileListingPath = getFileListingPath();
    FilteredCopyListing copyListing = new FilteredCopyListing(job.getConfiguration(), job.getCredentials());
    copyListing.buildListing(fileListingPath, inputOptions);
    LOG.info("Number of paths considered for copy: {}", copyListing.getNumberOfPaths());
    LOG.info("Number of bytes considered for copy: {} (Actual number of bytes copied depends on whether "
            + "any files are skipped or overwritten)", copyListing.getBytesToCopy());
    return fileListingPath;
}
From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopOutputFormatBase.java
License:Apache License
public HadoopOutputFormatBase(org.apache.hadoop.mapreduce.OutputFormat<K, V> mapreduceOutputFormat, Job job) {
    super(job.getCredentials());
    this.mapreduceOutputFormat = mapreduceOutputFormat;
    this.configuration = job.getConfiguration();
    HadoopUtils.mergeHadoopConf(configuration);
}
From source file:org.apache.hcatalog.hbase.ImportSequenceFile.java
License:Apache License
/**
 * Method to run the Importer MapReduce Job. Normally will be called by another MR job
 * during OutputCommitter.commitJob().
 *
 * @param parentContext JobContext of the parent job
 * @param tableName name of table to bulk load data into
 * @param InputDir path of SequenceFile formatted data to read
 * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported
 * @return true if the Importer job completed successfully
 */
static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) {
    Configuration parentConf = parentContext.getConfiguration();
    Configuration conf = new Configuration();
    for (Map.Entry<String, String> el : parentConf) {
        if (el.getKey().startsWith("hbase."))
            conf.set(el.getKey(), el.getValue());
        if (el.getKey().startsWith("mapred.cache.archives"))
            conf.set(el.getKey(), el.getValue());
    }

    // Inherit jar dependencies added to distributed cache loaded by parent job
    conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", ""));
    conf.set("mapreduce.job.cache.archives.visibilities",
            parentConf.get("mapreduce.job.cache.archives.visibilities", ""));

    // Temporary fix until hbase security is ready
    // We need the written HFile to be world readable so
    // hbase regionserver user has the privileges to perform a hdfs move
    if (parentConf.getBoolean("hadoop.security.authorization", false)) {
        FsPermission.setUMask(conf, FsPermission.valueOf("----------"));
    }

    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false);

    boolean localMode = "local".equals(conf.get("mapred.job.tracker"));
    boolean success = false;
    try {
        FileSystem fs = FileSystem.get(parentConf);
        Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR);
        if (!fs.mkdirs(workDir))
            throw new IOException("Importer work directory already exists: " + workDir);
        Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode);
        job.setWorkingDirectory(workDir);
        job.getCredentials().addAll(parentContext.getCredentials());
        success = job.waitForCompletion(true);
        fs.delete(workDir, true);
        // We only cleanup on success because failure might've been caused by existence of target directory
        if (localMode && success) {
            new ImporterOutputFormat().getOutputCommitter(
                    org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID()))
                    .commitJob(job);
        }
    } catch (InterruptedException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (ClassNotFoundException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (IOException e) {
        LOG.error("ImportSequenceFile Failed", e);
    }
    return success;
}
From source file:org.apache.hcatalog.mapreduce.HCatOutputFormat.java
License:Apache License
/**
 * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo)
 */
public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException {
    setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo);
}
From source file:org.apache.hcatalog.mapreduce.Security.java
License:Apache License
void handleSecurity(Job job, OutputJobInfo outputJobInfo, HiveMetaStoreClient client, Configuration conf,
        boolean harRequested) throws IOException, MetaException, TException, Exception {
    handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested);
}
From source file:org.apache.hcatalog.pig.HCatLoader.java
License:Apache License
@Override
public void setLocation(String location, Job job) throws IOException {
    HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
            .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);

    UDFContext udfContext = UDFContext.getUDFContext();
    Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
    job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature);

    Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location);
    dbName = dbTablePair.first;
    tableName = dbTablePair.second;

    RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO);
    // get partitionFilterString stored in the UDFContext - it would have
    // been stored there by an earlier call to setPartitionFilter
    // call setInput on HCatInputFormat only in the frontend because internally
    // it makes calls to the hcat server - we don't want these to happen in
    // the backend
    // in the hadoop front end mapred.task.id property will not be set in
    // the Configuration
    if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(),
                    emr.nextElement().toString());
        }
        if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            // Combine credentials; credentials from the job take precedence for freshness
            Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
            crd.addAll(job.getCredentials());
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString());

        // We will store all the new/changed properties in the job in the
        // udf context, so that the HCatInputFormat.setInput method need not
        // be called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true);

        // Store credentials in a private hash map and not the udf context to
        // make sure they are not public.
        Credentials crd = new Credentials();
        crd.addAll(job.getCredentials());
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd);
    }

    // Need to also push projections by calling setOutputSchema on
    // HCatInputFormat - we have to get the RequiredFields information
    // from the UdfContext, translate it to a Schema and then pass it.
    // The reason we do this here is because setLocation() is called by
    // Pig runtime at InputFormat.getSplits() and
    // InputFormat.createRecordReader() time - we are not sure when
    // HCatInputFormat needs to know about pruned projections - so doing it
    // here will ensure we communicate to HCatInputFormat about pruned
    // projections at getSplits() and createRecordReader() time
    if (requiredFieldsInfo != null) {
        // convert to hcatschema and pass to HCatInputFormat
        try {
            outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
            HCatInputFormat.setOutputSchema(job, outputSchema);
        } catch (Exception e) {
            throw new IOException(e);
        }
    } else {
        // else - this means pig's optimizer never invoked the pushProjection
        // method - so we need all fields and hence we should not call the
        // setOutputSchema on HCatInputFormat
        if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            try {
                HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
                outputSchema = hcatTableSchema;
                HCatInputFormat.setOutputSchema(job, outputSchema);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    }
}