Example usage for org.apache.hadoop.mapreduce Job getCredentials

List of usage examples for org.apache.hadoop.mapreduce Job getCredentials

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#getCredentials().

Prototype

public Credentials getCredentials() 
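
Before the collected examples, here is a minimal, hedged sketch (the token kind, service name, and byte contents are placeholders) showing that getCredentials() returns the job's mutable Credentials object, to which tokens can be added so they are serialized with the job and shipped to the tasks:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

public class GetCredentialsSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "credentials-example");

        // getCredentials() exposes the job's mutable Credentials store.
        Credentials creds = job.getCredentials();

        // The kind, service, and byte arrays below are placeholders for illustration.
        Token<TokenIdentifier> token = new Token<TokenIdentifier>("identifier".getBytes(),
                "password".getBytes(), new Text("EXAMPLE_KIND"), new Text("example-service"));
        creds.addToken(token.getService(), token);

        System.out.println("Tokens attached to the job: " + creds.numberOfTokens());
    }
}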

Usage

From source file: it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License: Apache License

/**
 * test org.apache.hadoop.mapreduce.pipes.PipesReducer
 * test the transfer of data: key and value
 *
 * @throws Exception
 */
@Test
public void testPipesReducer() throws Exception {
    System.err.println("testPipesReducer");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        BooleanWritable bw = new BooleanWritable(true);
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));

        DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();

        ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context = new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, kvit, null, null, writer, null, null, null, BooleanWritable.class,
                Text.class);

        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer = new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.setup(context);

        initStdOut(conf);
        reducer.reduce(bw, texts, context);
        reducer.cleanup(context);
        String stdOut = readStdOut(conf);

        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value  :first"));
        assertTrue(stdOut.contains("reduce value  :second"));
        assertTrue(stdOut.contains("reduce value  :third"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }

}

From source file: org.apache.accumulo.core.client.mapreduce.AbstractInputFormat.java

License: Apache License

/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p>
 * <b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the configuration as a means to pass the token to MapReduce tasks. This
 * information is BASE64 encoded to provide a charset-safe conversion to a string, but this conversion is not intended to be secure. {@link PasswordToken} is
 * one example that is insecure in this way; however, {@link DelegationToken}s, acquired using
 * {@link SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this concern.
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @param principal
 *          a valid Accumulo user name (user must have Table.CREATE permission)
 * @param token
 *          the user's password
 * @since 1.5.0
 */
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
        throws AccumuloSecurityException {
    if (token instanceof KerberosToken) {
        log.info("Received KerberosToken, attempting to fetch DelegationToken");
        try {
            Instance instance = getInstance(job);
            Connector conn = instance.getConnector(principal, token);
            token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
        } catch (Exception e) {
            log.warn(
                    "Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo",
                    e);
        }
    }
    // DelegationTokens can be passed securely from user to task without serializing insecurely in the configuration
    if (token instanceof DelegationTokenImpl) {
        DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

        // Convert it into a Hadoop Token
        AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
        Token<AuthenticationTokenIdentifier> hadoopToken = new Token<>(identifier.getBytes(),
                delegationToken.getPassword(), identifier.getKind(), delegationToken.getServiceName());

        // Add the Hadoop Token to the Job so it gets serialized and passed along.
        job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
    }

    InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}
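
A hedged caller-side sketch of the method above (the user name, password, and job name are placeholders; AccumuloInputFormat is the usual concrete subclass through which setConnectorInfo is invoked):

import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class ConnectorInfoSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "accumulo-scan");
        // "root" and "secret" are placeholder credentials for illustration.
        AccumuloInputFormat.setConnectorInfo(job, "root", new PasswordToken("secret"));
        // Had a KerberosToken been passed instead, the method above would have fetched
        // a DelegationToken and attached it to job.getCredentials() for the tasks.
    }
}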

From source file: org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat.java

License: Apache License

/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p>
 * <b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the configuration as a means to pass the token to MapReduce tasks. This
 * information is BASE64 encoded to provide a charset-safe conversion to a string, but this conversion is not intended to be secure. {@link PasswordToken} is
 * one example that is insecure in this way; however, {@link DelegationToken}s, acquired using
 * {@link SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this concern.
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @param principal
 *          a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(Job, boolean)} is set to true)
 * @param token
 *          the user's password
 * @since 1.5.0
 */
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
        throws AccumuloSecurityException {
    if (token instanceof KerberosToken) {
        log.info("Received KerberosToken, attempting to fetch DelegationToken");
        try {
            Instance instance = getInstance(job);
            Connector conn = instance.getConnector(principal, token);
            token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
        } catch (Exception e) {
            log.warn(
                    "Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo",
                    e);
        }
    }
    // DelegationTokens can be passed securely from user to task without serializing insecurely in the configuration
    if (token instanceof DelegationTokenImpl) {
        DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

        // Convert it into a Hadoop Token
        AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
        Token<AuthenticationTokenIdentifier> hadoopToken = new Token<>(identifier.getBytes(),
                delegationToken.getPassword(), identifier.getKind(), delegationToken.getServiceName());

        // Add the Hadoop Token to the Job so it gets serialized and passed along.
        job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
    }

    OutputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}

From source file: org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java

License: Apache License

/**
 * Sets the connection information needed to communicate with Accumulo for this job.
 *
 * @param job
 *          Hadoop job instance to be configured
 * @param info
 *          Connection information for Accumulo
 * @since 2.0.0
 */
public static void setClientInfo(Job job, ClientInfo info) {
    ClientInfo inputInfo = InputConfigurator.updateToken(job.getCredentials(), info);
    InputConfigurator.setClientInfo(CLASS, job.getConfiguration(), inputInfo);
}

From source file: org.apache.falcon.replication.CustomReplicator.java

License: Apache License

@Override
protected Path createInputFileListing(Job job) throws IOException {
    Path fileListingPath = getFileListingPath();
    FilteredCopyListing copyListing = new FilteredCopyListing(job.getConfiguration(), job.getCredentials());
    copyListing.buildListing(fileListingPath, inputOptions);
    LOG.info("Number of paths considered for copy: {}", copyListing.getNumberOfPaths());
    LOG.info("Number of bytes considered for copy: {} (Actual number of bytes copied depends on whether "
            + "any files are skipped or overwritten)", copyListing.getBytesToCopy());
    return fileListingPath;
}

From source file: org.apache.flink.api.java.hadoop.mapreduce.HadoopOutputFormatBase.java

License: Apache License

public HadoopOutputFormatBase(org.apache.hadoop.mapreduce.OutputFormat<K, V> mapreduceOutputFormat, Job job) {
    super(job.getCredentials());
    this.mapreduceOutputFormat = mapreduceOutputFormat;
    this.configuration = job.getConfiguration();
    HadoopUtils.mergeHadoopConf(configuration);
}

From source file: org.apache.hcatalog.hbase.ImportSequenceFile.java

License: Apache License

/**
 * Method to run the Importer MapReduce Job. Normally will be called by another MR job
 * during OutputCommitter.commitJob().
 * @param parentContext JobContext of the parent job
 * @param tableName name of table to bulk load data into
 * @param InputDir path of SequenceFile formatted data to read
 * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported
 * @return true if the Importer job completed successfully, false otherwise
 */
static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) {
    Configuration parentConf = parentContext.getConfiguration();
    Configuration conf = new Configuration();
    for (Map.Entry<String, String> el : parentConf) {
        if (el.getKey().startsWith("hbase."))
            conf.set(el.getKey(), el.getValue());
        if (el.getKey().startsWith("mapred.cache.archives"))
            conf.set(el.getKey(), el.getValue());
    }

    //Inherit jar dependencies added to distributed cache loaded by parent job
    conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", ""));
    conf.set("mapreduce.job.cache.archives.visibilities",
            parentConf.get("mapreduce.job.cache.archives.visibilities", ""));

    //Temporary fix until hbase security is ready
    //We need the written HFile to be world readable so
    //hbase regionserver user has the privileges to perform a hdfs move
    if (parentConf.getBoolean("hadoop.security.authorization", false)) {
        FsPermission.setUMask(conf, FsPermission.valueOf("----------"));
    }

    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false);

    boolean localMode = "local".equals(conf.get("mapred.job.tracker"));

    boolean success = false;
    try {
        FileSystem fs = FileSystem.get(parentConf);
        Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR);
        if (!fs.mkdirs(workDir))
            throw new IOException("Importer work directory already exists: " + workDir);
        Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode);
        job.setWorkingDirectory(workDir);
        job.getCredentials().addAll(parentContext.getCredentials());
        success = job.waitForCompletion(true);
        fs.delete(workDir, true);
        //We only clean up on success because a failure might have been caused by the target directory already existing
        if (localMode && success) {
            new ImporterOutputFormat().getOutputCommitter(
                    org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID()))
                    .commitJob(job);
        }
    } catch (InterruptedException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (ClassNotFoundException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (IOException e) {
        LOG.error("ImportSequenceFile Failed", e);
    }
    return success;
}
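
The call job.getCredentials().addAll(parentContext.getCredentials()) above is the part relevant to this page. A minimal, hedged sketch of that parent-to-child propagation pattern (class and method names are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class CredentialPropagationSketch {
    // Copies all delegation tokens and secret keys from the parent context into a
    // freshly created child job so the child can authenticate with the same services.
    static Job newChildJob(JobContext parentContext, Configuration childConf) throws Exception {
        Job child = Job.getInstance(childConf);
        child.getCredentials().addAll(parentContext.getCredentials());
        return child;
    }
}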

From source file: org.apache.hcatalog.mapreduce.HCatOutputFormat.java

License: Apache License

/**
 * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo)
 */
public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException {
    setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo);
}
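
A hedged caller-side sketch of this convenience overload (the database and table names are placeholders), which forwards both the job's configuration and its credentials to the (Configuration, Credentials, OutputJobInfo) variant:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hcatalog.mapreduce.OutputJobInfo;

public class HCatOutputSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "hcat-write");
        // "default" and "example_table" are placeholders; null means an unpartitioned write.
        HCatOutputFormat.setOutput(job, OutputJobInfo.create("default", "example_table", null));
        // Internally this forwards job.getCredentials() together with the job configuration.
    }
}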

From source file: org.apache.hcatalog.mapreduce.Security.java

License: Apache License

void handleSecurity(Job job, OutputJobInfo outputJobInfo, HiveMetaStoreClient client, Configuration conf,
        boolean harRequested) throws IOException, MetaException, TException, Exception {
    handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested);
}

From source file: org.apache.hcatalog.pig.HCatLoader.java

License: Apache License

@Override
public void setLocation(String location, Job job) throws IOException {
    HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
            .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);

    UDFContext udfContext = UDFContext.getUDFContext();
    Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
    job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature);
    Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location);
    dbName = dbTablePair.first;
    tableName = dbTablePair.second;

    RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO);
    // Get the partitionFilterString stored in the UDFContext - it would have
    // been stored there by an earlier call to setPartitionFilter.
    // Call setInput on HCatInputFormat only in the frontend, because internally
    // it makes calls to the hcat server - we don't want these to happen in
    // the backend.
    // In the Hadoop frontend the mapred.task.id property will not be set in
    // the Configuration.
    if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(),
                    emr.nextElement().toString());
        }
        if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            //Combine credentials; the credentials already on the job take precedence because they are fresher
            Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
            crd.addAll(job.getCredentials());
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString());

        // We will store all the new/changed properties in the job in the
        // udf context, so that the HCatInputFormat.setInput method need not
        // be called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true);

        //Store credentials in a private hash map and not the udf context to
        // make sure they are not public.
        Credentials crd = new Credentials();
        crd.addAll(job.getCredentials());
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd);
    }

    // We also need to push projections by calling setOutputSchema on
    // HCatInputFormat - we have to get the RequiredFields information
    // from the UDFContext, translate it to a Schema and then pass it on.
    // The reason we do this here is that setLocation() is called by the
    // Pig runtime at InputFormat.getSplits() and
    // InputFormat.createRecordReader() time - we are not sure when
    // HCatInputFormat needs to know about pruned projections - so doing it
    // here ensures we communicate the pruned projections to HCatInputFormat
    // at getSplits() and createRecordReader() time.

    if (requiredFieldsInfo != null) {
        // convert to hcatschema and pass to HCatInputFormat
        try {
            outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
            HCatInputFormat.setOutputSchema(job, outputSchema);
        } catch (Exception e) {
            throw new IOException(e);
        }
    } else {
        // else - this means pig's optimizer never invoked the pushProjection
        // method - so we need all fields and hence we should not call the
        // setOutputSchema on HCatInputFormat
        if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            try {
                HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
                outputSchema = hcatTableSchema;
                HCatInputFormat.setOutputSchema(job, outputSchema);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    }

}
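
The credential bookkeeping in setLocation() above can be condensed into a small hedged sketch (the class, method, and parameter names here are illustrative, not part of HCatLoader's API): a private Credentials snapshot is kept per loader signature, and on later calls it is merged with the job's credentials so that no tokens are lost.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.Credentials;

public class CredentialMergeSketch {
    // "cache" stands in for the loader's private jobCredentials map entry.
    static void mergeCredentials(Job job, Credentials cache) {
        // Credentials already on the job overwrite stale cached entries ...
        cache.addAll(job.getCredentials());
        // ... and the combined set is pushed back onto the job for the tasks.
        job.getCredentials().addAll(cache);
    }
}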