List of usage examples for org.apache.hadoop.mapreduce.Job.getCredentials()
public Credentials getCredentials()
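For orientation, here is a minimal, self-contained sketch of the pattern the examples below share: obtain the job's Credentials and register a Hadoop Token on it so the token is serialized with the job and becomes available to its tasks. This is an illustrative sketch, not taken from any of the projects listed; the token identifier, password, kind, and service names are placeholder values.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

public class CredentialsSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        // The Credentials object travels with the job; tokens and secret keys
        // added here are serialized and handed to the map/reduce tasks.
        Credentials creds = job.getCredentials();

        // Placeholder token: identifier/password bytes and kind/service names
        // are illustrative only, mirroring the test example further below.
        Token<TokenIdentifier> token = new Token<TokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        creds.addToken(token.getService(), token);

        System.out.println("Tokens registered: " + creds.numberOfTokens());
    }
}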
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java
License:Apache License
/**
 * Test org.apache.hadoop.mapreduce.pipes.PipesReducer and the transfer of data: key and value.
 *
 * @throws Exception
 */
@Test
public void testPipesReducer() throws Exception {
    System.err.println("testPipesReducer");
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());

        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        BooleanWritable bw = new BooleanWritable(true);
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));

        DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();
        ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context =
                new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(conf, taskAttemptid, kvit,
                        null, null, writer, null, null, null, BooleanWritable.class, Text.class);

        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer =
                new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.setup(context);

        initStdOut(conf);
        reducer.reduce(bw, texts, context);
        reducer.cleanup(context);
        String stdOut = readStdOut(conf);

        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value :first"));
        assertTrue(stdOut.contains("reduce value :second"));
        assertTrue(stdOut.contains("reduce value :third"));
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}
From source file:org.apache.accumulo.core.client.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p>
 * <b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the configuration as a means to
 * pass the token to MapReduce tasks. This information is BASE64 encoded to provide a charset safe conversion to a
 * string, but this conversion is not intended to be secure. {@link PasswordToken} is one example that is insecure
 * in this way; however, {@link DelegationToken}s, acquired using
 * {@link SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this concern.
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @param principal
 *          a valid Accumulo user name (user must have Table.CREATE permission)
 * @param token
 *          the user's password
 * @since 1.5.0
 */
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
        throws AccumuloSecurityException {
    if (token instanceof KerberosToken) {
        log.info("Received KerberosToken, attempting to fetch DelegationToken");
        try {
            Instance instance = getInstance(job);
            Connector conn = instance.getConnector(principal, token);
            token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
        } catch (Exception e) {
            log.warn("Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo", e);
        }
    }
    // DelegationTokens can be passed securely from user to task without serializing insecurely in the configuration
    if (token instanceof DelegationTokenImpl) {
        DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

        // Convert it into a Hadoop Token
        AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
        Token<AuthenticationTokenIdentifier> hadoopToken = new Token<>(identifier.getBytes(),
                delegationToken.getPassword(), identifier.getKind(), delegationToken.getServiceName());

        // Add the Hadoop Token to the Job so it gets serialized and passed along.
        job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
    }

    InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}
From source file:org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat.java
License:Apache License
/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p>
 * <b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the configuration as a means to
 * pass the token to MapReduce tasks. This information is BASE64 encoded to provide a charset safe conversion to a
 * string, but this conversion is not intended to be secure. {@link PasswordToken} is one example that is insecure
 * in this way; however, {@link DelegationToken}s, acquired using
 * {@link SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this concern.
 *
 * @param job
 *          the Hadoop job instance to be configured
 * @param principal
 *          a valid Accumulo user name (user must have Table.CREATE permission if
 *          {@link #setCreateTables(Job, boolean)} is set to true)
 * @param token
 *          the user's password
 * @since 1.5.0
 */
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
        throws AccumuloSecurityException {
    if (token instanceof KerberosToken) {
        log.info("Received KerberosToken, attempting to fetch DelegationToken");
        try {
            Instance instance = getInstance(job);
            Connector conn = instance.getConnector(principal, token);
            token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
        } catch (Exception e) {
            log.warn("Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo", e);
        }
    }
    // DelegationTokens can be passed securely from user to task without serializing insecurely in the configuration
    if (token instanceof DelegationTokenImpl) {
        DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

        // Convert it into a Hadoop Token
        AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
        Token<AuthenticationTokenIdentifier> hadoopToken = new Token<>(identifier.getBytes(),
                delegationToken.getPassword(), identifier.getKind(), delegationToken.getServiceName());

        // Add the Hadoop Token to the Job so it gets serialized and passed along.
        job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
    }

    OutputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AbstractInputFormat.java
License:Apache License
/**
 * Sets connection information needed to communicate with Accumulo for this job.
 *
 * @param job
 *          Hadoop job instance to be configured
 * @param info
 *          connection information for Accumulo
 * @since 2.0.0
 */
public static void setClientInfo(Job job, ClientInfo info) {
    ClientInfo inputInfo = InputConfigurator.updateToken(job.getCredentials(), info);
    InputConfigurator.setClientInfo(CLASS, job.getConfiguration(), inputInfo);
}
From source file:org.apache.falcon.replication.CustomReplicator.java
License:Apache License
@Override
protected Path createInputFileListing(Job job) throws IOException {
    Path fileListingPath = getFileListingPath();
    FilteredCopyListing copyListing = new FilteredCopyListing(job.getConfiguration(), job.getCredentials());
    copyListing.buildListing(fileListingPath, inputOptions);
    LOG.info("Number of paths considered for copy: {}", copyListing.getNumberOfPaths());
    LOG.info("Number of bytes considered for copy: {} (Actual number of bytes copied depends on whether "
            + "any files are skipped or overwritten)", copyListing.getBytesToCopy());
    return fileListingPath;
}
From source file:org.apache.flink.api.java.hadoop.mapreduce.HadoopOutputFormatBase.java
License:Apache License
public HadoopOutputFormatBase(org.apache.hadoop.mapreduce.OutputFormat<K, V> mapreduceOutputFormat, Job job) {
    super(job.getCredentials());
    this.mapreduceOutputFormat = mapreduceOutputFormat;
    this.configuration = job.getConfiguration();
    HadoopUtils.mergeHadoopConf(configuration);
}
From source file:org.apache.hcatalog.hbase.ImportSequenceFile.java
License:Apache License
/**
 * Method to run the Importer MapReduce Job. Normally will be called by another MR job
 * during OutputCommitter.commitJob().
 *
 * @param parentContext JobContext of the parent job
 * @param tableName name of table to bulk load data into
 * @param InputDir path of SequenceFile formatted data to read
 * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported
 * @return true if the Importer job completed successfully
 */
static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) {
    Configuration parentConf = parentContext.getConfiguration();
    Configuration conf = new Configuration();
    for (Map.Entry<String, String> el : parentConf) {
        if (el.getKey().startsWith("hbase."))
            conf.set(el.getKey(), el.getValue());
        if (el.getKey().startsWith("mapred.cache.archives"))
            conf.set(el.getKey(), el.getValue());
    }

    // Inherit jar dependencies added to distributed cache loaded by parent job
    conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", ""));
    conf.set("mapreduce.job.cache.archives.visibilities",
            parentConf.get("mapreduce.job.cache.archives.visibilities", ""));

    // Temporary fix until hbase security is ready
    // We need the written HFile to be world readable so
    // hbase regionserver user has the privileges to perform a hdfs move
    if (parentConf.getBoolean("hadoop.security.authorization", false)) {
        FsPermission.setUMask(conf, FsPermission.valueOf("----------"));
    }

    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false);

    boolean localMode = "local".equals(conf.get("mapred.job.tracker"));
    boolean success = false;
    try {
        FileSystem fs = FileSystem.get(parentConf);
        Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR);
        if (!fs.mkdirs(workDir))
            throw new IOException("Importer work directory already exists: " + workDir);
        Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode);
        job.setWorkingDirectory(workDir);
        job.getCredentials().addAll(parentContext.getCredentials());
        success = job.waitForCompletion(true);
        fs.delete(workDir, true);
        // We only cleanup on success because failure might've been caused by existence of target directory
        if (localMode && success) {
            new ImporterOutputFormat().getOutputCommitter(
                    org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID()))
                    .commitJob(job);
        }
    } catch (InterruptedException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (ClassNotFoundException e) {
        LOG.error("ImportSequenceFile Failed", e);
    } catch (IOException e) {
        LOG.error("ImportSequenceFile Failed", e);
    }
    return success;
}
From source file:org.apache.hcatalog.mapreduce.HCatOutputFormat.java
License:Apache License
/**
 * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo)
 */
public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException {
    setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo);
}
From source file:org.apache.hcatalog.mapreduce.Security.java
License:Apache License
void handleSecurity(Job job, OutputJobInfo outputJobInfo, HiveMetaStoreClient client, Configuration conf,
        boolean harRequested) throws IOException, MetaException, TException, Exception {
    handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested);
}
From source file:org.apache.hcatalog.pig.HCatLoader.java
License:Apache License
@Override
public void setLocation(String location, Job job) throws IOException {
    HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
            .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);

    UDFContext udfContext = UDFContext.getUDFContext();
    Properties udfProps = udfContext.getUDFProperties(this.getClass(), new String[] { signature });
    job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature);

    Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location);
    dbName = dbTablePair.first;
    tableName = dbTablePair.second;

    RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps.get(PRUNE_PROJECTION_INFO);
    // get partitionFilterString stored in the UDFContext - it would have
    // been stored there by an earlier call to setPartitionFilter
    // call setInput on HCatInputFormat only in the frontend because internally
    // it makes calls to the hcat server - we don't want these to happen in
    // the backend
    // in the hadoop front end mapred.task.id property will not be set in
    // the Configuration
    if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) {
        for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements();) {
            PigHCatUtil.getConfigFromUDFProperties(udfProps, job.getConfiguration(),
                    emr.nextElement().toString());
        }
        if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            // Combine credentials; credentials from the job take precedence for freshness
            Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
            crd.addAll(job.getCredentials());
            job.getCredentials().addAll(crd);
        }
    } else {
        Job clone = new Job(job.getConfiguration());
        HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString());

        // We will store all the new/changed properties in the job in the
        // udf context, so that the HCatInputFormat.setInput method need not
        // be called many times.
        for (Entry<String, String> keyValue : job.getConfiguration()) {
            String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
            if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
                udfProps.put(keyValue.getKey(), keyValue.getValue());
            }
        }
        udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true);

        // Store credentials in a private hash map and not the udf context to
        // make sure they are not public.
        Credentials crd = new Credentials();
        crd.addAll(job.getCredentials());
        jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd);
    }

    // Need to also push projections by calling setOutputSchema on
    // HCatInputFormat - we have to get the RequiredFields information
    // from the UdfContext, translate it to a Schema and then pass it.
    // The reason we do this here is because setLocation() is called by
    // Pig runtime at InputFormat.getSplits() and
    // InputFormat.createRecordReader() time - we are not sure when
    // HCatInputFormat needs to know about pruned projections - so doing it
    // here will ensure we communicate to HCatInputFormat about pruned
    // projections at getSplits() and createRecordReader() time
    if (requiredFieldsInfo != null) {
        // convert to hcatschema and pass to HCatInputFormat
        try {
            outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
            HCatInputFormat.setOutputSchema(job, outputSchema);
        } catch (Exception e) {
            throw new IOException(e);
        }
    } else {
        // else - this means pig's optimizer never invoked the pushProjection
        // method - so we need all fields and hence we should not call the
        // setOutputSchema on HCatInputFormat
        if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
            try {
                HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
                outputSchema = hcatTableSchema;
                HCatInputFormat.setOutputSchema(job, outputSchema);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    }
}