List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
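Job#setMapOutputValueClass sets the value class for the map output data, allowing it to differ from the job's final output value class, which it otherwise defaults to; it throws IllegalStateException once the job has been submitted. Before the examples from real projects below, here is a minimal self-contained sketch of the typical case where the setter is required: the map side emits IntWritable while the reducer emits DoubleWritable. All class and path names are illustrative placeholders, not taken from any of the projects listed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AverageScoreDriver {

    // Emits (name, score) pairs from lines like "alice 42".
    public static class ScoreMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] parts = value.toString().split("\\s+");
            if (parts.length == 2) {
                context.write(new Text(parts[0]), new IntWritable(Integer.parseInt(parts[1])));
            }
        }
    }

    // Averages the int scores for each name into a double.
    public static class AverageReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            long count = 0;
            for (IntWritable v : values) {
                sum += v.get();
                count++;
            }
            context.write(key, new DoubleWritable((double) sum / count));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "average-score");
        job.setJarByClass(AverageScoreDriver.class);
        job.setMapperClass(ScoreMapper.class);
        job.setReducerClass(AverageReducer.class);

        // The map output types default to the job's final output types, so they
        // must be declared explicitly whenever the two differ, as here
        // (IntWritable from the map side vs. DoubleWritable from the reducer).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}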
From source file:com.rockstor.compact.RecoveryTool.java
License:Apache License
private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(RecoveryTool.class);

    job.setInputFormatClass(CompactDirInputFormat.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);

    job.setMapperClass(RecoveryMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);

    LOG.info("init job " + NAME + " OK!");
    return job;
}
From source file:com.sa.npopa.samples.hbase.FindBadMOBReferences.java
License:Apache License
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(FindBadMOBReferences.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setBatch(10);
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
    scan.addFamily(Bytes.toBytes("J"));
    //scan.setRowPrefixFilter(Bytes.toBytes("a00"));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // job.setOutputFormatClass(NullOutputFormat.class);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, FindBadMOBReferencesMapper.class,
            Text.class, Text.class, job);

    //job.setNumReduceTasks(0);
    job.setReducerClass(FindBadMOBReferencesReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));

    return job;
}
From source file:com.savy3.nonequijoin.MapOutputSampler.java
License:Apache License
/**
 * Driver for InputSampler MapReduce Job.
 */
public static void runMap(Job job, Path sampleInputPath)
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
    LOG.info("Running a MapReduce Job on Sample Input File " + sampleInputPath.toString());

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    conf.set("numSamples", "" + (job.getNumReduceTasks() - 1));

    // The sample job reuses the parent job's mapper and map output classes.
    Job sampleJob = new Job(conf);
    sampleJob.setMapperClass(job.getMapperClass());
    sampleJob.setReducerClass(SampleKeyReducer.class);
    sampleJob.setJarByClass(job.getMapperClass());
    sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setMapOutputValueClass(job.getMapOutputValueClass());
    sampleJob.setOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setOutputValueClass(NullWritable.class);
    sampleJob.setInputFormatClass(SequenceFileInputFormat.class);
    sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath);
    FileSystem fs = FileSystem.get(conf);

    Path out = new Path(sampleInputPath.getParent(), "mapOut");
    fs.delete(out, true);
    SequenceFileOutputFormat.setOutputPath(sampleJob, out);

    sampleJob.waitForCompletion(true);
    LOG.info("Sample MapReduce Job Output File " + out.toString());

    // Replace the sample input directory with the single reducer output file.
    Path partFile = new Path(out, "part-r-00000");
    Path tmpFile = new Path("/_tmp");
    fs.delete(tmpFile, true);
    fs.rename(partFile, tmpFile);
    fs.delete(sampleInputPath.getParent(), true);
    fs.rename(new Path("/_tmp"), sampleInputPath.getParent());

    LOG.info("Sample partitioning file copied to location " + sampleInputPath.getParent().toString());
}
From source file:com.shopzilla.hadoop.mapreduce.MiniMRClusterContextMRTest.java
License:Apache License
@Test
public void testWordCount() throws Exception {
    Path input = new Path("/user/test/keywords_data");
    Path output = new Path("/user/test/word_count");

    Job job = new Job(configuration);
    job.setJobName("Word Count Test");
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(SumReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    assertTrue("All files from /data classpath directory should have been copied into HDFS",
            miniMRClusterContext.getFileSystem().exists(input));

    job.waitForCompletion(true);
    assertTrue("Output file should have been created",
            miniMRClusterContext.getFileSystem().exists(output));

    final LinkedList<String> expectedLines = new LinkedList<String>();
    expectedLines.add("goodbye\t1");
    expectedLines.add("hello\t1");
    expectedLines.add("world\t2");

    miniMRClusterContext.processData(output, new Function<String, Void>() {
        @Override
        public Void apply(String line) {
            assertEquals(expectedLines.pop(), line);
            return null;
        }
    });
    assertEquals(0, expectedLines.size());
}
From source file:com.sirius.hadoop.job.onlinetime.OnlineTimeJob.java
License:Apache License
public Job build() throws Exception {
    // init
    Job job = Job.getInstance(getConf(), "onlinetime");
    job.setJarByClass(OnlineTimeJob.class);

    // map
    job.setMapperClass(StatusMapper.class);
    job.setMapOutputKeyClass(StatusKey.class);
    job.setMapOutputValueClass(OnlineRecord.class);

    // custom partition
    job.setPartitionerClass(StatusKeyPartitioner.class);

    // reduce
    job.setGroupingComparatorClass(StatusKeyGroupComparator.class);
    job.setReducerClass(StatusReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // input
    FileInputFormat.setInputPaths(job, new Path("/subscriber_status/subscriber_status.json"));

    // output
    FileOutputFormat.setOutputPath(job, out);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, Lz4Codec.class);

    return job;
}
From source file:com.soteradefense.dga.louvain.mapreduce.CommunityCompression.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration mrConf = this.getConf();
    for (java.util.Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(CommunityCompression.class);

    Path in = new Path(inputPath);
    Path out = new Path(outputPath);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("CommunityCompression");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);
    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}
From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizer.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = null;
    try {
        int iteration = 0;
        if (!basePath.endsWith("/")) {
            basePath = basePath + "/";
        }
        String inputPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        String joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
        String outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;

        Configuration mrConf = this.getConf();
        job = Job.getInstance(mrConf);
        for (Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
            mrConf.set(entry.getKey(), entry.getValue());
        }

        FileSystem fs = FileSystem.get(job.getConfiguration());
        boolean nextFileExists = fs.exists(new Path(joinPath));
        while (nextFileExists) {
            System.out.println("Processing " + inputPath + " and " + joinPath);
            job = Job.getInstance(mrConf);
            job.setJobName("Louvain Table Synthesizer " + iteration);
            job.setJarByClass(LouvainTableSynthesizer.class);
            job.setMapperClass(LouvainTableSynthesizerMapper.class);
            job.setReducerClass(LouvainTableSynthesizerReducer.class);
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            // Reducer output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);

            // Add both input folders
            Path in = new Path(inputPath);
            Path joinIn = new Path(joinPath);
            Path out = new Path(outputPath);
            FileInputFormat.addInputPath(job, in);
            FileInputFormat.addInputPath(job, joinIn);
            FileOutputFormat.setOutputPath(job, out);

            job.waitForCompletion(true);

            // Set the new temp input path
            inputPath = outputPath;
            iteration++;
            outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
            joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
            nextFileExists = fs.exists(new Path(joinPath));
        }
    } catch (IOException e) {
        e.printStackTrace();
        return -1;
    } catch (InterruptedException e) {
        e.printStackTrace();
        return -1;
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        return -1;
    }
    return 0;
}
From source file:com.soteradefense.dga.LouvainRunner.java
License:Apache License
private int runMapreduceJob(String inputPath, String outputPath, DGAConfiguration conf) throws Exception {
    Configuration mrConf = new Configuration();
    for (Map.Entry<String, String> entry : conf.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    // Build the job from the configuration populated above.
    Job job = Job.getInstance(mrConf);
    job.setJarByClass(LouvainRunner.class);

    Path in = new Path(inputPath);
    Path out = new Path(outputPath);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("CommunityCompression");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);
    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}
From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java
License:Apache License
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table             The Splice table name to read from.
 * @param scan              The scan instance with the columns, time range etc.
 * @param mapper            The mapper class to use.
 * @param outputKeyClass    The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job               The current job to adjust. Make sure the passed job is
 *                          carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *                          job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan, Class<? extends Mapper> mapper,
        Class<? extends WritableComparable> outputKeyClass, Class<? extends Object> outputValueClass,
        Job job, boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
        throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null)
        job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null)
        job.setMapOutputKeyClass(outputKeyClass);
    if (mapper != null)
        job.setMapperClass(mapper);
    job.getConfiguration().set(MRConstants.SPLICE_INPUT_TABLE_NAME, table);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
        addDependencyJars(job);
    }
}
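As a hedged sketch of how a caller might invoke this helper, assuming a Splice/HBase classpath: the table name and MyTableMapper are hypothetical placeholders, and TableInputFormat stands in for whichever InputFormat variant the deployment uses.

// Hypothetical driver fragment; MyTableMapper and "MY_TABLE" are placeholders.
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "scan-my-table");
Scan scan = new Scan();
scan.setCacheBlocks(false); // typical for full-table MapReduce scans

SpliceTableMapReduceUtil.initTableMapperJob(
        "MY_TABLE",                    // Splice table to read from (placeholder)
        scan,
        MyTableMapper.class,           // Mapper subclass (placeholder)
        ImmutableBytesWritable.class,  // map output key class
        Result.class,                  // map output value class
        job,
        true,                          // ship dependency jars via tmpjars
        TableInputFormat.class);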
From source file:com.splunk.shuttl.integration.hadoop.hbase.CSVJobFactory.java
License:Apache License
/**
 * @return a configured Job for loading bucket CSV data into HBase
 * @throws IOException
 */
public static Job getConfiguredJob(String[] arguments) throws IOException {
    Configuration jobConfiguration = new Configuration(true);
    // Load hbase-site.xml
    HBaseConfiguration.addHbaseResources(jobConfiguration);

    jobConfiguration.set("fs.default.name", arguments[0]);
    jobConfiguration.set("mapred.job.tracker", arguments[1]);
    jobConfiguration.set(JobConfigurationConstants.FILENAME, arguments[2]);
    jobConfiguration.set(JobConfigurationConstants.OUTPUT_PATH, arguments[3]);
    jobConfiguration.set(JobConfigurationConstants.TABLE_NAME, arguments[4]);
    jobConfiguration.set(JobConfigurationConstants.COLUMN_FAMILY, "d");

    Job job = new Job(jobConfiguration, "BucketToHbase");
    job.setJarByClass(CSVMapper.class);
    job.setMapperClass(CSVMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(TextInputFormat.class);

    return job;
}
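For reference, a sketch of invoking this factory; the argument values are placeholders chosen only to illustrate the positional order consumed above.

// Placeholder values; the order must match the positional reads above.
String[] arguments = {
        "hdfs://namenode:8020",   // fs.default.name
        "jobtracker:8021",        // mapred.job.tracker
        "bucket_data.csv",        // JobConfigurationConstants.FILENAME
        "/output/bucket_data",    // JobConfigurationConstants.OUTPUT_PATH
        "shuttl_events"           // JobConfigurationConstants.TABLE_NAME
};
Job job = CSVJobFactory.getConfiguredJob(arguments);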