List of usage examples for org.apache.hadoop.mapreduce.Job.getInstance
@Deprecated public static Job getInstance(Cluster ignored, Configuration conf) throws IOException
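The Cluster-based overload above is deprecated; the examples below all rely on the non-deprecated Job.getInstance(Configuration) or Job.getInstance(Configuration, String jobName) forms instead. As a minimal, self-contained sketch of that pattern (the driver class name and the input/output paths are placeholders, not taken from any of the source files below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PassThroughDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Non-deprecated factory method: a Configuration plus a human-readable job name.
        Job job = Job.getInstance(conf, "pass-through example");
        job.setJarByClass(PassThroughDriver.class);

        // No mapper/reducer is set, so the identity Mapper and Reducer are used;
        // input records (offset, line) are simply written back out.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Hypothetical HDFS paths, passed on the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}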
From source file:com.sa.npopa.samples.hbase.RowCounter.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String startKey = null;
    String endKey = null;
    long startTime = 0;
    long endTime = 0;

    StringBuilder sb = new StringBuilder();

    final String rangeSwitch = "--range=";
    final String startTimeArgKey = "--starttime=";
    final String endTimeArgKey = "--endtime=";
    final String expectedCountArg = "--expected-count=";

    // First argument is table name, starting from second
    for (int i = 1; i < args.length; i++) {
        if (args[i].startsWith(rangeSwitch)) {
            String[] startEnd = args[i].substring(rangeSwitch.length()).split(",", 2);
            if (startEnd.length != 2 || startEnd[1].contains(",")) {
                printUsage("Please specify range in such format as \"--range=a,b\" "
                        + "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
                return null;
            }
            startKey = startEnd[0];
            endKey = startEnd[1];
            continue;
        }
        if (args[i].startsWith(startTimeArgKey)) {
            startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
            continue;
        }
        if (args[i].startsWith(endTimeArgKey)) {
            endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
            continue;
        }
        if (args[i].startsWith(expectedCountArg)) {
            conf.setLong(EXPECTED_COUNT_KEY, Long.parseLong(args[i].substring(expectedCountArg.length())));
            continue;
        }
        // if no switch, assume column names
        sb.append(args[i]);
        sb.append(" ");
    }
    if (endTime < startTime) {
        printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
        return null;
    }

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(RowCounter.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    if (startKey != null && !startKey.equals("")) {
        scan.setStartRow(Bytes.toBytes(startKey));
    }
    if (endKey != null && !endKey.equals("")) {
        scan.setStopRow(Bytes.toBytes(endKey));
    }
    if (sb.length() > 0) {
        for (String columnName : sb.toString().trim().split(" ")) {
            String family = StringUtils.substringBefore(columnName, ":");
            String qualifier = StringUtils.substringAfter(columnName, ":");
            if (StringUtils.isBlank(qualifier)) {
                scan.addFamily(Bytes.toBytes(family));
            } else {
                scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
            }
        }
    }
    scan.setFilter(new FirstKeyOnlyFilter());
    scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(0);
    return job;
}
From source file:com.sirius.hadoop.job.onlinetime.OnlineTimeJob.java
License:Apache License
public Job build() throws Exception {
    // init
    Job job = Job.getInstance(getConf(), "onlinetime");
    job.setJarByClass(OnlineTimeJob.class);

    // map
    job.setMapperClass(StatusMapper.class);
    job.setMapOutputKeyClass(StatusKey.class);
    job.setMapOutputValueClass(OnlineRecord.class);

    // custom partition
    job.setPartitionerClass(StatusKeyPartitioner.class);

    // reduce
    job.setGroupingComparatorClass(StatusKeyGroupComparator.class);
    job.setReducerClass(StatusReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // input
    FileInputFormat.setInputPaths(job, new Path("/subscriber_status/subscriber_status.json"));

    // output
    FileOutputFormat.setOutputPath(job, out);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, Lz4Codec.class);

    return job;
}
From source file:com.streamsets.pipeline.emr.EmrBinding.java
License:Apache License
@Override
public void init() throws Exception {
    Configuration conf = new Configuration();
    LOG.info("Arg 0: {}, Arg 1: {}, Arg 2: {}, Arg 3: {}, Arg 4: {}", args[0], args[1], args[2], args[3], args[4]);
    try (InputStream in = getClass().getClassLoader().getResourceAsStream("cluster_sdc.properties")) {
        properties = new Properties();
        properties.load(in);
        String dataFormat = Utils.getHdfsDataFormat(properties);
        for (Object key : properties.keySet()) {
            String realKey = String.valueOf(key);
            // TODO - Override other configs set in HdfsSource
            if (overriddenConfs.contains(realKey)) {
                String value = Utils.getPropertyNotNull(properties, realKey);
                conf.set(realKey, value);
            }
        }
        String javaOpts = args[3];
        Integer mapMemoryMb = HadoopMapReduceBinding.getMapMemoryMb(javaOpts, conf);
        if (mapMemoryMb != null) {
            conf.set(HadoopMapReduceBinding.MAPREDUCE_MAP_MEMORY_MB, String.valueOf(mapMemoryMb));
        }
        conf.set(HadoopMapReduceBinding.MAPREDUCE_JAVA_OPTS, javaOpts);
        conf.setBoolean("mapreduce.map.speculative", false);
        conf.setBoolean("mapreduce.reduce.speculative", false);
        if ("AVRO".equalsIgnoreCase(dataFormat)) {
            conf.set(Job.INPUT_FORMAT_CLASS_ATTR, "org.apache.avro.mapreduce.AvroKeyInputFormat");
            conf.set(Job.MAP_OUTPUT_KEY_CLASS, "org.apache.avro.mapred.AvroKey");
        }
        conf.set(MRJobConfig.MAP_LOG_LEVEL, args[4]);
        job = Job.getInstance(conf, "StreamSets Data Collector: "
                + properties.getProperty(ClusterModeConstants.CLUSTER_PIPELINE_TITLE) + "::" + args[2]);
        for (String archive : Arrays.asList(args[0].split("\\s*,\\s*"))) {
            job.addCacheArchive(new URI(archive));
        }
        for (String libJar : Arrays.asList(args[1].split("\\s*,\\s*"))) {
            job.addFileToClassPath(new Path(libJar));
        }
        job.setJarByClass(this.getClass());
        job.setNumReduceTasks(0);
        if (!"AVRO".equalsIgnoreCase(dataFormat)) {
            job.setOutputKeyClass(NullWritable.class);
        }
        job.setMapperClass(PipelineMapper.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(NullOutputFormat.class);
    }
}
From source file:com.streamsets.pipeline.hadoop.HadoopMapReduceBinding.java
License:Apache License
@Override
public void init() throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] remainingArgs = parser.getRemainingArgs();
    properties = new Properties();
    if (remainingArgs.length != 2) {
        List<String> argsList = new ArrayList<>();
        for (String arg : remainingArgs) {
            argsList.add("'" + arg + "'");
        }
        throw new IllegalArgumentException("Error expected properties-file java-opts got: " + argsList);
    }
    String propertiesFile = remainingArgs[0];
    String javaOpts = remainingArgs[1];
    try (InputStream in = new FileInputStream(propertiesFile)) {
        properties.load(in);
        String dataFormat = getProperty("dataFormat");
        String source = this.getClass().getSimpleName();
        for (Object key : properties.keySet()) {
            String realKey = String.valueOf(key);
            String value = getProperty(realKey);
            conf.set(realKey, value, source);
        }
        conf.set("mapred.child.java.opts", javaOpts);
        conf.setBoolean("mapreduce.map.speculative", false);
        conf.setBoolean("mapreduce.reduce.speculative", false);
        if (dataFormat.equalsIgnoreCase("AVRO")) {
            conf.set(Job.INPUT_FORMAT_CLASS_ATTR, "org.apache.avro.mapreduce.AvroKeyInputFormat");
            conf.set(Job.MAP_OUTPUT_KEY_CLASS, "org.apache.avro.mapred.AvroKey");
        }
        job = Job.getInstance(conf, "StreamSets Data Collector - Batch Execution Mode");
        job.setJarByClass(this.getClass());
        job.setNumReduceTasks(0);
        if (!dataFormat.equalsIgnoreCase("AVRO")) {
            job.setOutputKeyClass(NullWritable.class);
        }
        job.setMapperClass(PipelineMapper.class);
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(NullOutputFormat.class);
    }
}
From source file:com.studium.millionsong.mapreduce.CompleteToStripped.java
public static void main(String[] args) throws Exception {
    Path inputPath = new Path("/millionSong/complete.csv");
    Path outputPath = new Path("/millionSong/result/run1");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "From complete to stripped dataset");

    // Job configuration:
    // 0. Set the jar which contains these classes
    job.setJarByClass(CompleteToStripped.class);

    // 1. Which Mapper and Reducer should be used
    job.setMapperClass(CompleteStrippedMapper.class);
    job.setReducerClass(StrippedReducer.class);

    // 2. Which are the output datatypes of the mapper and reducer functions
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // 3. Set local combiner for data reduction
    job.setCombinerClass(StrippedReducer.class);

    // 4. Where are the input file(s)
    // The default FileInputFormat is TextInputFormat, so it uses
    // the correct implementation automatically.
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.telefonica.iot.tidoop.apiext.hadoop.ckan.CKANInputFormatTest.java
License:Open Source License
/**
 * Sets up tests by creating a unique instance of the tested class, and by defining the behaviour of the
 * mocked classes.
 *
 * @throws Exception
 */
@Before
public void setUp() throws Exception {
    // set up the instance of the tested class
    ckanInputFormat = new CKANInputFormat();

    // set up the other instances
    conf = new Configuration();
    job = Job.getInstance(conf, "testGetSplitsResource");

    // set up the behaviour of the mocked classes
    when(backend.getNumRecords(resId)).thenReturn(numRecords);
}
From source file:com.telefonica.iot.tidoop.apiext.hadoop.ckan.CKANOutputFormatTest.java
License:Open Source License
/**
 * Sets up tests by creating a unique instance of the tested class, and by defining the behaviour of the
 * mocked classes.
 *
 * @throws Exception
 */
@Before
public void setUp() throws Exception {
    // set up the instance of the tested class
    ckanOutputFormat = new CKANOutputFormat();

    // set up the other instances
    conf = new Configuration();
    job = Job.getInstance(conf, "CKANOutputFormatTest");

    // set up the behaviour of the mocked classes
    when(backend.getNumRecords(resId)).thenReturn(numRecords);
}
From source file:com.telefonica.iot.tidoop.apiext.utils.CKANMapReduceExample.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 7) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String ckanHost = args[0];
    String ckanPort = args[1];
    boolean sslEnabled = args[2].equals("true");
    String ckanAPIKey = args[3];
    String ckanInputs = args[4];
    String ckanOutput = args[5];
    String splitsLength = args[6];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "CKAN MapReduce test");
    job.setJarByClass(CKANMapReduceExample.class);
    job.setMapperClass(RecordSizeGetter.class);
    job.setCombinerClass(RecordSizeAdder.class);
    job.setReducerClass(RecordSizeAdder.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(CKANInputFormat.class);
    CKANInputFormat.setInput(job, ckanInputs);
    CKANInputFormat.setEnvironment(job, ckanHost, ckanPort, sslEnabled, ckanAPIKey);
    CKANInputFormat.setSplitsLength(job, splitsLength);
    job.setOutputFormatClass(CKANOutputFormat.class);
    CKANOutputFormat.setEnvironment(job, ckanHost, ckanPort, sslEnabled, ckanAPIKey);
    CKANOutputFormat.setOutputPkg(job, ckanOutput);

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.telefonica.iot.tidoop.mrlib.jobs.Filter.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String regex = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_REGEX, regex);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-filter");
    job.setNumReduceTasks(1);
    job.setJarByClass(Filter.class);
    job.setMapperClass(LineFilter.class);
    job.setCombinerClass(LinesCombiner.class);
    job.setReducerClass(LinesJoiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.telefonica.iot.tidoop.mrlib.jobs.MapOnly.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String mapFunction = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_FUNCTION, mapFunction);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-maponly");
    job.setNumReduceTasks(0);
    job.setJarByClass(MapOnly.class);
    job.setMapperClass(CustomMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}