List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
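The method submits the job if it has not been submitted yet, blocks until it finishes (printing progress and counters to the console when verbose is true), and returns true only if the job succeeded. Before the project examples below, a minimal self-contained driver for reference — the class name and argument paths are hypothetical placeholders, not taken from any of the quoted sources; it uses the default identity Mapper in a map-only pass-through:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "waitForCompletion example");
        job.setJarByClass(MinimalDriver.class);
        job.setMapperClass(Mapper.class); // identity mapper; map-only pass-through
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submits the job and blocks until it finishes, echoing progress
        // because verbose == true; returns true only on success.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}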
From source file:com.bizosys.hsearch.kv.indexing.KVIndexer.java
License:Apache License
private static int runJob(int jobTypeI, Job job, FieldMapping fm, String input, String output,
        int scannerCacheSize, String filter)
        throws IOException, InterruptedException, ClassNotFoundException {

    int jobStatus = -1;

    switch (jobTypeI) {
    case SF2HB: {
        IdSearchLog.l.info("Starting Job for SF2HB input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2HF: {
        // First creates a map file, then converts it to an HFile.
        // Create an intermediate dir for the map file output.
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for SF2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // Reset the output folder to the intermediate folder.
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("SF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // Delete the intermediate dir.
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // Delete the empty _SUCCESS marker.
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case SF2MF: {
        IdSearchLog.l.info("Starting Job for SF2MF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        job.setSortComparatorClass(TextPair.FirstComparator.class);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case MF2HB: {
        job.setMapperClass(KVMapperMapFile.class);
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        SequenceFileAsTextInputFormat.addInputPath(job, new Path(input));

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case MF2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for MF2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // Reset the output folder to the intermediate folder.
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("MF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // Delete the intermediate dir.
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // Delete the empty _SUCCESS marker.
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case MF2MF: {
        job.setMapperClass(KVMapperMapFile.class);
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        SequenceFileAsTextInputFormat.addInputPath(job, new Path(input));

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HB: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table cannot be the same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(
                input,                // input table
                scan,                 // Scan instance to control CF and attribute selection
                KVMapperHBase.class,  // mapper class
                TextPair.class,       // mapper output key
                Text.class,           // mapper output value
                job);

        TableMapReduceUtil.initTableReducerJob(
                fm.tableName,         // output table
                KVReducerHBase.class, // reducer class
                job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for HB2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // Reset the output folder to the intermediate folder.
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("HB2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // Delete the intermediate dir.
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // Delete the empty _SUCCESS marker.
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2MF: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table cannot be the same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(
                input,                // input table
                scan,                 // Scan instance to control CF and attribute selection
                KVMapperHBase.class,  // mapper class
                TextPair.class,       // mapper output key
                Text.class,           // mapper output value
                job);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case IMF2HF: {
        Path finalOutputDir = new Path(output);
        job.setJarByClass(KVIndexer.class);
        job.setMapperClass(KVMapperHFile.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, finalOutputDir);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        HTable hTable = new HTable(job.getConfiguration(), fm.tableName);
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    default:
        throw new IOException("Invalid Jobtype " + jobTypeI);
    }
}
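The SF2HF, MF2HF, and HB2HF branches all follow one pattern: the exit status of the first waitForCompletion-driven job gates a second job that converts the intermediate map file into HFiles. Condensed from the SF2HF branch above (not standalone code — it reuses the method's local variables):

    // First job: flat file -> intermediate map file.
    jobStatus = runJob(JobTypeMapping.get("SF2MF"), job, fm, input,
            intermediateFolder, scannerCacheSize, filter);
    // Second job runs only if the first succeeded (exit status 0).
    if (jobStatus == 0) {
        Job hfileJob = Job.getInstance(HBaseConfiguration.create(), "Creating Hfile");
        jobStatus = runJob(JobTypeMapping.get("IMF2HF"), hfileJob, fm,
                intermediateFolder + "/" + MapFile.DATA_FILE_NAME, output,
                scannerCacheSize, filter);
    }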
From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorHFile.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    int seq = 0;
    String inputFile = (args.length > seq) ? args[seq] : "";
    seq++;
    String hfileOutputFile = (args.length > seq) ? args[seq] : "";
    seq++;
    String tableName = (args.length > seq) ? args[seq] : "";
    seq++;
    String familyName = (args.length > seq) ? args[seq] : "1";
    seq++;
    String replaceFrom = (args.length > seq) ? args[seq] : "";
    seq++;
    String replaceTo = (args.length > seq) ? args[seq] : "";
    seq++;
    String startIndex = (args.length > seq) ? args[seq] : "";
    seq++;
    String endIndex = (args.length > seq) ? args[seq] : "";
    seq++;

    if (null == inputFile || inputFile.trim().isEmpty()) {
        String err = KVReplicatorHFile.class + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();
    conf.set(TABLE_NAME, tableName);
    conf.set(FAMILY_NAME, familyName);
    conf.set(REPLACE_FROM, replaceFrom);
    conf.set(REPLACE_TO, replaceTo);
    conf.set(START_INDEX, startIndex);
    conf.set(END_INDEX, endIndex);

    try {
        List<HColumnDescriptor> colFamilies = new ArrayList<HColumnDescriptor>();
        HColumnDescriptor cols = new HColumnDescriptor(familyName.getBytes());
        colFamilies.add(cols);
        HDML.create(tableName, colFamilies);
    } catch (HBaseException e) {
        e.printStackTrace();
    }

    Job job = Job.getInstance(conf, "KVReplicatorHBase - creating HFile");
    job.setJarByClass(KVReplicatorHFile.class);
    job.setMapperClass(KVHFileWriterMapper.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim()));
    FileOutputFormat.setOutputPath(job, new Path(hfileOutputFile.trim()));

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    HTable hTable = new HTable(conf, tableName);
    HFileOutputFormat.configureIncrementalLoad(job, hTable);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}
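Note that configureIncrementalLoad only wires the job (partitioner, reducer, output format) so its HFiles line up with the table's regions; the finished files still have to be handed to HBase afterwards. A sketch of that follow-up step, assuming the LoadIncrementalHFiles tool from org.apache.hadoop.hbase.mapreduce that ships with the same HBase generation this example targets — this is not something the quoted source does itself:

    boolean result = job.waitForCompletion(true);
    if (result) {
        // Bulk-load the generated HFiles into the table (assumed follow-up,
        // not in the original source); 'conf', 'hfileOutputFile', and 'hTable'
        // are the names from the example above.
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        loader.doBulkLoad(new Path(hfileOutputFile.trim()), hTable);
    }
    return (result ? 0 : 1);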
From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorMapFile.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    int seq = 0;
    String inputFile = (args.length > seq) ? args[seq] : "";
    seq++;
    String outputFile = (args.length > seq) ? args[seq++] : "/tmp/hsearch-index";
    String outputFileName = (args.length > seq) ? args[seq++] : "file1";
    String xmlFilePath = (args.length > seq) ? args[seq++] : "";
    String replaceFrom = (args.length > seq) ? args[seq++] : "";
    String replaceTo = (args.length > seq) ? args[seq++] : "";
    String startIndex = (args.length > seq) ? args[seq++] : "";
    String endIndex = (args.length > seq) ? args[seq++] : "";
    String numberOfReducerStr = (args.length > seq) ? args[seq] : "1";
    int numberOfReducer = Integer.parseInt(numberOfReducerStr);

    if (null == inputFile || inputFile.trim().isEmpty()) {
        String err = KVReplicatorMapFile.class + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();
    FieldMapping fm = KVIndexer.createFieldMapping(conf, xmlFilePath, new StringBuilder());

    outputFile = outputFile.charAt(outputFile.length() - 1) == '/' ? outputFile : outputFile + "/";
    outputFile = outputFile + fm.tableName;

    conf.set(OUTPUT_FILE_PATH, outputFile);
    conf.set(OUTPUT_FILE_NAME, outputFileName);
    conf.set(REPLACE_FROM, replaceFrom);
    conf.set(REPLACE_TO, replaceTo);
    conf.set(START_INDEX, startIndex);
    conf.set(END_INDEX, endIndex);

    Job job = Job.getInstance(conf, "KVReplicatorMapFile - Replicating Map File");
    job.setJarByClass(KVReplicatorMapFile.class);
    job.setMapperClass(KVReplicatorMapper.class);
    job.setReducerClass(KVReplicatorReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(numberOfReducer);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim()));

    FileSystem fs = FileSystem.get(conf);
    Path dummyPath = new Path("/tmp", "dummy");
    if (fs.exists(dummyPath)) {
        fs.delete(dummyPath, true);
    }
    FileOutputFormat.setOutputPath(job, dummyPath);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}
From source file:com.bizosys.oneline.maintenance.Import.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }

    Job job = createSubmittableJob(conf, otherArgs);
    int status = job.waitForCompletion(true) ? 0 : 1;

    String params = (null == args) ? "" : StringUtils.arrayToString(args, '\t');
    if (status == 1) {
        String msg = "Error in Job completion. Params\n tablename \t inputdir\n " + params;
        System.out.println(msg);
        throw new Exception(msg);
    }
}
From source file:com.blackberry.logdriver.util.Cat.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
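The tail of this method (repeated in the logdriver tools below) picks between the two submission styles Job offers: waitForCompletion(true) blocks and reports progress, while submit() returns as soon as the job is handed to the cluster. A caller on the submit() path can poll the job itself later; a minimal sketch, with the five-second interval an arbitrary choice of ours rather than anything in the quoted source:

    job.submit();
    while (!job.isComplete()) {   // polls the cluster for job status
        Thread.sleep(5000);       // arbitrary polling interval
    }
    return job.isSuccessful() ? 0 : 1;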
From source file:com.blackberry.logdriver.util.FastSearch.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
From source file:com.blackberry.logdriver.util.Grep.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String regex = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    regex = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
From source file:com.blackberry.logdriver.util.MultiSearch.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];

    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
From source file:com.blackberry.logdriver.util.Search.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Search.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", searchString);

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
From source file:com.blogclustermr.EdgeLister.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "edge_lister");
    job.setJarByClass(EdgeLister.class);
    job.setMapperClass(EdgeMapper.class);
    //job.setCombinerClass(EdgeWeightReducer.class);
    job.setReducerClass(EdgeWeightReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // A single blocking call suffices; its result drives the exit code.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}