List of usage examples for org.apache.hadoop.mapreduce.Job#setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
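Job.setMapOutputValueClass declares the class of the values emitted by the mapper. It only needs to be called when the map output value type differs from the job's final output value type (set via setOutputValueClass), and it throws IllegalStateException if the job has already been submitted. Before the per-project examples, here is a minimal, self-contained driver sketch; the class, mapper, and path names are illustrative placeholders, not taken from the examples below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordLengthDriver {

    // Emits <word, length-of-word> for every token in the input.
    public static class WordLengthMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    context.write(new Text(token), new IntWritable(token.length()));
                }
            }
        }
    }

    // Emits <word, "len=N"> -- note the value type differs from the map output.
    public static class WordLengthReducer extends Reducer<Text, IntWritable, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int len = 0;
            for (IntWritable v : values) {
                len = v.get();
            }
            context.write(key, new Text("len=" + len));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word-length");
        job.setJarByClass(WordLengthDriver.class);
        job.setMapperClass(WordLengthMapper.class);
        job.setReducerClass(WordLengthReducer.class);

        // The mapper emits IntWritable values but the reducer writes Text values,
        // so the intermediate value class must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The real-world drivers below make the same call with their own intermediate value types (LongWritable, KpiWritable, BytesWritable, KeyValue, and so on).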
From source file:com.bigdog.hadoop.mapreduce.group.GroupApp.java
public void group() throws Exception {
    final Configuration configuration = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), configuration);
    if (fileSystem.exists(new Path(OUT_PATH))) {
        fileSystem.delete(new Path(OUT_PATH), true);
    }

    final Job job = new Job(configuration, GroupApp.class.getSimpleName());

    // 1.1 input path and format
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 mapper, emitting <k2,v2>
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(NewK2.class);
    job.setMapOutputValueClass(LongWritable.class);

    // 1.3 partitioning
    job.setPartitionerClass(HashPartitioner.class);
    job.setNumReduceTasks(1);

    // 1.4 custom grouping comparator
    job.setGroupingComparatorClass(MyGroupingComparator.class);

    // 1.5 combiner: none

    // 2.2 reducer, emitting <k3,v3>
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    // 2.3 output path and format
    FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job and wait for completion
    job.waitForCompletion(true);
}
From source file:com.bigdog.hadoop.mapreduce.partition.KpiApp.java
public void kpi() throws Exception {
    final Job job = new Job(new Configuration(), KpiApp.class.getSimpleName());
    job.setJarByClass(KpiApp.class);

    // 1.1 input path and format
    FileInputFormat.setInputPaths(job, INPUT_PATH);
    job.setInputFormatClass(TextInputFormat.class);

    // 1.2 mapper, emitting <k2,v2>
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(KpiWritable.class);

    // 1.3 custom partitioner, with two reduce tasks
    job.setPartitionerClass(KpiPartitioner.class);
    job.setNumReduceTasks(2);

    // 1.4 grouping: default
    // 1.5 combiner: none

    // 2.2 reducer, emitting <k3,v3>
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(KpiWritable.class);

    // 2.3 output path and format
    FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    job.setOutputFormatClass(TextOutputFormat.class);

    // submit the job and wait for completion
    job.waitForCompletion(true);
}
From source file:com.bigfishgames.biginsights.upsight.mapreduce.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: AvroWordCount <input path> <output path>");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("wordcount");

    // We call setOutputKeySchema first so we can override the configuration
    // parameters it sets
    // AvroJob.setOutputKeySchema(job,
    //         Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.NULL)));
    AvroJob.setOutputKeySchema(job, Event.getClassSchema());
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MyAvroKeyOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
    return 0;
}
From source file:com.bizosys.hsearch.kv.indexer.KVIndexer.java
License:Apache License
/**
 * Given the indexing parameters, starts an indexing job.
 * The different indexing types are:
 * SF2HB = Simple file (csv, tsv) to HBase directly.
 * SF2HF = Simple file (csv, tsv) to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * SF2MF = Simple file (csv, tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * MF2HB = Map file (key and value as csv, tsv) to HBase.
 * MF2HF = Map file (key and value as csv, tsv) to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * MF2MF = Map file (key and value as csv, tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * HB2HB = HBase to HBase.
 * HB2HF = HBase to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * HB2MF = HBase to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void execute(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length < 7) {
        String err = "Usage : " + KVIndexer.class
                + " <<Job Type(SF2HB|SF2HF|SF2MF...)>> <<Input Source>> <<Output Sink>> <<XML File Configuration>> <<Skip Header(true|false)>> <<Run KeyGeneration Job>> <<Number Of reducer>> <<Speculative Execution>> <<scanner-cache-size>> <<filter>>";
        IdSearchLog.l.fatal(err);
        System.exit(1);
    }

    String msg = this.getClass().getName() + " > Initializing indexer job.";
    IdSearchLog.l.info(msg);

    int seq = 0;
    int len = args.length;

    String jobType = (len > seq) ? args[seq++] : "";
    String inputSource = (len > seq) ? args[seq++] : "";
    String outputSink = (len > seq) ? args[seq++] : "/tmp/hsearch-index";
    String xmlFilePath = (len > seq) ? args[seq++] : "";
    String skipHeader = (len > seq) ? args[seq++] : "false";
    boolean runKeyGenJob = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : false;
    int numberOfReducer = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 1;
    boolean speculativeExecution = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : true;
    int scannerCacheSize = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 300;
    String filter = (len > seq) ? args[seq++] : "";

    if (isEmpty(jobType)) {
        String err = this.getClass().getName()
                + " > Please enter Job type as one of these :\n SF2HB|SF2HF|SF2MF|MF2HB|MF2HF|MF2MF|HB2HB|HB2HF|HB2MF|IMF2HF";
        System.err.println(err);
        throw new IOException(err);
    }

    if (isEmpty(inputSource)) {
        String err = this.getClass().getName() + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();
    FieldMapping fm = createFieldMapping(conf, xmlFilePath, new StringBuilder());
    outputSink = outputSink.charAt(outputSink.length() - 1) == '/' ? outputSink : outputSink + "/";
    outputSink = outputSink + fm.tableName;

    createHBaseTable(fm);

    KVIndexer.FAM_NAME = fm.familyName.getBytes();
    KVIndexer.FIELD_SEPARATOR = fm.fieldSeparator;

    conf.set(XML_FILE_PATH, xmlFilePath);
    conf.set(OUTPUT_FOLDER, outputSink);
    conf.set(SKIP_HEADER, skipHeader);
    conf.set(RAW_FILE_SEPATATOR, String.valueOf(fm.fieldSeparator));

    Job job = Job.getInstance(conf, "com.bizosys.hsearch.kv.indexing.KVIndexer type : " + jobType + "\n"
            + inputSource + "\n" + outputSink);
    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(numberOfReducer);

    Integer jobTypeI = JobTypeMapping.get(jobType);
    if (jobTypeI == null)
        throw new IOException("Invalid Jobtype " + jobType);

    /*
     * If an internal key index is given, generate the keys first and then index;
     * otherwise just run the indexer, creating keys from HBase.
     */
    boolean keyGenjobStatus = false;
    if (-1 != fm.internalKey && runKeyGenJob) {
        Configuration keyGenConf = HBaseConfiguration.create();
        keyGenConf.set(INPUT_SOURCE, inputSource);
        keyGenConf.set(XML_FILE_PATH, xmlFilePath);
        keyGenConf.set(OUTPUT_FOLDER, outputSink);
        keyGenConf.set(SKIP_HEADER, skipHeader);

        Job keyGenJob = Job.getInstance(keyGenConf, "Creating Keys KVKeyGenerator for " + inputSource);

        switch (jobTypeI) {
        case SF2HB:
        case SF2HF:
        case SF2MF: {
            FileInputFormat.addInputPath(keyGenJob, new Path(inputSource));

            keyGenJob.setMapperClass(KVKeyGeneratorMapperFile.class);
            keyGenJob.setInputFormatClass(TextInputFormat.class);
            keyGenJob.setMapOutputKeyClass(Text.class);
            keyGenJob.setMapOutputValueClass(Text.class);

            keyGenJob.setReducerClass(KVKeyGeneratorReducerFile.class);
            keyGenJob.setNumReduceTasks(numberOfReducer);
            keyGenJob.setOutputKeyClass(NullWritable.class);
            keyGenJob.setOutputValueClass(Text.class);

            inputSource = outputSink + "_" + INPUTWITH_KEY;
            Path intermediatePath = new Path(inputSource);
            System.out.println("Final input path " + inputSource);
            FileOutputFormat.setOutputPath(keyGenJob, intermediatePath);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case HB2HB:
        case HB2HF:
        case HB2MF: {
            Scan scan = new Scan();
            scan.setCaching(scannerCacheSize);
            scan.setCacheBlocks(false);

            byte[] family = fm.familyName.getBytes();
            for (String name : fm.nameWithField.keySet()) {
                Field fld = fm.nameWithField.get(name);
                if (!fld.isMergedKey)
                    continue;
                scan.addColumn(family, fld.sourceName.trim().getBytes());
            }

            TableMapReduceUtil.initTableMapperJob(inputSource, // input table
                    scan, // Scan instance to control CF and attribute selection
                    KVKeyGeneratorMapperHBase.class, // mapper class
                    Text.class, // mapper output key
                    ImmutableBytesWritable.class, // mapper output value
                    keyGenJob);

            TableMapReduceUtil.initTableReducerJob(inputSource, // output table
                    KVKeyGeneratorReducerHBase.class, // reducer class
                    keyGenJob);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        default:
            break;
        }
    }

    /*
     * Run the job based on job type, e.g. SF2HB, SF2MF, SF2HF.
     */
    System.out.println("Sending path " + inputSource);
    runJob(jobTypeI, job, fm, inputSource, outputSink, scannerCacheSize, filter);
}
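For reference, this is how the ten positional arguments parsed by execute() might line up on the command line. This is a sketch only; the jar name and file paths are hypothetical, and only the argument order comes from the usage string above:

    hadoop jar hsearch-index.jar com.bizosys.hsearch.kv.indexer.KVIndexer \
        SF2HB /data/input.tsv /tmp/hsearch-index /conf/schema.xml false true 4 true 300 ""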
From source file:com.bizosys.hsearch.kv.indexer.KVIndexer.java
License:Apache License
private static int runJob(int jobTypeI, Job job, FieldMapping fm, String input, String output,
        int scannerCacheSize, String filter) throws IOException, InterruptedException, ClassNotFoundException {

    int jobStatus = -1;

    switch (jobTypeI) {
    case SF2HB: {
        IdSearchLog.l.info("Starting Job for SF2HB input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2MF: {
        IdSearchLog.l.info("Starting Job for SF2MF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2HF: {
        /*
         * First creates a map file and then converts it to an HFile.
         * Creates an intermediate dir for the map file output.
         */
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for SF2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // reset the output folder to the intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("SF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // delete the intermediate dir
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2HB: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                Text.class, // mapper output key
                BytesWritable.class, // mapper output value
                job);

        TableMapReduceUtil.initTableReducerJob(fm.tableName, // output table
                KVReducerHBase.class, // reducer class
                job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for HB2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // reset the output folder to the intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("HB2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // delete the intermediate dir
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2MF: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                Text.class, // mapper output key
                BytesWritable.class, // mapper output value
                job);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case IMF2HF: {
        Path finalOutputDir = new Path(output);
        job.setJarByClass(KVIndexer.class);
        job.setMapperClass(KVMapperHFile.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, finalOutputDir);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        HTable hTable = new HTable(job.getConfiguration(), fm.tableName);
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    default:
        throw new IOException("Invalid Jobtype " + jobTypeI);
    }
}
From source file:com.bizosys.hsearch.kv.indexing.KVIndexer.java
License:Apache License
/**
 * Given the indexing parameters, starts an indexing job.
 * The different indexing types are:
 * SF2HB = Simple file (csv, tsv) to HBase directly.
 * SF2HF = Simple file (csv, tsv) to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * SF2MF = Simple file (csv, tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * MF2HB = Map file (key and value as csv, tsv) to HBase.
 * MF2HF = Map file (key and value as csv, tsv) to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * MF2MF = Map file (key and value as csv, tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * HB2HB = HBase to HBase.
 * HB2HF = HBase to HFile, which can be loaded into HBase using the LoadIncrementalHFiles class from HBase.
 * HB2MF = HBase to MapFile (key as {@link Text} and value as {@link BytesWritable}).
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void execute(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length < 7) {
        String err = "Usage : " + KVIndexer.class
                + " <<Job Type(SF2HB|SF2HF|SF2MF...)>> <<Input Source>> <<Output Sink>> <<XML File Configuration>> <<Skip Header(true|false)>> <<Run KeyGeneration Job>> <<Number Of reducer>> <<Speculative Execution>> <<scanner-cache-size>> <<filter>>";
        IdSearchLog.l.fatal(err);
        System.exit(1);
    }

    String msg = this.getClass().getName() + " > Initializing indexer job.";
    IdSearchLog.l.info(msg);

    int seq = 0;
    int len = args.length;

    String jobType = (len > seq) ? args[seq++] : "";
    String inputSource = (len > seq) ? args[seq++] : "";
    String outputSink = (len > seq) ? args[seq++] : "/tmp/hsearch-index";
    String xmlFilePath = (len > seq) ? args[seq++] : "";
    String skipHeader = (len > seq) ? args[seq++] : "false";
    boolean runKeyGenJob = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : false;
    int numberOfReducer = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 1;
    boolean speculativeExecution = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : true;
    int scannerCacheSize = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 300;
    String filter = (len > seq) ? args[seq++] : "";

    if (isEmpty(jobType)) {
        String err = this.getClass().getName()
                + " > Please enter Job type as one of these :\n SF2HB|SF2HF|SF2MF|MF2HB|MF2HF|MF2MF|HB2HB|HB2HF|HB2MF|IMF2HF";
        System.err.println(err);
        throw new IOException(err);
    }

    if (isEmpty(inputSource)) {
        String err = this.getClass().getName() + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();
    FieldMapping fm = createFieldMapping(conf, xmlFilePath, new StringBuilder());
    outputSink = outputSink.charAt(outputSink.length() - 1) == '/' ? outputSink : outputSink + "/";
    outputSink = outputSink + fm.tableName;

    createHBaseTable(fm);

    KVIndexer.FAM_NAME = fm.familyName.getBytes();
    KVIndexer.FIELD_SEPARATOR = fm.fieldSeparator;

    conf.set(XML_FILE_PATH, xmlFilePath);
    conf.set(OUTPUT_FOLDER, outputSink);
    conf.set(SKIP_HEADER, skipHeader);
    conf.setBoolean("mapreduce.map.speculative", speculativeExecution);

    Job job = Job.getInstance(conf, "com.bizosys.hsearch.kv.indexing.KVIndexer type : " + jobType + "\n"
            + inputSource + "\n" + outputSink);
    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(numberOfReducer);

    Integer jobTypeI = JobTypeMapping.get(jobType);
    if (jobTypeI == null)
        throw new IOException("Invalid Jobtype " + jobType);

    /*
     * If an internal key index is given, generate the keys first and then index;
     * otherwise just run the indexer, creating keys from HBase.
     */
    boolean keyGenjobStatus = false;
    if (-1 != fm.internalKey && runKeyGenJob) {
        Configuration keyGenConf = HBaseConfiguration.create();
        keyGenConf.set(INPUT_SOURCE, inputSource);
        keyGenConf.set(XML_FILE_PATH, xmlFilePath);
        keyGenConf.set(OUTPUT_FOLDER, outputSink);
        keyGenConf.set(SKIP_HEADER, skipHeader);

        Job keyGenJob = Job.getInstance(keyGenConf, "Creating Keys KVKeyGenerator for " + inputSource);

        switch (jobTypeI) {
        case SF2HB:
        case SF2HF:
        case SF2MF: {
            FileInputFormat.addInputPath(keyGenJob, new Path(inputSource));

            keyGenJob.setMapperClass(KVKeyGeneratorMapperFile.class);
            keyGenJob.setInputFormatClass(TextInputFormat.class);
            keyGenJob.setMapOutputKeyClass(Text.class);
            keyGenJob.setMapOutputValueClass(Text.class);

            keyGenJob.setReducerClass(KVKeyGeneratorReducerFile.class);
            keyGenJob.setNumReduceTasks(numberOfReducer);
            keyGenJob.setOutputKeyClass(NullWritable.class);
            keyGenJob.setOutputValueClass(Text.class);

            inputSource = outputSink + "_" + INPUTWITH_KEY;
            Path intermediatePath = new Path(inputSource);
            System.out.println("Final input path " + inputSource);
            FileOutputFormat.setOutputPath(keyGenJob, intermediatePath);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case HB2HB:
        case HB2HF:
        case HB2MF: {
            Scan scan = new Scan();
            scan.setCaching(scannerCacheSize);
            scan.setCacheBlocks(false);

            // Added filter
            if (null != filter) {
                if (filter.trim().length() > 0) {
                    int index = filter.indexOf('=');
                    scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                            filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                            filter.substring(index + 1).getBytes()));
                }
            }

            byte[] family = fm.familyName.getBytes();
            for (String name : fm.nameWithField.keySet()) {
                Field fld = fm.nameWithField.get(name);
                if (!fld.isMergedKey)
                    continue;
                scan.addColumn(family, fld.sourceName.trim().getBytes());
            }

            TableMapReduceUtil.initTableMapperJob(inputSource, // input table
                    scan, // Scan instance to control CF and attribute selection
                    KVKeyGeneratorMapperHBase.class, // mapper class
                    Text.class, // mapper output key
                    ImmutableBytesWritable.class, // mapper output value
                    keyGenJob);

            TableMapReduceUtil.initTableReducerJob(inputSource, // output table
                    KVKeyGeneratorReducerHBase.class, // reducer class
                    keyGenJob);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case MF2HB:
        case MF2HF:
        case MF2MF: {
            break;
        }
        default:
            break;
        }
    }

    /*
     * Run the job based on job type, e.g. SF2HB, SF2MF, SF2HF.
     */
    System.out.println("Sending path " + inputSource);
    runJob(jobTypeI, job, fm, inputSource, outputSink, scannerCacheSize, filter);
}
From source file:com.bizosys.hsearch.kv.indexing.KVIndexer.java
License:Apache License
private static int runJob(int jobTypeI, Job job, FieldMapping fm, String input, String output,
        int scannerCacheSize, String filter) throws IOException, InterruptedException, ClassNotFoundException {

    int jobStatus = -1;

    switch (jobTypeI) {
    case SF2HB: {
        IdSearchLog.l.info("Starting Job for SF2HB input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2HF: {
        // First creates a map file and then converts it to an HFile.
        // Creates an intermediate dir for the map file output.
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for SF2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // reset the output folder to the intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("SF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // delete the intermediate dir
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case SF2MF: {
        IdSearchLog.l.info("Starting Job for SF2MF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        job.setSortComparatorClass(TextPair.FirstComparator.class);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case MF2HB: {
        job.setMapperClass(KVMapperMapFile.class);
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        SequenceFileAsTextInputFormat.addInputPath(job, new Path(input));

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case MF2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for MF2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // reset the output folder to the intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("MF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // delete the intermediate dir
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case MF2MF: {
        job.setMapperClass(KVMapperMapFile.class);
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        SequenceFileAsTextInputFormat.addInputPath(job, new Path(input));

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HB: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                TextPair.class, // mapper output key
                Text.class, // mapper output value
                job);

        TableMapReduceUtil.initTableReducerJob(fm.tableName, // output table
                KVReducerHBase.class, // reducer class
                job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutputDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for HB2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intermediate output folder "
                + intermediateFolder + " final output dir " + output);

        // reset the output folder to the intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);

        int jobT = JobTypeMapping.get("HB2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        // delete the intermediate dir
        FileSystem.get(conf).delete(intermediateOutputDir, true);
        // delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2MF: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                TextPair.class, // mapper output key
                Text.class, // mapper output value
                job);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case IMF2HF: {
        Path finalOutputDir = new Path(output);
        job.setJarByClass(KVIndexer.class);
        job.setMapperClass(KVMapperHFile.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, finalOutputDir);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        HTable hTable = new HTable(job.getConfiguration(), fm.tableName);
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    default:
        throw new IOException("Invalid Jobtype " + jobTypeI);
    }
}
From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorHFile.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    int seq = 0;
    String inputFile = (args.length > seq) ? args[seq++] : "";
    String hfileOutputFile = (args.length > seq) ? args[seq++] : "";
    String tableName = (args.length > seq) ? args[seq++] : "";
    String familyName = (args.length > seq) ? args[seq++] : "1";
    String replaceFrom = (args.length > seq) ? args[seq++] : "";
    String replaceTo = (args.length > seq) ? args[seq++] : "";
    String startIndex = (args.length > seq) ? args[seq++] : "";
    String endIndex = (args.length > seq) ? args[seq++] : "";

    if (null == inputFile || inputFile.trim().isEmpty()) {
        String err = KVReplicatorHFile.class + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();
    conf.set(TABLE_NAME, tableName);
    conf.set(FAMILY_NAME, familyName);
    conf.set(REPLACE_FROM, replaceFrom);
    conf.set(REPLACE_TO, replaceTo);
    conf.set(START_INDEX, startIndex);
    conf.set(END_INDEX, endIndex);

    try {
        List<HColumnDescriptor> colFamilies = new ArrayList<HColumnDescriptor>();
        HColumnDescriptor cols = new HColumnDescriptor(familyName.getBytes());
        colFamilies.add(cols);
        HDML.create(tableName, colFamilies);
    } catch (HBaseException e) {
        e.printStackTrace();
    }

    Job job = Job.getInstance(conf, "KVReplicatorHBase - creating HFile");
    job.setJarByClass(KVReplicatorHFile.class);
    job.setMapperClass(KVHFileWriterMapper.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim()));
    FileOutputFormat.setOutputPath(job, new Path(hfileOutputFile.trim()));

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    HTable hTable = new HTable(conf, tableName);
    HFileOutputFormat.configureIncrementalLoad(job, hTable);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}
From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorMapFile.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    int seq = 0;
    String inputFile = (args.length > seq) ? args[seq++] : "";
    String outputFile = (args.length > seq) ? args[seq++] : "/tmp/hsearch-index";
    String outputFileName = (args.length > seq) ? args[seq++] : "file1";
    String xmlFilePath = (args.length > seq) ? args[seq++] : "";
    String replaceFrom = (args.length > seq) ? args[seq++] : "";
    String replaceTo = (args.length > seq) ? args[seq++] : "";
    String startIndex = (args.length > seq) ? args[seq++] : "";
    String endIndex = (args.length > seq) ? args[seq++] : "";
    String numberOfReducerStr = (args.length > seq) ? args[seq] : "1";
    int numberOfReducer = Integer.parseInt(numberOfReducerStr);

    if (null == inputFile || inputFile.trim().isEmpty()) {
        String err = KVReplicatorHFile.class + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();
    FieldMapping fm = KVIndexer.createFieldMapping(conf, xmlFilePath, new StringBuilder());
    outputFile = outputFile.charAt(outputFile.length() - 1) == '/' ? outputFile : outputFile + "/";
    outputFile = outputFile + fm.tableName;

    conf.set(OUTPUT_FILE_PATH, outputFile);
    conf.set(OUTPUT_FILE_NAME, outputFileName);
    conf.set(REPLACE_FROM, replaceFrom);
    conf.set(REPLACE_TO, replaceTo);
    conf.set(START_INDEX, startIndex);
    conf.set(END_INDEX, endIndex);

    Job job = Job.getInstance(conf, "KVReplicatorMapFile - Replicating Map File");
    job.setJarByClass(KVReplicatorMapFile.class);

    job.setMapperClass(KVReplicatorMapper.class);
    job.setReducerClass(KVReplicatorReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(numberOfReducer);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim()));

    FileSystem fs = FileSystem.get(conf);
    Path dummyPath = new Path("/tmp", "dummy");
    if (fs.exists(dummyPath)) {
        fs.delete(dummyPath, true);
    }
    FileOutputFormat.setOutputPath(job, dummyPath);

    boolean result = job.waitForCompletion(true);
    return (result ? 0 : 1);
}
From source file:com.blackberry.logdriver.util.Cat.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}