List of usage examples for org.apache.hadoop.conf.Configuration.set

public void set(String name, String value)

Sets the value of the name property.
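Before the project-specific examples below, here is a minimal, self-contained sketch of the call. The property name "example.greeting" and the values are illustrative only and are not taken from any of the source files in this listing:

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Store a string value under the given property name; later reads see the new value.
        conf.set("example.greeting", "hello");           // illustrative property name
        conf.set("mapreduce.job.reduces", "2");          // a real Hadoop property, used here only as an example

        // Configuration.get returns the stored value; typed getters parse it and fall back to a default.
        System.out.println(conf.get("example.greeting"));            // prints "hello"
        System.out.println(conf.getInt("mapreduce.job.reduces", 1)); // prints 2
    }
}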
property. From source file:com.baifendian.swordfish.common.hive.metastore.HiveMetaStorePoolFactory.java
License:Apache License
public HiveMetaStorePoolFactory(String metastoreUris) {
    Configuration conf = new Configuration();

    if (StringUtils.isNotEmpty(metastoreUris)) {
        conf.set("hive.metastore.uris", metastoreUris);
    } else {
        logger.error("Metastore conf is empty.");
        throw new RuntimeException("Metastore conf is empty.");
    }

    hConf = new HiveConf(conf, HiveConf.class);
}
From source file:com.baifendian.swordfish.common.job.struct.datasource.HBaseDatasource.java
License:Apache License
@Override
public void isConnectable() throws Exception {
    Connection con = null;
    try {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", this.zkQuorum);
        if (!StringUtils.isEmpty(this.zkZnodeParent)) {
            config.set("zookeeper.znode.parent", this.zkZnodeParent);
        }
        if (this.zkPort != null && this.zkPort != 0) {
            config.set("hbase.zookeeper.property.clientPort", this.zkPort.toString());
        }
        con = ConnectionFactory.createConnection(config);
    } finally {
        if (con != null) {
            try {
                con.close();
            } catch (IOException e) {
                logger.error("hbase try conn close conn error", e);
                throw e;
            }
        }
    }
}
From source file:com.bark.hadoop.lab3.PageRank.java
@Override
public int run(String args[]) {
    String tmp = "/tmp/" + new Date().getTime();
    // long timeStamp = new Date().getTime();
    try {
        /**
         * Job 1: Parse XML input and read title,links
         */
        Configuration conf = new Configuration();
        conf.set("xmlinput.start", "<page>");
        conf.set("xmlinput.end", "</page>");

        Job job = Job.getInstance(conf);
        job.setJarByClass(PageRank.class);
        // specify a mapper
        job.setMapperClass(RedLinkMapper.class);
        // specify a reducer
        job.setReducerClass(RedLinkReducer.class);
        // specify output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(XmlInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path((args[1] + tmp + "/job1")));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job1.");
        return 2;
    }

    /**
     * Job 2: Adjacency outGraph
     */
    try {
        Configuration conf2 = new Configuration();

        Job job2 = Job.getInstance(conf2);
        job2.setJarByClass(PageRank.class);
        // specify a mapper
        job2.setMapperClass(AdjMapper.class);
        // specify a reducer
        job2.setReducerClass(AdjReducer.class);
        // specify output types
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job2, new Path((args[1] + tmp + "/job1")));
        job2.setInputFormatClass(TextInputFormat.class);
        FileOutputFormat.setOutputPath(job2, new Path((args[1] + tmp + "/job2")));
        job2.setOutputFormatClass(TextOutputFormat.class);
        job2.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job2.");
        return 2;
    }

    /**
     * Job 3: PageCount
     */
    try {
        Configuration conf3 = new Configuration();
        /**
         * Change output separator to "=" instead of default \t for this job
         */
        conf3.set("mapreduce.output.textoutputformat.separator", "=");

        Job job3 = Job.getInstance(conf3);
        job3.setJarByClass(PageRank.class);
        // specify a mapper
        job3.setMapperClass(PageCountMapper.class);
        // specify a reducer
        job3.setReducerClass(PageCountReducer.class);
        // specify output types
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(IntWritable.class);
        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job3, new Path((args[1] + tmp + "/job2")));
        job3.setInputFormatClass(TextInputFormat.class);
        FileOutputFormat.setOutputPath(job3, new Path((args[1] + tmp + "/job3")));
        job3.setOutputFormatClass(TextOutputFormat.class);
        job3.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job3.");
        return 2;
    }

    /**
     * Job 4: PageRank
     */
    for (int i = 1; i < 9; i++) {
        try {
            Configuration conf4 = new Configuration();
            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf4);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);
            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = new Integer(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf4.setInt("N", n);

            Job job4 = Job.getInstance(conf4);
            job4.setJarByClass(PageRank.class);
            // specify a mapper
            job4.setMapperClass(PageRankMapper.class);
            // specify a reducer
            job4.setReducerClass(PageRankReducer.class);
            // specify output types
            job4.setOutputKeyClass(Text.class);
            job4.setOutputValueClass(Text.class);
            // specify input and output DIRECTORIES
            if (i == 1) {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job2")));
            } else {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job4/" + (i - 1))));
            }
            job4.setInputFormatClass(TextInputFormat.class);
            FileOutputFormat.setOutputPath(job4, new Path((args[1] + tmp + "/job4/" + i)));
            job4.setOutputFormatClass(TextOutputFormat.class);
            job4.waitForCompletion(true);
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job4.");
            return 2;
        }
    }

    /**
     * Job 5: Sort iteration 1 and iteration 8
     */
    int returnCode = 0;
    for (int i = 0; i < 2; i++) {
        try {
            Configuration conf5 = new Configuration();
            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf5);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);
            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = new Integer(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf5.setInt("N", n);

            Job job5 = Job.getInstance(conf5);
            /**
             * one reducer only
             */
            job5.setNumReduceTasks(1);
            job5.setSortComparatorClass(MyWritableComparator.class);
            job5.setJarByClass(PageRank.class);
            // specify a mapper
            job5.setMapperClass(SortMapper.class);
            job5.setMapOutputKeyClass(DoubleWritable.class);
            job5.setMapOutputValueClass(Text.class);
            // specify a reducer
            job5.setReducerClass(SortReducer.class);
            // specify output types
            job5.setOutputKeyClass(Text.class);
            job5.setOutputValueClass(DoubleWritable.class);
            // specify input and output DIRECTORIES
            int y = 7 * i + 1;
            FileInputFormat.addInputPath(job5, new Path((args[1] + tmp + "/job4/" + y)));
            job5.setInputFormatClass(TextInputFormat.class);
            FileOutputFormat.setOutputPath(job5, new Path((args[1] + tmp + "/job5/" + y)));
            job5.setOutputFormatClass(TextOutputFormat.class);
            returnCode = job5.waitForCompletion(true) ? 0 : 1;
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job5.");
            return 2;
        }
    }

    /**
     * Copy necessary output files to args[1]
     */
    /**
     * Rename and copy OutLinkGraph
     */
    try {
        Configuration conf = new Configuration();
        Path outLinkGraph = new Path((args[1] + tmp + "/job2/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);
        Path output = new Path(args[1] + "/results/PageRank.outlink.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy total number of pages
     */
    try {
        Configuration conf = new Configuration();
        Path outLinkGraph = new Path((args[1] + tmp + "/job3/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);
        Path output = new Path(args[1] + "/results/PageRank.n.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy iteration 1
     */
    try {
        Configuration conf = new Configuration();
        Path outLinkGraph = new Path((args[1] + tmp + "/job5/1/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);
        Path output = new Path(args[1] + "/results/PageRank.iter1.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy iteration 8
     */
    try {
        Configuration conf = new Configuration();
        Path outLinkGraph = new Path((args[1] + tmp + "/job5/8/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);
        Path output = new Path(args[1] + "/results/PageRank.iter8.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    return returnCode;
}
From source file:com.basho.riak.hadoop.config.RiakConfig.java
License:Apache License
/**
 * Add a riak location to the {@link Configuration} passed.
 *
 * @param conf
 *            the {@link Configuration} to add a location too
 * @param location
 *            the {@link RiakLocation} to add
 * @return the {@link Configuration} with <code>location</code> added to the
 *         location property
 */
public static Configuration addLocation(Configuration conf, RiakLocation location) {
    StringBuilder sb = new StringBuilder();
    String currentLocations = conf.get(LOCATIONS_PROPERTY);

    if (currentLocations != null) {
        sb.append(currentLocations);
    }

    if (sb.length() > 0) {
        sb.append(COMMA);
    }

    sb.append(location.asString());
    conf.set(LOCATIONS_PROPERTY, sb.toString());

    return conf;
}
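The method above grows a single comma-separated property one entry at a time: it reads the current value with Configuration.get and writes the extended list back with Configuration.set. The following minimal sketch shows the same pattern with plain Configuration only; the property name and host strings are hypothetical stand-ins for RiakConfig's LOCATIONS_PROPERTY and RiakLocation.asString(), not part of the Riak Hadoop API.

import org.apache.hadoop.conf.Configuration;

public class AppendToListProperty {
    // Hypothetical property name, standing in for RiakConfig.LOCATIONS_PROPERTY.
    private static final String LOCATIONS = "example.riak.locations";

    /** Append one entry to a comma-separated list property, as addLocation does above. */
    static void addEntry(Configuration conf, String entry) {
        String current = conf.get(LOCATIONS);
        conf.set(LOCATIONS, (current == null || current.isEmpty()) ? entry : current + "," + entry);
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        addEntry(conf, "riak-a:8087");
        addEntry(conf, "riak-b:8087");
        System.out.println(conf.get(LOCATIONS)); // prints "riak-a:8087,riak-b:8087"
    }
}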
From source file:com.basho.riak.hadoop.config.RiakConfig.java
License:Apache License
/**
 * Add the output bucket for the results to the config.
 *
 * @param conf
 *            the {@link Configuration} to update
 * @param bucket
 *            the bucket to add
 * @return the updated {@link Configuration}
 */
public static Configuration setOutputBucket(Configuration conf, String bucket) {
    conf.set(OUTPUT_BUCKET_PROPERTY, bucket);
    return conf;
}
From source file:com.bigdata.hbase.HBaseConexion.java
public static Connection getConnection() throws IOException {
    Configuration config = HBaseConfiguration.create();
    config.set("hbase.zookeeper.quorum", hbaseZookeeperQuorum);
    config.set("hbase.zookeeper.property.clientPort", hbaseZookeeperClientPort);
    config.set("zookeeper.znode.parent", "/hbase-unsecure");

    Connection c = ConnectionFactory.createConnection(config);
    return c;
}
From source file:com.bizosys.hsearch.kv.indexer.KVIndexer.java
License:Apache License
/**
 * Given a indexing parameters it starts a indexing.
 * Different indexing type are:
 * SF2HB = Simple File(csv,tsv) to hbase directly.
 * SF2HF = Simple File(csv,tsv) to HFile, which can be loaded to Hbase using LoadIncrementalHfiles class from hbase.
 * SF2MF = Simple File(csv,tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable})
 * MF2HB = Map File(key and value as csv,tsv) to hbase.
 * MF2HF = Map File(key and value as csv,tsv) to HFile, which can be loaded to Hbase using LoadIncrementalHfiles class from hbase.
 * MF2MF = Map File(key and value as csv,tsv) to MapFile(key as {@link Text} and value as {@link BytesWritable})
 * HB2HB = Hbase to Hbase
 * HB2HF = Hbase to HFile which can be loaded to Hbase using LoadIncrementalHfiles class from hbase.
 * HB2MF = Hbase to MapFile(key as {@link Text} and value as {@link BytesWritable})
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void execute(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length < 7) {
        String err = "Usage : " + KVIndexer.class
                + " <<Job Type(SF2HB|SF2HF|SF2MF...)>> <<Input Source>> <<Output Sink>> <<XML File Configuration>> <<Skip Header(true|false)>> <<Run KeyGeneration Job>> <<Number Of reducer>> <<Speculative Execution>> <<scanner-cache-size>> <<filter>>";
        IdSearchLog.l.fatal(err);
        System.exit(1);
    }

    String msg = this.getClass().getName() + " > Initializing indexer job.";
    IdSearchLog.l.info(msg);

    int seq = 0;
    int len = args.length;

    String jobType = (len > seq) ? args[seq++] : "";
    String inputSource = (len > seq) ? args[seq++] : "";
    String outputSink = (len > seq) ? args[seq++] : "/tmp/hsearch-index";
    String xmlFilePath = (len > seq) ? args[seq++] : "";
    String skipHeader = (len > seq) ? args[seq++] : "false";
    boolean runKeyGenJob = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : false;
    int numberOfReducer = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 1;
    boolean speculativeExecution = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : true;
    int scannerCacheSize = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 300;
    String filter = (len > seq) ? args[seq++] : "";

    if (isEmpty(jobType)) {
        String err = this.getClass().getName()
                + " > Please enter Job type as one of these :\n SF2HB|SF2HF|SF2MF|MF2HB|MF2HF|MF2MF|HB2HB|HB2HF|HB2MF|IMF2HF";
        System.err.println(err);
        throw new IOException(err);
    }

    if (isEmpty(inputSource)) {
        String err = this.getClass().getName() + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();

    FieldMapping fm = createFieldMapping(conf, xmlFilePath, new StringBuilder());
    outputSink = outputSink.charAt(outputSink.length() - 1) == '/' ? outputSink : outputSink + "/";
    outputSink = outputSink + fm.tableName;

    createHBaseTable(fm);

    KVIndexer.FAM_NAME = fm.familyName.getBytes();
    KVIndexer.FIELD_SEPARATOR = fm.fieldSeparator;

    conf.set(XML_FILE_PATH, xmlFilePath);
    conf.set(OUTPUT_FOLDER, outputSink);
    conf.set(SKIP_HEADER, skipHeader);
    conf.set(RAW_FILE_SEPATATOR, String.valueOf(fm.fieldSeparator));

    Job job = Job.getInstance(conf,
            "com.bizosys.hsearch.kv.indexing.KVIndexer type : " + jobType + "\n" + inputSource + "\n" + outputSink);
    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(numberOfReducer);

    Integer jobTypeI = JobTypeMapping.get(jobType);
    if (jobTypeI == null)
        throw new IOException("Invalid Jobtype " + jobType);

    /**
     * if internal keyIndex is given then generate the keys first and then do indexing
     * else just run indexer by creating keys from hbase
     */
    boolean keyGenjobStatus = false;
    if (-1 != fm.internalKey && runKeyGenJob) {
        Configuration keyGenConf = HBaseConfiguration.create();
        keyGenConf.set(INPUT_SOURCE, inputSource);
        keyGenConf.set(XML_FILE_PATH, xmlFilePath);
        keyGenConf.set(OUTPUT_FOLDER, outputSink);
        keyGenConf.set(SKIP_HEADER, skipHeader);

        Job keyGenJob = Job.getInstance(keyGenConf, "Creating Keys KVKeyGenerator for " + inputSource);

        switch (jobTypeI) {
        case SF2HB:
        case SF2HF:
        case SF2MF: {
            FileInputFormat.addInputPath(keyGenJob, new Path(inputSource));

            keyGenJob.setMapperClass(KVKeyGeneratorMapperFile.class);
            keyGenJob.setInputFormatClass(TextInputFormat.class);
            keyGenJob.setMapOutputKeyClass(Text.class);
            keyGenJob.setMapOutputValueClass(Text.class);

            keyGenJob.setReducerClass(KVKeyGeneratorReducerFile.class);
            keyGenJob.setNumReduceTasks(numberOfReducer);
            keyGenJob.setOutputKeyClass(NullWritable.class);
            keyGenJob.setOutputValueClass(Text.class);

            inputSource = outputSink + "_" + INPUTWITH_KEY;
            Path intermediatePath = new Path(inputSource);
            System.out.println("Final input path " + inputSource);
            FileOutputFormat.setOutputPath(keyGenJob, intermediatePath);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case HB2HB:
        case HB2HF:
        case HB2MF: {
            Scan scan = new Scan();
            scan.setCaching(scannerCacheSize);
            scan.setCacheBlocks(false);

            byte[] family = fm.familyName.getBytes();
            for (String name : fm.nameWithField.keySet()) {
                Field fld = fm.nameWithField.get(name);
                if (!fld.isMergedKey)
                    continue;
                scan.addColumn(family, fld.sourceName.trim().getBytes());
            }

            TableMapReduceUtil.initTableMapperJob(inputSource, // input table
                    scan, // Scan instance to control CF and attribute selection
                    KVKeyGeneratorMapperHBase.class, // mapper class
                    Text.class, // mapper output key
                    ImmutableBytesWritable.class, // mapper output value
                    keyGenJob);

            TableMapReduceUtil.initTableReducerJob(inputSource, // output table
                    KVKeyGeneratorReducerHBase.class, // reducer class
                    keyGenJob);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        default:
            break;
        }
    }

    /*
     * Run job based on job type eg. SF2HB,SF2MF,SF2HF etc.
     */
    System.out.println("Sending path " + inputSource);
    runJob(jobTypeI, job, fm, inputSource, outputSink, scannerCacheSize, filter);
}
From source file:com.bizosys.hsearch.kv.indexer.KVIndexer.java
License:Apache License
private static int runJob(int jobTypeI, Job job, FieldMapping fm, String input, String output,
        int scannerCacheSize, String filter) throws IOException, InterruptedException, ClassNotFoundException {

    int jobStatus = -1;

    switch (jobTypeI) {
    case SF2HB: {
        IdSearchLog.l.info("Starting Job for SF2HB input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2MF: {
        IdSearchLog.l.info("Starting Job for SF2MF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2HF: {
        /*
         * First creates map file and then convert to hfile.
         * create intermediate dir for map file output
         */
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutpurDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for SF2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intremediate output folder "
                + intermediateFolder + " final output dir " + output);

        //reset the output folder to intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);
        int jobT = JobTypeMapping.get("SF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        //delete intermediate dir
        FileSystem.get(conf).delete(intermediateOutpurDir, true);
        //delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2HB: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                Text.class, // mapper output key
                BytesWritable.class, // mapper output value
                job);

        TableMapReduceUtil.initTableReducerJob(fm.tableName, // output table
                KVReducerHBase.class, // reducer class
                job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutpurDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for HB2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intremediate output folder "
                + intermediateFolder + " final output dir " + output);

        //reset the output folder to intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);
        int jobT = JobTypeMapping.get("HB2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        //delete intermediate dir
        FileSystem.get(conf).delete(intermediateOutpurDir, true);
        //delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2MF: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                Text.class, // mapper output key
                BytesWritable.class, // mapper output value
                job);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case IMF2HF: {
        Path finalOutputDir = new Path(output);
        job.setJarByClass(KVIndexer.class);
        job.setMapperClass(KVMapperHFile.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, finalOutputDir);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        HTable hTable = new HTable(job.getConfiguration(), fm.tableName);
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    default:
        throw new IOException("Invalid Jobtype " + jobTypeI);
    }
}
From source file:com.bizosys.hsearch.kv.indexing.KVIndexer.java
License:Apache License
/**
 * Given a indexing parameters it starts a indexing.
 * Different indexing type are:
 * SF2HB = Simple File(csv,tsv) to hbase directly.
 * SF2HF = Simple File(csv,tsv) to HFile, which can be loaded to Hbase using LoadIncrementalHfiles class from hbase.
 * SF2MF = Simple File(csv,tsv) to MapFile (key as {@link Text} and value as {@link BytesWritable})
 * MF2HB = Map File(key and value as csv,tsv) to hbase.
 * MF2HF = Map File(key and value as csv,tsv) to HFile, which can be loaded to Hbase using LoadIncrementalHfiles class from hbase.
 * MF2MF = Map File(key and value as csv,tsv) to MapFile(key as {@link Text} and value as {@link BytesWritable})
 * HB2HB = Hbase to Hbase
 * HB2HF = Hbase to HFile which can be loaded to Hbase using LoadIncrementalHfiles class from hbase.
 * HB2MF = Hbase to MapFile(key as {@link Text} and value as {@link BytesWritable})
 * @param args
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void execute(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length < 7) {
        String err = "Usage : " + KVIndexer.class
                + " <<Job Type(SF2HB|SF2HF|SF2MF...)>> <<Input Source>> <<Output Sink>> <<XML File Configuration>> <<Skip Header(true|false)>> <<Run KeyGeneration Job>> <<Number Of reducer>> <<Speculative Execution>> <<scanner-cache-size>> <<filter>>";
        IdSearchLog.l.fatal(err);
        System.exit(1);
    }

    String msg = this.getClass().getName() + " > Initializing indexer job.";
    IdSearchLog.l.info(msg);

    int seq = 0;
    int len = args.length;

    String jobType = (len > seq) ? args[seq++] : "";
    String inputSource = (len > seq) ? args[seq++] : "";
    String outputSink = (len > seq) ? args[seq++] : "/tmp/hsearch-index";
    String xmlFilePath = (len > seq) ? args[seq++] : "";
    String skipHeader = (len > seq) ? args[seq++] : "false";
    boolean runKeyGenJob = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : false;
    int numberOfReducer = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 1;
    boolean speculativeExecution = (len > seq) ? args[seq++].trim().equalsIgnoreCase("true") : true;
    int scannerCacheSize = (len > seq) ? Integer.parseInt(args[seq++].trim()) : 300;
    String filter = (len > seq) ? args[seq++] : "";

    if (isEmpty(jobType)) {
        String err = this.getClass().getName()
                + " > Please enter Job type as one of these :\n SF2HB|SF2HF|SF2MF|MF2HB|MF2HF|MF2MF|HB2HB|HB2HF|HB2MF|IMF2HF";
        System.err.println(err);
        throw new IOException(err);
    }

    if (isEmpty(inputSource)) {
        String err = this.getClass().getName() + " > Please enter input file path.";
        System.err.println(err);
        throw new IOException(err);
    }

    Configuration conf = HBaseConfiguration.create();

    FieldMapping fm = createFieldMapping(conf, xmlFilePath, new StringBuilder());
    outputSink = outputSink.charAt(outputSink.length() - 1) == '/' ? outputSink : outputSink + "/";
    outputSink = outputSink + fm.tableName;

    createHBaseTable(fm);

    KVIndexer.FAM_NAME = fm.familyName.getBytes();
    KVIndexer.FIELD_SEPARATOR = fm.fieldSeparator;

    conf.set(XML_FILE_PATH, xmlFilePath);
    conf.set(OUTPUT_FOLDER, outputSink);
    conf.set(SKIP_HEADER, skipHeader);
    conf.setBoolean("mapreduce.map.speculative", speculativeExecution);

    Job job = Job.getInstance(conf,
            "com.bizosys.hsearch.kv.indexing.KVIndexer type : " + jobType + "\n" + inputSource + "\n" + outputSink);
    job.setJarByClass(this.getClass());
    job.setNumReduceTasks(numberOfReducer);

    Integer jobTypeI = JobTypeMapping.get(jobType);
    if (jobTypeI == null)
        throw new IOException("Invalid Jobtype " + jobType);

    /**
     * if internal keyIndex is given then generate the keys first and then do indexing
     * else just run indexer by creating keys from hbase
     */
    boolean keyGenjobStatus = false;
    if (-1 != fm.internalKey && runKeyGenJob) {
        Configuration keyGenConf = HBaseConfiguration.create();
        keyGenConf.set(INPUT_SOURCE, inputSource);
        keyGenConf.set(XML_FILE_PATH, xmlFilePath);
        keyGenConf.set(OUTPUT_FOLDER, outputSink);
        keyGenConf.set(SKIP_HEADER, skipHeader);

        Job keyGenJob = Job.getInstance(keyGenConf, "Creating Keys KVKeyGenerator for " + inputSource);

        switch (jobTypeI) {
        case SF2HB:
        case SF2HF:
        case SF2MF: {
            FileInputFormat.addInputPath(keyGenJob, new Path(inputSource));

            keyGenJob.setMapperClass(KVKeyGeneratorMapperFile.class);
            keyGenJob.setInputFormatClass(TextInputFormat.class);
            keyGenJob.setMapOutputKeyClass(Text.class);
            keyGenJob.setMapOutputValueClass(Text.class);

            keyGenJob.setReducerClass(KVKeyGeneratorReducerFile.class);
            keyGenJob.setNumReduceTasks(numberOfReducer);
            keyGenJob.setOutputKeyClass(NullWritable.class);
            keyGenJob.setOutputValueClass(Text.class);

            inputSource = outputSink + "_" + INPUTWITH_KEY;
            Path intermediatePath = new Path(inputSource);
            System.out.println("Final input path " + inputSource);
            FileOutputFormat.setOutputPath(keyGenJob, intermediatePath);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case HB2HB:
        case HB2HF:
        case HB2MF: {
            Scan scan = new Scan();
            scan.setCaching(scannerCacheSize);
            scan.setCacheBlocks(false);

            // Added Filter
            if (null != filter) {
                if (filter.trim().length() > 0) {
                    int index = filter.indexOf('=');
                    scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                            filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                            filter.substring(index + 1).getBytes()));
                }
            }

            byte[] family = fm.familyName.getBytes();
            for (String name : fm.nameWithField.keySet()) {
                Field fld = fm.nameWithField.get(name);
                if (!fld.isMergedKey)
                    continue;
                scan.addColumn(family, fld.sourceName.trim().getBytes());
            }

            TableMapReduceUtil.initTableMapperJob(inputSource, // input table
                    scan, // Scan instance to control CF and attribute selection
                    KVKeyGeneratorMapperHBase.class, // mapper class
                    Text.class, // mapper output key
                    ImmutableBytesWritable.class, // mapper output value
                    keyGenJob);

            TableMapReduceUtil.initTableReducerJob(inputSource, // output table
                    KVKeyGeneratorReducerHBase.class, // reducer class
                    keyGenJob);

            keyGenjobStatus = keyGenJob.waitForCompletion(true);
            if (!keyGenjobStatus) {
                throw new IOException("Error in running Job for Key Generation");
            }
            break;
        }
        case MF2HB:
        case MF2HF:
        case MF2MF: {
            break;
        }
        default:
            break;
        }
    }

    /*
     * Run job based on job type eg. SF2HB,SF2MF,SF2HF etc.
     */
    System.out.println("Sending path " + inputSource);
    runJob(jobTypeI, job, fm, inputSource, outputSink, scannerCacheSize, filter);
}
From source file:com.bizosys.hsearch.kv.indexing.KVIndexer.java
License:Apache License
private static int runJob(int jobTypeI, Job job, FieldMapping fm, String input, String output,
        int scannerCacheSize, String filter) throws IOException, InterruptedException, ClassNotFoundException {

    int jobStatus = -1;

    switch (jobTypeI) {
    case SF2HB: {
        IdSearchLog.l.info("Starting Job for SF2HB input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case SF2HF: {
        //First creates map file and then convert to hfile.
        //create intermediate dir for map file output
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutpurDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for SF2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intremediate output folder "
                + intermediateFolder + " final output dir " + output);

        //reset the output folder to intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);
        int jobT = JobTypeMapping.get("SF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        //delete intermediate dir
        FileSystem.get(conf).delete(intermediateOutpurDir, true);
        //delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case SF2MF: {
        IdSearchLog.l.info("Starting Job for SF2MF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and output folder " + output);

        FileInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(KVMapperFile.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        job.setSortComparatorClass(TextPair.FirstComparator.class);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case MF2HB: {
        job.setMapperClass(KVMapperMapFile.class);
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        SequenceFileAsTextInputFormat.addInputPath(job, new Path(input));

        job.setReducerClass(KVReducerHBase.class);
        TableMapReduceUtil.initTableReducerJob(fm.tableName, KVReducerHBase.class, job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case MF2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutpurDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for HB2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intremediate output folder "
                + intermediateFolder + " final output dir " + output);

        //reset the output folder to intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);
        int jobT = JobTypeMapping.get("MF2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        //delete intermediate dir
        FileSystem.get(conf).delete(intermediateOutpurDir, true);
        //delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case MF2MF: {
        job.setMapperClass(KVMapperMapFile.class);
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(Text.class);
        SequenceFileAsTextInputFormat.addInputPath(job, new Path(input));

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HB: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                TextPair.class, // mapper output key
                Text.class, // mapper output value
                job);

        TableMapReduceUtil.initTableReducerJob(fm.tableName, // output table
                KVReducerHBase.class, // reducer class
                job);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case HB2HF: {
        String intermediateFolder = output + "_intermediate";
        Path intermediateOutpurDir = new Path(intermediateFolder);

        IdSearchLog.l.info("Starting Job for HB2HF input field separator " + KVIndexer.FIELD_SEPARATOR
                + " using hbase table : " + fm.tableName + " and intremediate output folder "
                + intermediateFolder + " final output dir " + output);

        //reset the output folder to intermediate folder
        Configuration conf = job.getConfiguration();
        conf.set(OUTPUT_FOLDER, intermediateFolder);
        int jobT = JobTypeMapping.get("HB2MF");
        jobStatus = runJob(jobT, job, fm, input, intermediateFolder, scannerCacheSize, filter);

        if (jobStatus == 0) {
            Configuration hfileConf = HBaseConfiguration.create();
            hfileConf.set(XML_FILE_PATH, conf.get(XML_FILE_PATH));
            Job hfileJob = Job.getInstance(hfileConf, "Creating Hfile");
            String dataInputPath = intermediateFolder + "/" + MapFile.DATA_FILE_NAME;
            jobT = JobTypeMapping.get("IMF2HF");
            jobStatus = runJob(jobT, hfileJob, fm, dataInputPath, output, scannerCacheSize, filter);
        }

        //delete intermediate dir
        FileSystem.get(conf).delete(intermediateOutpurDir, true);
        //delete the empty _SUCCESS folder
        FileSystem.get(conf).delete(new Path(output, "_SUCCESS"), true);

        return jobStatus;
    }
    case HB2MF: {
        if (fm.tableName.equals(input)) {
            throw new IOException("Input table and index table can not be same");
        }

        Scan scan = new Scan();
        scan.setCaching(scannerCacheSize);
        scan.setCacheBlocks(false);
        scan.addFamily(fm.familyName.getBytes());

        if (null != filter) {
            if (filter.trim().length() > 0) {
                int index = filter.indexOf('=');
                scan.setFilter(new SingleColumnValueFilter(fm.familyName.getBytes(),
                        filter.substring(0, index).getBytes(), CompareOp.EQUAL,
                        filter.substring(index + 1).getBytes()));
            }
        }

        TableMapReduceUtil.initTableMapperJob(input, // input table
                scan, // Scan instance to control CF and attribute selection
                KVMapperHBase.class, // mapper class
                TextPair.class, // mapper output key
                Text.class, // mapper output value
                job);

        job.setReducerClass(KVReducerMapFile.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    case IMF2HF: {
        Path finalOutputDir = new Path(output);
        job.setJarByClass(KVIndexer.class);
        job.setMapperClass(KVMapperHFile.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, finalOutputDir);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        HTable hTable = new HTable(job.getConfiguration(), fm.tableName);
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        jobStatus = job.waitForCompletion(true) ? 0 : 1;
        return jobStatus;
    }
    default:
        throw new IOException("Invalid Jobtype " + jobTypeI);
    }
}