List of usage examples for org.apache.hadoop.conf Configuration set
public void set(String name, String value)
Sets the value of the name property.
From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorHFile.java
License:Apache License
@Override public int run(String[] args) throws Exception { int seq = 0;//from w ww . j a v a 2 s . c o m String inputFile = (args.length > seq) ? args[seq] : ""; seq++; String hfileOutputFile = (args.length > seq) ? args[seq] : ""; seq++; String tableName = (args.length > seq) ? args[seq] : ""; seq++; String familyName = (args.length > seq) ? args[seq] : "1"; seq++; String replaceFrom = (args.length > seq) ? args[seq] : ""; seq++; String replaceTo = (args.length > seq) ? args[seq] : ""; seq++; String startIndex = (args.length > seq) ? args[seq] : ""; seq++; String endIndex = (args.length > seq) ? args[seq] : ""; seq++; if (null == inputFile || inputFile.trim().isEmpty()) { String err = KVReplicatorHFile.class + " > Please enter input file path."; System.err.println(err); throw new IOException(err); } Configuration conf = HBaseConfiguration.create(); conf.set(TABLE_NAME, tableName); conf.set(FAMILY_NAME, familyName); conf.set(REPLACE_FROM, replaceFrom); conf.set(REPLACE_TO, replaceTo); conf.set(START_INDEX, startIndex); conf.set(END_INDEX, endIndex); try { List<HColumnDescriptor> colFamilies = new ArrayList<HColumnDescriptor>(); HColumnDescriptor cols = new HColumnDescriptor(familyName.getBytes()); colFamilies.add(cols); HDML.create(tableName, colFamilies); } catch (HBaseException e) { e.printStackTrace(); } Job job = Job.getInstance(conf, "KVReplicatorHBase - creating HFile"); job.setJarByClass(KVReplicatorHFile.class); job.setMapperClass(KVHFileWriterMapper.class); job.setInputFormatClass(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim())); FileOutputFormat.setOutputPath(job, new Path(hfileOutputFile.trim())); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(KeyValue.class); HTable hTable = new HTable(conf, tableName); HFileOutputFormat.configureIncrementalLoad(job, hTable); boolean result = job.waitForCompletion(true); return (result ? 0 : 1); }
From source file:com.bizosys.hsearch.kv.indexing.KVReplicatorMapFile.java
License:Apache License
@Override public int run(String[] args) throws Exception { int seq = 0;//from w w w. jav a 2 s .com String inputFile = (args.length > seq) ? args[seq] : ""; seq++; String outputFile = (args.length > seq) ? args[seq++] : "/tmp/hsearch-index"; String outputFileName = (args.length > seq) ? args[seq++] : "file1"; String xmlFilePath = (args.length > seq) ? args[seq++] : ""; String replaceFrom = (args.length > seq) ? args[seq++] : ""; String replaceTo = (args.length > seq) ? args[seq++] : ""; String startIndex = (args.length > seq) ? args[seq++] : ""; String endIndex = (args.length > seq) ? args[seq++] : ""; String numberOfReducerStr = (args.length > seq) ? args[seq] : "1"; int numberOfReducer = Integer.parseInt(numberOfReducerStr); if (null == inputFile || inputFile.trim().isEmpty()) { String err = KVReplicatorHFile.class + " > Please enter input file path."; System.err.println(err); throw new IOException(err); } Configuration conf = HBaseConfiguration.create(); FieldMapping fm = KVIndexer.createFieldMapping(conf, xmlFilePath, new StringBuilder()); outputFile = outputFile.charAt(outputFile.length() - 1) == '/' ? 
outputFile : outputFile + "/"; outputFile = outputFile + fm.tableName; conf.set(OUTPUT_FILE_PATH, outputFile); conf.set(OUTPUT_FILE_NAME, outputFileName); conf.set(REPLACE_FROM, replaceFrom); conf.set(REPLACE_TO, replaceTo); conf.set(START_INDEX, startIndex); conf.set(END_INDEX, endIndex); Job job = Job.getInstance(conf, "KVReplicatorMapFile - Replicating Map File"); job.setJarByClass(KVReplicatorMapFile.class); job.setMapperClass(KVReplicatorMapper.class); job.setReducerClass(KVReplicatorReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(BytesWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BytesWritable.class); job.setNumReduceTasks(numberOfReducer); job.setInputFormatClass(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputFile.trim())); FileSystem fs = FileSystem.get(conf); Path dummyPath = new Path("/tmp", "dummy"); if (fs.exists(dummyPath)) { fs.delete(dummyPath, true); } FileOutputFormat.setOutputPath(job, dummyPath); boolean result = job.waitForCompletion(true); return (result ? 0 : 1); }
From source file:com.blackberry.logdriver.Lock.java
License:Apache License
public static void main(String[] args) { if (args.length < 1) { printUsage();/*from www .j a va2s .co m*/ System.exit(1); } // We'll be using a Configuration object Configuration conf = new Configuration(); // Check args for conf files or specific configs int i = 0; while (args[i].startsWith("-")) { String arg = args[i]; if (arg.startsWith("-confFiles=")) { String fileNames = arg.substring("-confFiles=".length()); String[] files = fileNames.split(","); for (String file : files) { conf.addResource(new Path(file)); } } else if (arg.startsWith("-zkConnectString=")) { String connectString = arg.substring("-zkConnectString=".length()); conf.set("zk.connect.string", connectString); } i++; } LockUtil lockUtil = null; try { lockUtil = new LockUtil(conf); } catch (Exception e) { LOG.error("Error getting ZooKeeper client.", e); System.exit(1); } // Look for the SCAN command. It's a little different. if (args.length > i && args[i].toUpperCase().equals("SCAN")) { String root = ""; if (args.length > 1 + 1) { root = args[i + 1]; } try { scan(lockUtil, root); } catch (Exception e) { e.printStackTrace(); } return; } // Look for the SCAN command. It's a little different. 
if (args.length > i + 1 && args[i].toUpperCase().equals("PURGE")) { String root = args[i + 1]; try { purge(lockUtil, root); } catch (Exception e) { e.printStackTrace(); } return; } // Now, just grab the required args String readWrite = null; String lockUnlock = null; String dcNumber = null; String service = null; String date = null; String hour = null; String component = null; if (args.length < i + 6) { printUsage(); System.exit(1); } readWrite = args[i].toUpperCase(); if (!readWrite.equals("READ") && !readWrite.equals("WRITE")) { printUsage(); System.exit(1); } i++; lockUnlock = args[i].toUpperCase(); if (!lockUnlock.equals("RESET") && !lockUnlock.equals("STATUS")) { printUsage(); System.exit(1); } i++; dcNumber = args[i]; i++; service = args[i]; i++; date = args[i]; i++; hour = args[i]; i++; component = args[i]; PathInfo pathInfo = new PathInfo(); try { pathInfo.setDcNumber(dcNumber); pathInfo.setService(service); pathInfo.setDate(date); pathInfo.setHour(hour); pathInfo.setComponent(component); } catch (Exception e) { LOG.error("Exception configuring path info.", e); System.exit(1); } String lockPath = null; try { lockPath = lockUtil.getLockPath(pathInfo); } catch (Exception e) { LOG.error("Error getting lock path", e); System.exit(1); } if (readWrite.equals("READ") && lockUnlock.equals("RESET")) { while (true) { try { lockUtil.resetReadLock(lockPath); } catch (KeeperException.ConnectionLossException e) { LOG.warn("Lost connection to ZooKeeper. Retrying.", e); continue; } catch (Exception e) { LOG.error("Unexpected error", e); System.exit(1); } break; } } else if (readWrite.equals("READ") && lockUnlock.equals("STATUS")) { long numLocks = 0; while (true) { try { numLocks = lockUtil.getReadLockCount(lockPath); } catch (KeeperException.ConnectionLossException e) { LOG.warn("Lost connection to ZooKeeper. 
Retrying.", e); continue; } catch (Exception e) { LOG.error("Unexpected error", e); System.exit(1); } System.out.println("Read lock count is " + numLocks); break; } } else if (readWrite.equals("WRITE") && lockUnlock.equals("RESET")) { while (true) { try { lockUtil.resetWriteLock(lockPath); } catch (KeeperException.ConnectionLossException e) { LOG.warn("Lost connection to ZooKeeper. Retrying.", e); continue; } catch (Exception e) { LOG.error("Unexpected error", e); System.exit(1); } break; } } else if (readWrite.equals("WRITE") && lockUnlock.equals("STATUS")) { long numLocks = 0; while (true) { try { numLocks = lockUtil.getWriteLockCount(lockPath); } catch (KeeperException.ConnectionLossException e) { LOG.warn("Lost connection to ZooKeeper. Retrying.", e); continue; } catch (Exception e) { LOG.error("Unexpected error", e); System.exit(1); } System.out.println("Write lock count is " + numLocks); break; } } }
From source file:com.blackberry.logdriver.LockedFs.java
License:Apache License
public int run(String[] args) throws Exception { // The required args are zkConnectString, dcNumber, service, date, hour, // component, from, to if (args.length < 7) { printUsage();/*www .j a v a 2s .c o m*/ System.exit(1); } String zkConnectString = args[0]; String dcNumber = args[1]; String service = args[2]; String date = args[3]; String hour = args[4]; String component = args[5]; String[] commands = new String[args.length - 6]; String logDir = getConf().get("logdriver.logdir.name", "logs"); for (int i = 6; i < args.length; i++) { commands[i - 6] = args[i]; } // Set the configuration correctly, so we can reach zookeeper Configuration conf = getConf(); conf.set("zk.connect.string", zkConnectString); LockUtil lockUtil = null; String lockPath = null; try { lockUtil = new LockUtil(conf); PathInfo pathInfo = new PathInfo(); pathInfo.setDcNumber(dcNumber); pathInfo.setService(service); pathInfo.setLogdir(logDir); pathInfo.setDate(date); pathInfo.setHour(hour); pathInfo.setComponent(component); lockPath = lockUtil.getLockPath(pathInfo); // Get the write lock while (true) { try { lockUtil.acquireWriteLock(lockPath); break; } catch (KeeperException.ConnectionLossException e) { LOG.warn("Lost connection to ZooKeeper. 
Retrying.", e); } } for (String command : commands) { LOG.info("Running {}", command); String[] parts = command.split("\\s+"); if ("move".equals(parts[0].toLowerCase())) { if (parts.length < 3) { LOG.error("Move required at least 2 arguements"); return 1; } String[] from = new String[parts.length - 2]; for (int i = 1; i < parts.length - 1; i++) { from[i - 1] = parts[i]; } String to = parts[parts.length - 1]; move(conf, from, to); } else if ("delete".equals(parts[0].toLowerCase())) { for (int i = 1; i < parts.length; i++) { delete(conf, parts[i]); } } else if ("touch".equals(parts[0].toLowerCase())) { for (int i = 1; i < parts.length; i++) { touch(conf, parts[i]); } } } } catch (Exception e) { LOG.error("Caught exception.", e); } finally { // Release the write lock while (true) { try { lockUtil.releaseWriteLock(lockPath); lockUtil.close(); break; } catch (KeeperException.ConnectionLossException e) { LOG.warn("Lost connection to ZooKeeper. Retrying.", e); } } } return 0; }
From source file:com.blackberry.logdriver.LockedFsShell.java
License:Apache License
public static void main(String[] args) { // The required args are zkConnectString, dcNumber, service, date, hour, // component and an arbitrary number of commands. if (args.length < 7) { printUsage();/*w ww . j a v a 2 s . co m*/ System.exit(1); } String zkConnectString = args[0]; String dcNumber = args[1]; String service = args[2]; String date = args[3]; String hour = args[4]; String component = args[5]; // Set the configuration correctly, so we can reach zookeeper Configuration conf = new Configuration(); conf.set("zk.connect.string", zkConnectString); try { LockUtil lockUtil = new LockUtil(conf); PathInfo pathInfo = new PathInfo(); pathInfo.setDcNumber(dcNumber); pathInfo.setService(service); pathInfo.setDate(date); pathInfo.setHour(hour); pathInfo.setComponent(component); String lockPath = lockUtil.getLockPath(pathInfo); // Get the write lock while (true) { try { lockUtil.acquireWriteLock(lockPath); break; } catch (KeeperException.ConnectionLossException e) { LOG.warn("Lost connection to ZooKeeper. Retrying.", e); } } // Run the commands int res = 0; for (int i = 6; i < args.length; i++) { String[] fsShellArgs = args[i].split("\\s+"); LOG.info("Calling FsShell with args {}", args[i]); FsShell shell = new FsShell(); try { res = ToolRunner.run(shell, fsShellArgs); } finally { shell.close(); } if (res != 0) { break; } } // Release the write lock while (true) { try { lockUtil.releaseWriteLock(lockPath); break; } catch (KeeperException.ConnectionLossException e) { LOG.warn("Lost connection to ZooKeeper. Retrying.", e); } } if (res != 0) { LOG.error("Bad return value ({}) from FsShell", res); System.exit(res); } } catch (Exception e) { e.printStackTrace(); System.exit(1); } }
From source file:com.blackberry.logdriver.util.Cat.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//from w w w .j a va2s . c om FileSystem fs = FileSystem.get(conf); // The command line options List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 2) { System.out.println("usage: [genericOptions] input [input ...] output"); System.exit(1); } // Get the files we need from the command line. for (int i = 0; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Cat.class); jobConf.setIfUnset("mapred.job.name", "Cat Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(CatMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.blackberry.logdriver.util.FastSearch.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//from w w w . j av a2s. co m FileSystem fs = FileSystem.get(conf); // The command line options String searchString = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchString input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchString = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(FastSearch.class); jobConf.setIfUnset("mapred.job.name", "Search Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8"))); job.setInputFormatClass(AvroBlockInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { AvroBlockInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.blackberry.logdriver.util.Grep.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//from ww w. java2 s .c o m FileSystem fs = FileSystem.get(conf); // The command line options String regex = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] regex input [input ...] output"); System.exit(1); } // Get the files we need from the command line. regex = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Grep.class); jobConf.setIfUnset("mapred.job.name", "Grep Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8"))); job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(GrepMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.blackberry.logdriver.util.MultiSearch.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//www . j a v a 2 s.c o m FileSystem fs = FileSystem.get(conf); // The command line options String searchStringDir = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchStringDir = args[0]; // We are going to be reading all the files in this directory a lot. So // let's up the replication factor by a lot so that they're easy to read. for (FileStatus f : fs.listStatus(new Path(searchStringDir))) { fs.setReplication(f.getPath(), (short) 16); } for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(MultiSearch.class); jobConf.setIfUnset("mapred.job.name", "MultiSearch"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string.dir", searchStringDir); // This search is generally too fast to make good use of 128MB blocks, so // let's set the value to 256MB (if it's not set already) if (jobConf.get("mapred.max.split.size") == null) { jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024); } job.setInputFormatClass(AvroBlockInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { AvroBlockInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.blackberry.logdriver.util.Search.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }/*from www .jav a2 s.c o m*/ FileSystem fs = FileSystem.get(conf); // The command line options String searchString = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchString input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchString = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Search.class); jobConf.setIfUnset("mapred.job.name", "Search Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string", searchString); job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }