List of usage examples for org.apache.hadoop.conf.Configuration
public Configuration()
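As a primer before the collected examples, here is a minimal sketch of what the no-argument constructor gives you: a Configuration preloaded with the default resources (core-default.xml, then core-site.xml) found on the classpath, whose properties can then be overridden programmatically and read back with typed accessors. The class name and the fs.defaultFS value below are illustrative, not taken from any of the examples that follow.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationBasics {
    public static void main(String[] args) {
        // Loads core-default.xml and core-site.xml if present on the classpath.
        Configuration conf = new Configuration();

        // Programmatic overrides take precedence over the loaded resources.
        conf.set("fs.defaultFS", "hdfs://localhost:9000");

        // Typed accessors with fall-back defaults.
        String fsUri = conf.get("fs.defaultFS");
        int bufferSize = conf.getInt("io.file.buffer.size", 4096);

        System.out.println(fsUri + ", io buffer = " + bufferSize);
    }
}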
From source file: WordCount_SiCombiner.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    // Combiner enabled in this variant.
    job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartition.class);
    job.setNumReduceTasks(5);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: BooleanRetrievalCompressed.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path").create(INDEX));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(new Configuration());
    initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);
        runQuery(q);
        System.out.println("");
    }

    return 1;
}
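The example above obtains its FileSystem with FileSystem.get(new Configuration()), letting the default resources decide which filesystem that is. A minimal sketch of that idiom (the class name FsFromConf is made up for illustration): FileSystem.get resolves the implementation from fs.defaultFS, so without a cluster configuration on the classpath it returns the local filesystem.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsFromConf {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves the scheme from fs.defaultFS (file:/// when unset).
        FileSystem fs = FileSystem.get(conf);
        System.out.println("Working directory: " + fs.getWorkingDirectory());
        System.out.println("/tmp exists: " + fs.exists(new Path("/tmp")));
    }
}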
From source file: GetRetweetersAndCountPerUser.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(args[1]), conf);
        Path dir = new Path(args[1]);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };

        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i != files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }

        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}
From source file: WordCount_PerMapTally.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Combiner disabled in this variant:
    // job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: SiCombiner.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    // Custom partitioner is set here.
    job.setPartitionerClass(WordPartitioner.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: PopulateBseData.java
License: Apache License

public PopulateBseData() throws IOException {
    Configuration conf = new Configuration();
    conf.set("hbase.zookeeper.quorum", "localhost");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    hTable = new HTable(conf, "stockData1");
}
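The HTable constructor used above comes from the old HBase client API. As a hedged sketch only, assuming an HBase 1.0+ client on the classpath, the same Configuration plumbing would go through ConnectionFactory instead (the class name PopulateBseDataModern is made up; the caller would also be responsible for closing the Connection):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;

public class PopulateBseDataModern {
    public static Table openTable() throws IOException {
        // HBaseConfiguration.create() builds a Configuration and layers
        // hbase-default.xml / hbase-site.xml on top of the Hadoop defaults.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "localhost");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        Connection connection = ConnectionFactory.createConnection(conf);
        return connection.getTable(TableName.valueOf("stockData1"));
    }
}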
From source file: PerMapTally.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Custom partitioner is set here.
    job.setPartitionerClass(WordPartitioner.class);
    // Combiner disabled in this variant:
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: UnmanagedAMLauncher.java
License: Apache License

public UnmanagedAMLauncher() throws Exception {
    this(new Configuration());
}
From source file: Relevance.java
License: Apache License

/**
 * Exact relevance is slower; non-exact relevance will have false positives.
 */
protected void batch_query(Tap source, Tap output, Fields wantedFields, RelevanceFunction func,
        Tap keysTap, String keyField, boolean useBloom, int bloom_bits, int bloom_hashes,
        boolean exact) throws IOException {
    if (!useBloom && !exact)
        throw new IllegalArgumentException("Must either use bloom filter or be exact, or both!");

    FileSystem fs = FileSystem.get(new Configuration());
    Pipe finalPipe = new Pipe("data");
    finalPipe = new Each(finalPipe, wantedFields, new Identity());

    Map<String, Tap> sources = new HashMap<String, Tap>();
    sources.put("data", source);

    Map properties = new HashMap();
    String bloomFilterPath = "/tmp/" + UUID.randomUUID().toString() + ".bloomfilter";

    if (useBloom) {
        String jobId = UUID.randomUUID().toString();
        LOG.info("Creating bloom filter");
        writeOutBloomFilter(keysTap, keyField, fs, bloomFilterPath, bloom_bits, bloom_hashes);
        properties.put("mapred.job.reuse.jvm.num.tasks", -1);
        if (!TEST_MODE) {
            properties.put("mapred.cache.files", "hdfs://" + bloomFilterPath);
        } else {
            properties.put("batch_query.relevance.file", bloomFilterPath);
        }
        LOG.info("Done creating bloom filter");
        finalPipe = new Each(finalPipe, wantedFields, getRelevanceFilter(func, jobId));
    }

    if (exact) {
        sources.put("relevant", keysTap);
        Pipe relevantRecords = new Pipe("relevant");
        relevantRecords = new Each(relevantRecords, new Fields(keyField), new Identity());
        finalPipe = new Each(finalPipe, wantedFields, getExactFilter(func),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT)));
        finalPipe = new CoGroup(finalPipe, new Fields(RELEVANT_OBJECT), relevantRecords,
                new Fields(keyField),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT), new Fields("__ignored")));
        finalPipe = new Each(finalPipe, Fields.join(wantedFields, new Fields(ID)), new Identity());
        if (func.canHaveMultipleMatches()) {
            finalPipe = new Distinct(finalPipe, new Fields(ID));
        }
        finalPipe = new Each(finalPipe, wantedFields, new Identity());
    }

    Flow flow = new FlowConnector(properties).connect(
            "Relevance: " + func.getClass().getSimpleName(), sources, output, finalPipe);
    flow.complete();

    if (useBloom)
        fs.delete(new Path(bloomFilterPath), false);
}
From source file: WordCountCounters.java
License: Apache License

public static void main(String[] args) throws Exception {
    // Let ToolRunner handle generic command-line options and
    // propagate its return value as the process exit code.
    System.exit(ToolRunner.run(new Configuration(), new WordCountCounters(), args));
}
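To round out the ToolRunner pattern used above: a minimal sketch, assuming nothing beyond hadoop-common, of how the Configuration passed to ToolRunner.run surfaces inside a Tool via getConf() after the generic options (-D key=value, -conf file.xml, and so on) have been applied. The class name ConfEchoTool and the printed property are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ConfEchoTool extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // ToolRunner has already merged the generic options into this
        // Configuration before calling run().
        Configuration conf = getConf();
        System.out.println("mapreduce.job.reduces = "
                + conf.get("mapreduce.job.reduces", "<unset>"));
        return 0;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new ConfEchoTool(), args));
    }
}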