List of usage examples for org.apache.hadoop.conf.Configuration
public Configuration()
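As a primer before the collected examples, here is a minimal sketch of what the no-argument constructor gives you: a Configuration preloaded with the default resources (core-default.xml, then core-site.xml) found on the classpath, whose properties can then be overridden programmatically and read back with typed accessors. The class name and the fs.defaultFS value below are illustrative, not taken from any of the examples that follow.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationBasics {
    public static void main(String[] args) {
        // Loads core-default.xml and core-site.xml if present on the classpath.
        Configuration conf = new Configuration();

        // Programmatic overrides take precedence over the loaded resources.
        conf.set("fs.defaultFS", "hdfs://localhost:9000");

        // Typed accessors with fall-back defaults.
        String fsUri = conf.get("fs.defaultFS");
        int bufferSize = conf.getInt("io.file.buffer.size", 4096);

        System.out.println(fsUri + ", io buffer = " + bufferSize);
    }
}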
From source file: WordCount_SiCombiner.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    // Combiner enabled in this variant.
    job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartition.class);
    job.setNumReduceTasks(5);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: BooleanRetrievalCompressed.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path").create(INDEX));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(new Configuration());
    initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);
        runQuery(q);
        System.out.println("");
    }

    return 1;
}
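The example above obtains its FileSystem with FileSystem.get(new Configuration()), letting the default resources decide which filesystem that is. A minimal sketch of that idiom (the class name FsFromConf is made up for illustration): FileSystem.get resolves the implementation from fs.defaultFS, so without a cluster configuration on the classpath it returns the local filesystem.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsFromConf {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves the scheme from fs.defaultFS (file:/// when unset).
        FileSystem fs = FileSystem.get(conf);
        System.out.println("Working directory: " + fs.getWorkingDirectory());
        System.out.println("/tmp exists: " + fs.exists(new Path("/tmp")));
    }
}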
From source file: GetRetweetersAndCountPerUser.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(args[1]), conf);
        Path dir = new Path(args[1]);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };

        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i != files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }

        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}
From source file: WordCount_PerMapTally.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount_PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Combiner disabled in this variant:
    // job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartitioner.class);
    job.setNumReduceTasks(5);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: SiCombiner.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    // Custom partitioner is set here.
    job.setPartitionerClass(WordPartitioner.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: PopulateBseData.java
License: Apache License

public PopulateBseData() throws IOException {
    Configuration conf = new Configuration();
    conf.set("hbase.zookeeper.quorum", "localhost");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    hTable = new HTable(conf, "stockData1");
}
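The HTable constructor used above comes from the old HBase client API. As a hedged sketch only, assuming an HBase 1.0+ client on the classpath, the same Configuration plumbing would go through ConnectionFactory instead (the class name PopulateBseDataModern is made up; the caller would also be responsible for closing the Connection):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;

public class PopulateBseDataModern {
    public static Table openTable() throws IOException {
        // HBaseConfiguration.create() builds a Configuration and layers
        // hbase-default.xml / hbase-site.xml on top of the Hadoop defaults.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "localhost");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        Connection connection = ConnectionFactory.createConnection(conf);
        return connection.getTable(TableName.valueOf("stockData1"));
    }
}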
From source file: PerMapTally.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PerMapTally.class);
    job.setMapperClass(TokenizerMapper.class);
    // Custom partitioner is set here.
    job.setPartitionerClass(WordPartitioner.class);
    // Combiner disabled in this variant:
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: UnmanagedAMLauncher.java
License: Apache License

public UnmanagedAMLauncher() throws Exception {
    this(new Configuration());
}
From source file: Relevance.java
License: Apache License

/**
 * Exact relevance is slower; non-exact relevance will have false positives.
 */
protected void batch_query(Tap source, Tap output, Fields wantedFields, RelevanceFunction func,
        Tap keysTap, String keyField, boolean useBloom, int bloom_bits, int bloom_hashes,
        boolean exact) throws IOException {
    if (!useBloom && !exact)
        throw new IllegalArgumentException("Must either use bloom filter or be exact, or both!");

    FileSystem fs = FileSystem.get(new Configuration());
    Pipe finalPipe = new Pipe("data");
    finalPipe = new Each(finalPipe, wantedFields, new Identity());

    Map<String, Tap> sources = new HashMap<String, Tap>();
    sources.put("data", source);

    Map properties = new HashMap();
    String bloomFilterPath = "/tmp/" + UUID.randomUUID().toString() + ".bloomfilter";

    if (useBloom) {
        String jobId = UUID.randomUUID().toString();
        LOG.info("Creating bloom filter");
        writeOutBloomFilter(keysTap, keyField, fs, bloomFilterPath, bloom_bits, bloom_hashes);
        properties.put("mapred.job.reuse.jvm.num.tasks", -1);
        if (!TEST_MODE) {
            properties.put("mapred.cache.files", "hdfs://" + bloomFilterPath);
        } else {
            properties.put("batch_query.relevance.file", bloomFilterPath);
        }
        LOG.info("Done creating bloom filter");
        finalPipe = new Each(finalPipe, wantedFields, getRelevanceFilter(func, jobId));
    }

    if (exact) {
        sources.put("relevant", keysTap);
        Pipe relevantRecords = new Pipe("relevant");
        relevantRecords = new Each(relevantRecords, new Fields(keyField), new Identity());
        finalPipe = new Each(finalPipe, wantedFields, getExactFilter(func),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT)));
        finalPipe = new CoGroup(finalPipe, new Fields(RELEVANT_OBJECT), relevantRecords,
                new Fields(keyField),
                Fields.join(wantedFields, new Fields(ID, RELEVANT_OBJECT), new Fields("__ignored")));
        finalPipe = new Each(finalPipe, Fields.join(wantedFields, new Fields(ID)), new Identity());
        if (func.canHaveMultipleMatches()) {
            finalPipe = new Distinct(finalPipe, new Fields(ID));
        }
        finalPipe = new Each(finalPipe, wantedFields, new Identity());
    }

    Flow flow = new FlowConnector(properties).connect(
            "Relevance: " + func.getClass().getSimpleName(), sources, output, finalPipe);
    flow.complete();

    if (useBloom)
        fs.delete(new Path(bloomFilterPath), false);
}
From source file: WordCountCounters.java
License: Apache License

public static void main(String[] args) throws Exception {
    // Let ToolRunner handle generic command-line options and
    // propagate its return value as the process exit code.
    System.exit(ToolRunner.run(new Configuration(), new WordCountCounters(), args));
}
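To round out the ToolRunner pattern used above: a minimal sketch, assuming nothing beyond hadoop-common, of how the Configuration passed to ToolRunner.run surfaces inside a Tool via getConf() after the generic options (-D key=value, -conf file.xml, and so on) have been applied. The class name ConfEchoTool and the printed property are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ConfEchoTool extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // ToolRunner has already merged the generic options into this
        // Configuration before calling run().
        Configuration conf = getConf();
        System.out.println("mapreduce.job.reduces = "
                + conf.get("mapreduce.job.reduces", "<unset>"));
        return 0;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new ConfEchoTool(), args));
    }
}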