Example usage for org.apache.hadoop.conf Configuration Configuration

Introduction

In this page you can find the example usage for org.apache.hadoop.conf Configuration Configuration.

Prototype

public Configuration()

Source Link

Document

A new configuration.

Usage

From source file:ClassAverage.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Class Average");

    job.setJarByClass(ClassAverage.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(DoubleCalcReducer.class);
    job.setReducerClass(DoubleCalcReducer.class);

    job.setOutputKeyClass(Text.class);

}

From source file:DateExample_Year.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from w ww  . j  a  va2 s.c  om
    }
    Job job = new Job(conf, "word count fs");
    job.setJarByClass(DateExample_Year.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(IsValidKeyFormat.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:RunText.java

License:Apache License

public static void main(String[] args) throws Exception {
    o = new Options();
    JCommander jc = null;/*from w w  w.  j  a va2 s  .co  m*/
    try {
        jc = new JCommander(o, args);
        jc.setProgramName("./runText");
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        String[] valid = { "-p", "path", "-d", "delimiter", "v", "value", "-i", "index" };
        new JCommander(o, valid).usage();
        System.exit(-1);
    }
    if (o.help) {
        jc.usage();
        System.exit(0);
    }
    path = new Path(o.path);
    delim = o.delimiter.getBytes()[0];
    toFind = o.value;
    index = o.index;
    numThreads = o.threads;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    TextInputFormat format = new TextInputFormat();
    long len = fs.getFileStatus(path).getLen() / numThreads;

    List<Thread> threads = Lists.newArrayList();

    for (int i = 0; i < numThreads; i++) {
        FileSplit split = new FileSplit(path, i * len, len, new String[] { "" });
        threads.add(new Thread(new RunText(split, format)));
    }

    runningThreads = new AtomicInteger(numThreads);

    for (Thread t : threads) {
        t.start();
    }

    int prev = 0;
    int current;
    long t1 = System.nanoTime();
    long t2;
    while (runningThreads.get() > 0) {
        Thread.sleep(5000);
        current = totalCount.get();
        t2 = System.nanoTime();
        System.out.println(String.format("%f records/sec", (current - prev) * 1e9 / (t2 - t1)));
        t1 = t2;
        prev = current;
    }

    for (Thread t : threads) {
        t.join();
    }

    fs.close();
}

From source file:DataHBase.java

License:Open Source License

public void run(HashMap<String, String> config) throws Exception {

    //clean the former output if it exists
    Path p = new Path(config.get("hdfs_output_dir"));
    FileSystem fs = FileSystem.get(new Configuration());
    if (fs.exists(p)) {
        fs.delete(p, true);//www.  j  ava 2s  .co m
    }

    String junction = config.get("what_to_find"); // the name of the junction
    String date1 = config.get("date1");
    String date2 = config.get("date2");
    //date1 and date2 can be of a format YYYY-MM-DD
    if (date1.length() == 10)
        date1 = date1 + " 00:00:00";
    if (date2.length() == 10)
        date2 = date2 + " 23:59:59";
    System.out.println("Looking for data of " + junction + ": " + date1 + " - " + date2);

    //create timestamps (considering time zone!) to limit data
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    sdf.setTimeZone(TimeZone.getDefault());
    Long time1 = sdf.parse(date1).getTime();
    Long time2 = sdf.parse(date2).getTime();

    //run a job
    Configuration conf = HBaseConfiguration.create();
    conf.set("mapreduce.output.textoutputformat.separator", ","); //set comma as a delimiter

    Job job = new Job(conf, "Retrieve data from hbase");
    job.setJarByClass(DataHBase.class);

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.setMaxVersions(1);
    scan.setTimeRange(time1, time2); //take a day we are interested in
    //set a filter for a junction name
    if (!junction.equals("")) {
        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("data"),
                Bytes.toBytes("location_name"), CompareOp.EQUAL, Bytes.toBytes(junction));
        scan.setFilter(filter);
    }
    //add the specific columns to the output to limit the amount of data
    scan.addFamily(Bytes.toBytes("data"));

    TableMapReduceUtil.initTableMapperJob(config.get("hbase_table"), // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            TableMap.class, // mapper
            Text.class, // mapper output key
            Text.class, // mapper output value
            job);

    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(config.get("hdfs_output_dir")));

    job.waitForCompletion(true);

}

From source file:BooleanRetrieval.java

License:Apache License

/**
 * Runs this tool.//w  w  w  . j a  v  a  2 s.c o  m
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostings.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(new Configuration());

    initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);

        runQuery(q);
        System.out.println("");
    }

    return 1;
}

From source file:q6.java

@Override
public void init() {
    try {//ww  w .  j  a v  a 2 s  . com
        TEAM = "DEADLINE,276906431060,152339165514,931814217121\n";
        tableName = "q6";
        conf = new Configuration();
        conf.set("hbase.master", publicDNS + ":60000");
        conf.set("hbase.zookeeper.quorum", publicDNS);
        conf.setInt("hbase.hconnection.threads.max", 80);
        conf.setInt("hbase.zookeeper.property.maxClientCnxns", 100);
        connection = HConnectionManager.createConnection(conf);
        System.out.println("try connecting");
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:CustomAuthenticator.java

License:Apache License

public static char[] getPassword(String credentialProvider, String alias) throws IOException {
    Configuration conf = new Configuration();
    conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, credentialProvider);
    return conf.getPassword(alias);
}

From source file:lab2_2.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem.get(conf).delete(new Path(args[1]), true);

    Job job = Job.getInstance(conf, "drive time lab 2.1");
    job.setJarByClass(lab2_1.class);
    job.setMapperClass(PartitioningMapper.class);
    job.setPartitionerClass(TypePartitioner.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(6);//from ww w.  jav a  2s  .c  om

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:TestBytesBloomFilter.java

License:Apache License

public void testSetSanity() throws IOException {
    FileSystem local = FileSystem.getLocal(new Configuration());

    BytesBloomFilter set = new BytesBloomFilter(1000000, 4);
    byte[] arr1 = new byte[] { 1, 2, 3, 4, 5, 6, 7 };
    byte[] arr2 = new byte[] { 11, 12, 5, -2 };
    byte[] arr3 = new byte[] { 3, 4, 5 };
    set.add(arr1);//  ww w. ja va 2  s  .c o m
    set.add(arr2);

    for (byte i = 0; i < (byte) 125; i++) {
        set.add(new byte[] { i });
    }

    assertTrue(set.mayContain(arr1));
    assertTrue(set.mayContain(arr2));

    for (byte i = 0; i < (byte) 125; i++) {
        assertTrue(set.mayContain(new byte[] { i }));
    }

    //technically this could be an invalid statement, but the probability is low and this is a sanity check
    assertFalse(set.mayContain(arr3));

    //now test that we can write and read from file just fine
    local.delete(new Path("/tmp/filter-test.bloomfilter"), false);
    DataOutputStream os = new DataOutputStream(new FileOutputStream("/tmp/filter-test.bloomfilter"));
    set.write(os);
    os.close();

    BytesBloomFilter set2 = new BytesBloomFilter();
    DataInputStream is = new DataInputStream(new FileInputStream("/tmp/filter-test.bloomfilter"));
    set2.readFields(is);

    assertTrue(set2.mayContain(arr1));
    assertTrue(set2.mayContain(arr2));

    for (byte i = 0; i < (byte) 125; i++) {
        assertTrue(set2.mayContain(new byte[] { i }));
    }

    //technically this could be an invalid statement, but the probability is low and this is a sanity check
    assertFalse(set2.mayContain(arr3));

}

From source file:BwaAlignmentBase.java

License:Open Source License

public ArrayList<String> copyResults(String outputSamFileName) {
    ArrayList<String> returnedValues = new ArrayList<String>();
    String outputDir = this.bwaInterpreter.getOutputHdfsDir();

    Configuration conf = new Configuration();
    try {//w w  w.j av a2s.  c om
        FileSystem fs = FileSystem.get(conf);

        fs.copyFromLocalFile(new Path(this.bwaInterpreter.getOutputFile()),
                new Path(outputDir + "/" + outputSamFileName));

        // Delete the old results file
        File tmpSamFullFile = new File(this.bwaInterpreter.getOutputFile());
        tmpSamFullFile.delete();
    } catch (IOException e) {
        e.printStackTrace();
        this.LOG.error(e.toString());
    }

    returnedValues.add(outputDir + "/" + outputSamFileName);

    return returnedValues;
}