Example usage for org.apache.hadoop.conf Configuration addResource

Introduction

This page collects example usages of the addResource method of org.apache.hadoop.conf.Configuration.

Prototype

public void addResource(Path file)

Configuration also provides addResource overloads that accept a String classpath resource name, a URL, an InputStream, or another Configuration; the examples below all use the Path overload.

Document

Add a configuration resource. Properties defined in a resource added later override those from resources added earlier, unless the earlier property is marked final.
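
A minimal sketch of the typical pattern, assuming the site files live under /etc/hadoop/conf (adjust for your installation); the class name and the property looked up are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class AddResourceExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical locations; substitute your own HADOOP_CONF_DIR.
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
        // A resource added later overrides earlier ones (final properties excepted).
        System.out.println(conf.get("fs.defaultFS"));
    }
}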

Usage

From source file: edu.buffalo.cse.dic.mapreduce.WordCount.java

License: Apache License

@Override
public Map<String, Number> start(String inputFile) {
    try {
        LinkedHashMap<String, Number> topTen = new LinkedHashMap<>();
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/core-site.xml"));
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/hdfs-site.xml"));

        FileSystem fs = FileSystem.get(new URI("wordcount"), conf);
        fs.delete(new Path("wordcount"), true);

        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputFile));
        FileOutputFormat.setOutputPath(job, new Path("wordcount"));
        job.waitForCompletion(true);
        System.out.println("word count done");

        FileSystem fsa = FileSystem.get(new URI("wordcount"), conf);
        fsa.delete(new Path("wordcountfinal"), true);

        Job sortJob = new Job(conf, "sort reducer");
        sortJob.setJarByClass(SortReducerOutput.class);
        sortJob.setMapperClass(OutputBreaker.class);
        sortJob.setSortComparatorClass(ReverseComparator.class);
        sortJob.setReducerClass(SortByCount.class);
        sortJob.setOutputKeyClass(IntWritable.class);
        sortJob.setOutputValueClass(Text.class);
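        // Note: TotalOrderPartitioner expects the partition file to already
        // exist (typically written by InputSampler); this excerpt does not create it.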
        sortJob.setPartitionerClass(TotalOrderPartitioner.class);
        Path partitionFile = new Path("trendcount", "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(sortJob.getConfiguration(), partitionFile);
        FileInputFormat.addInputPath(sortJob, new Path("wordcount/part-r-00000"));
        FileOutputFormat.setOutputPath(sortJob, new Path("wordcountfinal"));
        sortJob.waitForCompletion(true);
        System.out.println("sort word count");

        Path output = new Path("wordcountfinal/part-r-00000");
        FileSystem fileSystem = FileSystem.get(output.toUri(), conf);
        FileStatus[] items = fileSystem.listStatus(output);
        for (FileStatus item : items) {
            InputStream stream = null;
            // ignoring files like _SUCCESS
            if (item.getPath().getName().startsWith("_")) {
                continue;
            } else {
                stream = fileSystem.open(item.getPath());
            }
            Scanner scan = new Scanner(stream).useDelimiter("\\n");
            for (int i = 0; i < 10; i++) {
                if (scan.hasNext()) {
                    String data = scan.next();
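                    // Each output line is "count<TAB>word"; map word -> count.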
                    topTen.put(data.split("\\t")[1], Integer.parseInt(data.split("\\t")[0]));
                }
            }
        }
        return topTen;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
    return null;
}

From source file: edu.cooper.cloud.Normalize.java

License: Apache License

public static void main(String[] args) throws Exception {

    String input = "datasets/train_subject01.csv";
    String output = "output/trainX2.csv";

    Configuration conf = new Configuration();
    Map<String, String> env = System.getenv();
    Path coreSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/core-site.xml");
    Path hdfsSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/hdfs-site.xml");
    Path yarnSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/yarn-site.xml");
    Path mapredSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/mapred-site.xml");
    conf.addResource(coreSiteXml);
    conf.addResource(hdfsSiteXml);
    conf.addResource(yarnSiteXml);
    conf.addResource(mapredSiteXml);

    //        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    //        if (otherArgs.length != 2) {
    //            System.err.println("Usage: wordcount <in> <out>");
    //            System.exit(2);
    //        }

    Job job = new Job(conf, "normalize");
    job.setJarByClass(Normalize.class);
    job.setMapperClass(NormalizeMapper.class);
    job.setCombinerClass(NormalizeCombiner.class);
    job.setReducerClass(NormalizeReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleArrayWritable.class);
    //        job.setInputFormatClass(new FileInputFormat<IntWritable,DoubleArrayWritable>());

    Path inputPath = new Path(input);
    System.out.println(inputPath);
    Path outputPath = new Path(output);
    System.out.println(outputPath);

    NLineInputFormat.addInputPath(job, inputPath);
    //        FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    System.exit(job.waitForCompletion(true) ? 0 : 1);

    //        Use means and std dev to normalize the data

}

From source file: edu.emory.bmi.datacafe.hdfs.HdfsUtil.java

License: Open Source License

/**
 * Get the HDFS file system.
 * @return the HDFS file system
 * @throws java.io.IOException if the HDFS file system cannot be obtained
 */
public static FileSystem getFileSystem() throws IOException {
    Configuration config = new Configuration();
    config.addResource(new Path(ConfigReader.getHadoopConf() + File.separator + HDFSConstants.CORE_SITE_XML));
    config.addResource(new Path(ConfigReader.getHadoopConf() + File.separator + HDFSConstants.HDFS_SITE_XML));

    return FileSystem.get(config);
}

From source file: edu.uci.ics.hyracks.imru.file.ConfigurationFactory.java

License: Apache License

public Configuration createConfiguration() throws HyracksDataException {
    if (!hasConf)
        return null;
    try {
        Configuration conf = new Configuration();
        conf.addResource(new Path(hadoopConfPath + "/core-site.xml"));
        conf.addResource(new Path(hadoopConfPath + "/mapred-site.xml"));
        conf.addResource(new Path(hadoopConfPath + "/hdfs-site.xml"));
        return conf;
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}

From source file: edu.umd.cloud9.example.hbase.HBaseWordCount.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(
            OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    // Create the output table, dropping it first if it already exists.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    // Now we're ready to start running MapReduce.
    LOG.info("Tool: " + HBaseWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(HBaseWordCount.class.getSimpleName());
    job.setJarByClass(HBaseWordCount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.cloud9.example.hbase.HBaseWordCountFetch.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(TABLE));
    options.addOption(
            OptionBuilder.withArgName("word").hasArg().withDescription("word to look up").create(WORD));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(TABLE) || !cmdline.hasOption(WORD)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String tableName = cmdline.getOptionValue(TABLE);
    String word = cmdline.getOptionValue(WORD);

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    HTableInterface table = hbaseConnection.getTable(tableName);

    Get get = new Get(Bytes.toBytes(word));
    Result result = table.get(get);

    int count = Bytes.toInt(result.getValue(HBaseWordCount.CF, HBaseWordCount.COUNT));

    LOG.info("word: " + word + ", count: " + count);

    return 0;
}

From source file: edu.umd.gorden2.BooleanRetrievalHBase.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        //formatter.printHelp(LookupPostings.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    table = hbaseConnection.getTable(indexPath);

    FileSystem fs = FileSystem.get(conf);
    collection = fs.open(new Path(collectionPath));
    stack = new Stack<Set<Integer>>();

    //initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);

        runQuery(q);
        System.out.println("");
    }

    return 1;
}

From source file: edu.umd.gorden2.BuildInvertedIndexHBase.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("HBase table name").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));
    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    LOG.info("Tool name: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputTable);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);

    job.setMapperClass(MyMapper.class);
    //job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    // Path outputDir = new Path(outputPath);
    // FileSystem.get(getConf()).delete(outputDir, true);

    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.honghongie.BooleanRetrievalHBase.java

License: Apache License

private void initialize(String tableName, String collectionPath, FileSystem fs) throws IOException {
    //initialize index
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    index = hbaseConnection.getTable(tableName);

    collection = fs.open(new Path(collectionPath));
    stack = new Stack<Set<Integer>>();
}

From source file: edu.umd.honghongie.BuildInvertedIndexHBase.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(
            OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);

    // Create the output table, dropping it first if it already exists.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    // Now we're ready to start running MapReduce.

    LOG.info("Tool name: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);

    job.setMapperClass(MyMapper.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}