Example usage for org.apache.hadoop.conf Configuration addResource

Introduction

This page collects example usages of the addResource method of org.apache.hadoop.conf.Configuration.

Prototype

public void addResource(Path file)

Configuration also provides addResource overloads that accept a String classpath resource name, a URL, an InputStream, or another Configuration; the examples below all use the Path overload.

Document

Add a configuration resource. Properties defined in a resource added later override those from resources added earlier, unless the earlier property is marked final.
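
A minimal sketch of the typical pattern, assuming the site files live under /etc/hadoop/conf (adjust for your installation); the class name and the property looked up are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class AddResourceExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical locations; substitute your own HADOOP_CONF_DIR.
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
        // A resource added later overrides earlier ones (final properties excepted).
        System.out.println(conf.get("fs.defaultFS"));
    }
}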

Usage

From source file: edu.buffalo.cse.dic.mapreduce.WordCount.java

License: Apache License

@Override
public Map<String, Number> start(String inputFile) {
    try {
        LinkedHashMap<String, Number> topTen = new LinkedHashMap<>();
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/core-site.xml"));
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/hdfs-site.xml"));

        FileSystem fs = FileSystem.get(new URI("wordcount"), conf);
        fs.delete(new Path("wordcount"), true);

        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputFile));
        FileOutputFormat.setOutputPath(job, new Path("wordcount"));
        job.waitForCompletion(true);
        System.out.println("word count done");

        FileSystem fsa = FileSystem.get(new URI("wordcount"), conf);
        fsa.delete(new Path("wordcountfinal"), true);

        Job sortJob = new Job(conf, "sort reducer");
        sortJob.setJarByClass(SortReducerOutput.class);
        sortJob.setMapperClass(OutputBreaker.class);
        sortJob.setSortComparatorClass(ReverseComparator.class);
        sortJob.setReducerClass(SortByCount.class);
        sortJob.setOutputKeyClass(IntWritable.class);
        sortJob.setOutputValueClass(Text.class);
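        // Note: TotalOrderPartitioner expects the partition file to already
        // exist (typically written by InputSampler); this excerpt does not create it.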
        sortJob.setPartitionerClass(TotalOrderPartitioner.class);
        Path partitionFile = new Path("trendcount", "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(sortJob.getConfiguration(), partitionFile);
        FileInputFormat.addInputPath(sortJob, new Path("wordcount/part-r-00000"));
        FileOutputFormat.setOutputPath(sortJob, new Path("wordcountfinal"));
        sortJob.waitForCompletion(true);
        System.out.println("sort word count");

        Path output = new Path("wordcountfinal/part-r-00000");
        FileSystem fileSystem = FileSystem.get(output.toUri(), conf);
        FileStatus[] items = fileSystem.listStatus(output);
        for (FileStatus item : items) {
            InputStream stream = null;
            // ignoring files like _SUCCESS
            if (item.getPath().getName().startsWith("_")) {
                continue;
            } else {
                stream = fileSystem.open(item.getPath());
            }
            Scanner scan = new Scanner(stream).useDelimiter("\\n");
            for (int i = 0; i < 10; i++) {
                if (scan.hasNext()) {
                    String data = scan.next();
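                    // Each output line is "count<TAB>word"; map word -> count.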
                    topTen.put(data.split("\\t")[1], Integer.parseInt(data.split("\\t")[0]));
                }
            }
        }
        return topTen;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
    return null;
}

From source file: edu.cooper.cloud.Normalize.java

License: Apache License

public static void main(String[] args) throws Exception {

    String input = "datasets/train_subject01.csv";
    String output = "output/trainX2.csv";

    Configuration conf = new Configuration();
    Map<String, String> env = System.getenv();
    Path coreSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/core-site.xml");
    Path hdfsSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/hdfs-site.xml");
    Path yarnSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/yarn-site.xml");
    Path mapredSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/mapred-site.xml");
    conf.addResource(coreSiteXml);
    conf.addResource(hdfsSiteXml);
    conf.addResource(yarnSiteXml);
    conf.addResource(mapredSiteXml);

    //        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    //        if (otherArgs.length != 2) {
    //            System.err.println("Usage: wordcount <in> <out>");
    //            System.exit(2);
    //        }

    Job job = new Job(conf, "normalize");
    job.setJarByClass(Normalize.class);
    job.setMapperClass(NormalizeMapper.class);
    job.setCombinerClass(NormalizeCombiner.class);
    job.setReducerClass(NormalizeReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleArrayWritable.class);
    //        job.setInputFormatClass(new FileInputFormat<IntWritable,DoubleArrayWritable>());

    Path inputPath = new Path(input);
    System.out.println(inputPath);
    Path outputPath = new Path(output);
    System.out.println(outputPath);

    NLineInputFormat.addInputPath(job, inputPath);
    //        FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    System.exit(job.waitForCompletion(true) ? 0 : 1);

    //        Use means and std dev to normalize the data

}

From source file: edu.emory.bmi.datacafe.hdfs.HdfsUtil.java

License: Open Source License

/**
 * Get the HDFS file system.
 * @return the HDFS file system
 * @throws java.io.IOException if the HDFS file system cannot be obtained
 */
public static FileSystem getFileSystem() throws IOException {
    Configuration config = new Configuration();
    config.addResource(new Path(ConfigReader.getHadoopConf() + File.separator + HDFSConstants.CORE_SITE_XML));
    config.addResource(new Path(ConfigReader.getHadoopConf() + File.separator + HDFSConstants.HDFS_SITE_XML));

    return FileSystem.get(config);
}

From source file: edu.uci.ics.hyracks.imru.file.ConfigurationFactory.java

License: Apache License

public Configuration createConfiguration() throws HyracksDataException {
    if (!hasConf)
        return null;
    try {
        Configuration conf = new Configuration();
        conf.addResource(new Path(hadoopConfPath + "/core-site.xml"));
        conf.addResource(new Path(hadoopConfPath + "/mapred-site.xml"));
        conf.addResource(new Path(hadoopConfPath + "/hdfs-site.xml"));
        return conf;
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}

From source file: edu.umd.cloud9.example.hbase.HBaseWordCount.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(
            OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    // Create the output table, dropping it first if it already exists.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    // Now we're ready to start running MapReduce.
    LOG.info("Tool: " + HBaseWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(HBaseWordCount.class.getSimpleName());
    job.setJarByClass(HBaseWordCount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.cloud9.example.hbase.HBaseWordCountFetch.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(TABLE));
    options.addOption(
            OptionBuilder.withArgName("word").hasArg().withDescription("word to look up").create(WORD));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(TABLE) || !cmdline.hasOption(WORD)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String tableName = cmdline.getOptionValue(TABLE);
    String word = cmdline.getOptionValue(WORD);

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    HTableInterface table = hbaseConnection.getTable(tableName);

    Get get = new Get(Bytes.toBytes(word));
    Result result = table.get(get);

    int count = Bytes.toInt(result.getValue(HBaseWordCount.CF, HBaseWordCount.COUNT));

    LOG.info("word: " + word + ", count: " + count);

    return 0;
}

From source file: edu.umd.gorden2.BooleanRetrievalHBase.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        //formatter.printHelp(LookupPostings.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    table = hbaseConnection.getTable(indexPath);

    FileSystem fs = FileSystem.get(conf);
    collection = fs.open(new Path(collectionPath));
    stack = new Stack<Set<Integer>>();

    //initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);

        runQuery(q);
        System.out.println("");
    }

    return 1;
}

From source file: edu.umd.gorden2.BuildInvertedIndexHBase.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("HBase table name").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));
    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    LOG.info("Tool name: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputTable);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);

    job.setMapperClass(MyMapper.class);
    //job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    // Path outputDir = new Path(outputPath);
    // FileSystem.get(getConf()).delete(outputDir, true);

    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: edu.umd.honghongie.BooleanRetrievalHBase.java

License: Apache License

private void initialize(String tableName, String collectionPath, FileSystem fs) throws IOException {
    //initialize index
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    index = hbaseConnection.getTable(tableName);

    collection = fs.open(new Path(collectionPath));
    stack = new Stack<Set<Integer>>();
}

From source file: edu.umd.honghongie.BuildInvertedIndexHBase.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(
            OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);

    // Create the output table, dropping it first if it already exists.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    // Now we're ready to start running MapReduce.

    LOG.info("Tool name: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);

    job.setMapperClass(MyMapper.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}