List of usage examples for org.apache.hadoop.conf.Configuration.addResource
public void addResource(Path file)
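Every example below follows the same basic pattern: construct a Configuration, point addResource at one or more *-site.xml files, and hand the result to a FileSystem, Job, or HBase client. A minimal, self-contained sketch of that pattern (the /etc/hadoop/conf paths are an assumption for illustration, not taken from the source files below):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AddResourceSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resources are applied in the order they are added; a property set
        // in a later resource overrides the same property from an earlier
        // one, unless the earlier definition is marked final.
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
        FileSystem fs = FileSystem.get(conf);
        System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));
        System.out.println("working dir  = " + fs.getWorkingDirectory());
    }
}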
From source file:edu.buffalo.cse.dic.mapreduce.WordCount.java
License:Apache License
@Override
public Map<String, Number> start(String inputFile) {
    try {
        LinkedHashMap<String, Number> topTen = new LinkedHashMap<>();
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/core-site.xml"));
        conf.addResource(new Path("/usr/local/hadoop/etc/hadoop/hdfs-site.xml"));

        // First job: word count.
        FileSystem fs = FileSystem.get(new URI("wordcount"), conf);
        fs.delete(new Path("wordcount"));
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputFile));
        FileOutputFormat.setOutputPath(job, new Path("wordcount"));
        job.waitForCompletion(true);
        System.out.println("word count done");

        // Second job: sort the word counts.
        FileSystem fsa = FileSystem.get(new URI("wordcount"), conf);
        fsa.delete(new Path("wordcountfinal"));
        Job sortJob = new Job(conf, "sort reducer");
        sortJob.setJarByClass(SortReducerOutput.class);
        sortJob.setMapperClass(OutputBreaker.class);
        sortJob.setSortComparatorClass(ReverseComparator.class);
        sortJob.setReducerClass(SortByCount.class);
        sortJob.setOutputKeyClass(IntWritable.class);
        sortJob.setOutputValueClass(Text.class);
        sortJob.setPartitionerClass(TotalOrderPartitioner.class);
        Path partitionFile = new Path("trendcount", "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(sortJob.getConfiguration(), partitionFile);
        FileInputFormat.addInputPath(sortJob, new Path("wordcount/part-r-00000"));
        FileOutputFormat.setOutputPath(sortJob, new Path("wordcountfinal"));
        sortJob.waitForCompletion(true);
        System.out.println("sort word count");

        // Read the top ten entries from the sorted output.
        Path output = new Path("wordcountfinal/part-r-00000");
        FileSystem fileSystem = FileSystem.get(output.toUri(), conf);
        FileStatus[] items = fileSystem.listStatus(output);
        for (FileStatus item : items) {
            InputStream stream = null;
            // ignoring files like _SUCCESS
            if (item.getPath().getName().startsWith("_")) {
                continue;
            } else {
                stream = fileSystem.open(item.getPath());
            }
            Scanner scan = new Scanner(stream).useDelimiter("\\n");
            for (int i = 0; i < 10; i++) {
                if (scan.hasNext()) {
                    String data = scan.next();
                    topTen.put(data.split("\\t")[1], Integer.parseInt(data.split("\\t")[0]));
                }
            }
        }
        return topTen;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
    return null;
}
From source file:edu.cooper.cloud.Normalize.java
License:Apache License
public static void main(String[] args) throws Exception {
    String input = "datasets/train_subject01.csv";
    String output = "output/trainX2.csv";

    Configuration conf = new Configuration();

    // Load the cluster configuration from HADOOP_CONF_DIR.
    Map<String, String> env = System.getenv();
    Path coreSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/core-site.xml");
    Path hdfsSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/hdfs-site.xml");
    Path yarnSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/yarn-site.xml");
    Path mapredSiteXml = new Path(env.get("HADOOP_CONF_DIR") + "/mapred-site.xml");
    conf.addResource(coreSiteXml);
    conf.addResource(hdfsSiteXml);
    conf.addResource(yarnSiteXml);
    conf.addResource(mapredSiteXml);

    // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    //     System.err.println("Usage: wordcount <in> <out>");
    //     System.exit(2);
    // }

    Job job = new Job(conf, "normalize");
    job.setJarByClass(Normalize.class);
    job.setMapperClass(NormalizeMapper.class);
    job.setCombinerClass(NormalizeCombiner.class);
    job.setReducerClass(NormalizeReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleArrayWritable.class);
    // job.setInputFormatClass(new FileInputFormat<IntWritable, DoubleArrayWritable>());

    Path inputPath = new Path(input);
    System.out.println(inputPath);
    Path outputPath = new Path(output);
    System.out.println(outputPath);

    NLineInputFormat.addInputPath(job, inputPath);
    // FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);

    // Use means and std dev to normalize the data
}
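One caveat with the example above: System.getenv() returns null when HADOOP_CONF_DIR is unset, and the string concatenation then quietly yields paths like "null/core-site.xml". A defensive variant of the same pattern, as a sketch (the class and method names are hypothetical, not part of Normalize.java):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public final class ConfFromEnv {
    /** Builds a Configuration from the site files under HADOOP_CONF_DIR. */
    public static Configuration fromHadoopConfDir() {
        String confDir = System.getenv("HADOOP_CONF_DIR");
        if (confDir == null || confDir.isEmpty()) {
            // Fail fast instead of letting addResource see "null/..." paths.
            throw new IllegalStateException("HADOOP_CONF_DIR is not set");
        }
        Configuration conf = new Configuration();
        for (String site : new String[] { "core-site.xml", "hdfs-site.xml",
                "yarn-site.xml", "mapred-site.xml" }) {
            conf.addResource(new Path(confDir, site));
        }
        return conf;
    }
}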
From source file:edu.emory.bmi.datacafe.hdfs.HdfsUtil.java
License:Open Source License
/**
 * Get the HDFS file system.
 *
 * @return the hdfs file system
 * @throws java.io.IOException in getting the hdfs file system
 */
public static FileSystem getFileSystem() throws IOException {
    Configuration config = new Configuration();
    config.addResource(new Path(ConfigReader.getHadoopConf() + File.separator + HDFSConstants.CORE_SITE_XML));
    config.addResource(new Path(ConfigReader.getHadoopConf() + File.separator + HDFSConstants.HDFS_SITE_XML));
    return FileSystem.get(config);
}
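Worth noting for callers of this helper: FileSystem.get(config) returns a cached instance that is shared across the JVM for the same URI scheme and user, so closing it can break other code still holding a reference. A hypothetical caller (not part of HdfsUtil.java):

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsUtilExample {
    public static void main(String[] args) throws Exception {
        // getFileSystem() hands back Hadoop's cached FileSystem instance,
        // so we use it but deliberately do not close it here.
        FileSystem fs = HdfsUtil.getFileSystem();
        System.out.println("/tmp exists: " + fs.exists(new Path("/tmp")));
    }
}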
From source file:edu.uci.ics.hyracks.imru.file.ConfigurationFactory.java
License:Apache License
public Configuration createConfiguration() throws HyracksDataException {
    if (!hasConf)
        return null;
    try {
        Configuration conf = new Configuration();
        conf.addResource(new Path(hadoopConfPath + "/core-site.xml"));
        conf.addResource(new Path(hadoopConfPath + "/mapred-site.xml"));
        conf.addResource(new Path(hadoopConfPath + "/hdfs-site.xml"));
        return conf;
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}
From source file:edu.umd.cloud9.example.hbase.HBaseWordCount.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    // If the table already exists, drop and recreate it.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Dropping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));
    admin.close();

    // Now we're ready to start running MapReduce.
    LOG.info("Tool: " + HBaseWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(HBaseWordCount.class.getSimpleName());
    job.setJarByClass(HBaseWordCount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
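The HBase examples in this list all share one addResource idiom: pull the client's hbase-site.xml into the job Configuration, then let HBaseConfiguration.create(conf) supply the HBase defaults. Distilled into a sketch (the /etc/hbase/conf path is the same assumption the examples make; the class name is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;

public final class HBaseConfSketch {
    /** Layers HBase client settings onto an existing Configuration. */
    public static Configuration withHBase(Configuration base) {
        // Client-side HBase settings, e.g. the ZooKeeper quorum.
        base.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));
        // create(conf) starts from the hbase-default.xml/hbase-site.xml
        // classpath resources and merges 'base' on top, so values already
        // set in 'base' take precedence.
        return HBaseConfiguration.create(base);
    }
}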
From source file:edu.umd.cloud9.example.hbase.HBaseWordCountFetch.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(TABLE));
    options.addOption(OptionBuilder.withArgName("word").hasArg().withDescription("word to look up").create(WORD));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(TABLE) || !cmdline.hasOption(WORD)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String tableName = cmdline.getOptionValue(TABLE);
    String word = cmdline.getOptionValue(WORD);

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    HTableInterface table = hbaseConnection.getTable(tableName);

    Get get = new Get(Bytes.toBytes(word));
    Result result = table.get(get);

    int count = Bytes.toInt(result.getValue(HBaseWordCount.CF, HBaseWordCount.COUNT));
    LOG.info("word: " + word + ", count: " + count);

    return 0;
}
From source file:edu.umd.gorden2.BooleanRetrievalHBase.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        // formatter.printHelp(LookupPostings.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use uncompressed version!");
        System.exit(-1);
    }

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    table = hbaseConnection.getTable(indexPath);

    FileSystem fs = FileSystem.get(conf);
    collection = fs.open(new Path(collectionPath));
    stack = new Stack<Set<Integer>>();
    // initialize(indexPath, collectionPath, fs);

    String[] queries = { "outrageous fortune AND", "white rose AND", "means deceit AND",
            "white red OR rose AND pluck AND", "unhappy outrageous OR good your AND OR fortune AND" };

    for (String q : queries) {
        System.out.println("Query: " + q);
        runQuery(q);
        System.out.println("");
    }

    return 1;
}
From source file:edu.umd.gorden2.BuildInvertedIndexHBase.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("HBase table name").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers").create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Dropping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));
    admin.close();

    LOG.info("Tool name: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputTable);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);
    job.setMapperClass(MyMapper.class);
    // job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    // Path outputDir = new Path(outputPath);
    // FileSystem.get(getConf()).delete(outputDir, true);

    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:edu.umd.honghongie.BooleanRetrievalHBase.java
License:Apache License
private void initialize(String tableName, String collectionPath, FileSystem fs) throws IOException {
    // Initialize the index.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    index = hbaseConnection.getTable(tableName);

    collection = fs.open(new Path(collectionPath));
    stack = new Stack<Set<Integer>>();
}
From source file:edu.umd.honghongie.BuildInvertedIndexHBase.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);

    // If the table exists, drop and recreate it.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Dropping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));
    admin.close();

    // Now we are ready to start running mapreduce.
    LOG.info("Tool name: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);
    job.setMapperClass(MyMapper.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}