List of usage examples for org.apache.hadoop.conf.Configuration.addResource
public void addResource(Configuration conf)

Related overloads used in the examples below: addResource(Path file) reads a configuration file from the local filesystem, addResource(InputStream in) reads a configuration document from a stream, and addResource(String name) looks the named resource up on the classpath.
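Before the full examples, here is a minimal, self-contained sketch of the pattern they share (the file name, resource name, and property key are illustrative assumptions, not taken from any example below). Resources are applied in the order they are added, and a later resource overrides values from an earlier one unless the earlier definition is marked final:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class AddResourceDemo {
    public static void main(String[] args) {
        // Loads the default resources (core-default.xml, core-site.xml) from the classpath.
        Configuration conf = new Configuration();

        // Layer extra resources on top; these override earlier values for any
        // property not marked <final>true</final> in an earlier resource.
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml")); // local filesystem path (assumed location)
        conf.addResource("my-app-site.xml");                          // classpath resource (hypothetical name)

        System.out.println(conf.get("fs.defaultFS"));
    }
}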
From source file:edu.umd.shrawanraina.BuildInvertedIndexHBase.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    // If the table doesn't already exist, create it.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Dropping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));
    admin.close();

    // Now we're ready to start running MapReduce.
    LOG.info("Tool: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);

    job.setMapperClass(MyMapper.class);
    // job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(reduceTasks);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(PairOfWritables.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputTable);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:edu.umd.shrawanraina.BuildInvertedIndexHBaseCompressed.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    // If the table doesn't already exist, create it.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Dropping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));
    admin.close();

    // Now we're ready to start running MapReduce.
    LOG.info("Tool: " + BuildInvertedIndexHBaseCompressed.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildInvertedIndexHBaseCompressed.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBaseCompressed.class);

    job.setMapOutputKeyClass(PairOfStringInt.class);
    job.setMapOutputValueClass(VIntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(reduceTasks);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(PairOfWritables.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputTable);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:edu.umd.shrawanraina.HBaseWordCountFetch.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("table").hasArg().withDescription("HBase table name").create(TABLE));
    options.addOption(OptionBuilder.withArgName("word").hasArg().withDescription("word to look up").create(WORD));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(TABLE) || !cmdline.hasOption(WORD)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String tableName = cmdline.getOptionValue(TABLE);
    String word = cmdline.getOptionValue(WORD);

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HConnection hbaseConnection = HConnectionManager.createConnection(hbaseConfig);
    HTableInterface table = hbaseConnection.getTable(tableName);

    Get get = new Get(Bytes.toBytes(word));
    Result result = table.get(get);

    int count = Bytes.toInt(result.getValue(HBaseWordCount.CF, HBaseWordCount.COUNT));
    LOG.info("word: " + word + ", count: " + count);
    LOG.info("word: " + word + ", result: " + result.getValue(HBaseWordCount.CF, HBaseWordCount.COUNT));

    return 0;
}
From source file:edu.umd.windmemory.BuildInvertedIndexHBase.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    // options.addOption(OptionBuilder.withArgName("num").hasArg()
    //         .withDescription("number of reducers").create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    // int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
    //         ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Dropping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }
    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));
    admin.close();

    LOG.info("Tool name: " + BuildInvertedIndexHBase.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputTable);
    // LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndexHBase.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBase.class);
    // job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    // FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(PairOfWritables.class);
    // job.setOutputFormatClass(MapFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    // job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    // Path outputDir = new Path(outputPath);
    // FileSystem.get(getConf()).delete(outputDir, true);

    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:etl.cmd.test.XTestCase.java
License:Apache License
/**
 * Initialize the test working directory.
 * <p>
 * If it does not exist it creates it; if it already exists it deletes all its contents.
 * <p>
 * The test working directory is not deleted after the test runs.
 *
 * @throws Exception if the test workflow working directory could not be created
 */
protected void setUp() throws Exception {
    RUNNING_TESTCASES.incrementAndGet();
    super.setUp();
    String baseDir = System.getProperty(OOZIE_TEST_DIR, new File("target/test-data").getAbsolutePath());
    String msg = null;
    File f = new File(baseDir);
    if (!f.isAbsolute()) {
        msg = XLog.format("System property [{0}]=[{1}] must be set to an absolute path", OOZIE_TEST_DIR, baseDir);
    } else {
        if (baseDir.length() < 4) {
            msg = XLog.format("System property [{0}]=[{1}] path must be at least 4 chars", OOZIE_TEST_DIR, baseDir);
        }
    }
    if (msg != null) {
        System.err.println();
        System.err.println(msg);
        System.exit(-1);
    }
    f.mkdirs();
    if (!f.exists() || !f.isDirectory()) {
        System.err.println();
        System.err.println(XLog.format("Could not create test dir [{0}]", baseDir));
        System.exit(-1);
    }
    hadoopVersion = System.getProperty(HADOOP_VERSION, "0.20.0");
    testCaseDir = createTestCaseDir(this, true);

    // Set up the Oozie home and Oozie conf directory.
    setSystemProperty(Services.OOZIE_HOME_DIR, testCaseDir);
    Services.setOozieHome();
    testCaseConfDir = createTestCaseSubDir("conf");

    // Load the test oozie-site.
    String oozieTestDB = System.getProperty("oozie.test.db", "hsqldb");
    String defaultOozieSite = new File("src/test/resources/" + oozieTestDB + "-oozie-site.xml").getAbsolutePath();
    String customOozieSite = System.getProperty("oozie.test.config.file", defaultOozieSite);
    File source = new File(customOozieSite);
    source = source.getAbsoluteFile();
    InputStream oozieSiteSourceStream = null;
    if (source.exists()) {
        oozieSiteSourceStream = new FileInputStream(source);
    } else {
        // If we can't find it, try the class loader (useful if we're using XTestCase from outside core).
        URL sourceURL = getClass().getClassLoader().getResource(oozieTestDB + "-oozie-site.xml");
        if (sourceURL != null) {
            oozieSiteSourceStream = sourceURL.openStream();
        } else {
            // If we still can't find it, then exit.
            System.err.println();
            System.err.println(XLog.format("Custom configuration file for testing does not exist [{0}]",
                    source.getAbsolutePath()));
            System.err.println();
            System.exit(-1);
        }
    }

    // Copy the specified oozie-site file from oozieSiteSourceStream to the test case dir as oozie-site.xml.
    Configuration oozieSiteConf = new Configuration(false);
    oozieSiteConf.addResource(oozieSiteSourceStream);
    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
    InputStream inputStream = classLoader.getResourceAsStream(ConfigurationService.DEFAULT_CONFIG_FILE);
    XConfiguration configuration = new XConfiguration(inputStream);
    String classes = configuration.get(Services.CONF_SERVICE_CLASSES);
    // Disable the sharelib service as it cannot find the sharelib jars:
    // Maven has target/classes in the classpath rather than the jar, because the test phase runs before the package phase.
    // if (System.getProperty("oozie.test.hadoop.minicluster", "true").equals("true"))
    oozieSiteConf.set(Services.CONF_SERVICE_CLASSES,
            classes.replaceAll("org.apache.oozie.service.ShareLibService,", ""));
    // Make sure to create the Oozie DB during unit tests.
    oozieSiteConf.set(JPAService.CONF_CREATE_DB_SCHEMA, "true");
    File target = new File(testCaseConfDir, "oozie-site.xml");
    oozieSiteConf.writeXml(new FileOutputStream(target));

    File hadoopConfDir = new File(testCaseConfDir, "hadoop-conf");
    hadoopConfDir.mkdir();
    File actionConfDir = new File(testCaseConfDir, "action-conf");
    actionConfDir.mkdir();
    source = new File("src/test/resources/hadoop-config.xml");
    target = new File(hadoopConfDir, "core-site.xml");
    IOUtils.copyStream(new FileInputStream(source), new FileOutputStream(target));

    if (System.getProperty("oozielocal.log") == null) {
        setSystemProperty("oozielocal.log", "/tmp/oozielocal.log");
    }
    if (System.getProperty("oozie.test.hadoop.security", "simple").equals("kerberos")) {
        System.setProperty("oozie.service.HadoopAccessorService.kerberos.enabled", "true");
    }
    if (System.getProperty("oozie.test.hadoop.minicluster", "true").equals("true")) {
        setUpEmbeddedHadoop(getTestCaseDir());
        // The second cluster is not necessary without the first one.
        if (System.getProperty("oozie.test.hadoop.minicluster2", "false").equals("true")) {
            setUpEmbeddedHadoop2();
        }
    }

    if (System.getProperty("oozie.test.db.host") == null) {
        System.setProperty("oozie.test.db.host", "localhost");
    }
    setSystemProperty(ConfigurationService.OOZIE_DATA_DIR, testCaseDir);
    setSystemProperty(HadoopAccessorService.SUPPORTED_FILESYSTEMS, "*");

    if (mrCluster != null) {
        OutputStream os = new FileOutputStream(new File(hadoopConfDir, "core-site.xml"));
        Configuration conf = mrCluster.getConfig();
        conf.writeXml(os);
        os.close();
    }
}
From source file:eu.stratosphere.pact.test.util.filesystem.ExternalDFSProvider.java
License:Apache License
public void start() throws Exception {
    Configuration config = new Configuration(false);
    config.addResource(new Path(configDir + "/hadoop-default.xml"));
    config.addResource(new Path(configDir + "/hadoop-site.xml"));
    hdfs = FileSystem.get(config);
}
From source file:eu.stratosphere.runtime.fs.hdfs.DistributedFileSystem.java
License:Apache License
/**
 * Returns a new Hadoop Configuration object using the path to the hadoop conf configured
 * in the Stratosphere configuration.
 * This method is public because it is being used in the HadoopDataSource.
 */
public static org.apache.hadoop.conf.Configuration getHadoopConfiguration() {
    Configuration retConf = new org.apache.hadoop.conf.Configuration();

    // We need to load both core-site.xml and hdfs-site.xml to determine the default fs path
    // and the hdfs configuration. Try to load the HDFS configuration from Hadoop's own
    // configuration files.

    // 1. approach: Stratosphere configuration
    final String hdfsDefaultPath = GlobalConfiguration.getString(ConfigConstants.HDFS_DEFAULT_CONFIG, null);
    if (hdfsDefaultPath != null) {
        retConf.addResource(new org.apache.hadoop.fs.Path(hdfsDefaultPath));
    } else {
        LOG.debug("Cannot find hdfs-default configuration file");
    }

    final String hdfsSitePath = GlobalConfiguration.getString(ConfigConstants.HDFS_SITE_CONFIG, null);
    if (hdfsSitePath != null) {
        retConf.addResource(new org.apache.hadoop.fs.Path(hdfsSitePath));
    } else {
        LOG.debug("Cannot find hdfs-site configuration file");
    }

    // 2. approach: environment variables
    String[] possibleHadoopConfPaths = new String[4];
    possibleHadoopConfPaths[0] = GlobalConfiguration.getString(ConfigConstants.PATH_HADOOP_CONFIG, null);
    possibleHadoopConfPaths[1] = System.getenv("HADOOP_CONF_DIR");

    if (System.getenv("HADOOP_HOME") != null) {
        possibleHadoopConfPaths[2] = System.getenv("HADOOP_HOME") + "/conf";
        possibleHadoopConfPaths[3] = System.getenv("HADOOP_HOME") + "/etc/hadoop"; // hadoop 2.2
    }

    for (int i = 0; i < possibleHadoopConfPaths.length; i++) {
        if (possibleHadoopConfPaths[i] == null) {
            continue;
        }
        if (new File(possibleHadoopConfPaths[i]).exists()) {
            if (new File(possibleHadoopConfPaths[i] + "/core-site.xml").exists()) {
                retConf.addResource(new org.apache.hadoop.fs.Path(possibleHadoopConfPaths[i] + "/core-site.xml"));
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Adding " + possibleHadoopConfPaths[i] + "/core-site.xml to hadoop configuration");
                }
            }
            if (new File(possibleHadoopConfPaths[i] + "/hdfs-site.xml").exists()) {
                retConf.addResource(new org.apache.hadoop.fs.Path(possibleHadoopConfPaths[i] + "/hdfs-site.xml"));
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Adding " + possibleHadoopConfPaths[i] + "/hdfs-site.xml to hadoop configuration");
                }
            }
        }
    }
    return retConf;
}
From source file:fire.examples.workflow.hbase.WorkflowHBase.java
License:Apache License
private void writeToHbase(String filePath) {
    Connection conn = null;
    try {
        Configuration configuration = HBaseConfiguration.create();
        configuration.addResource(new Path("/etc/hbase/conf.cloudera.hbase/hbase-site.xml"));
        conn = ConnectionFactory.createConnection(configuration);
        Table t1 = conn.getTable(TableName.valueOf("person"));
        List<Put> putList = readCSV(filePath);
        // Example of a single Put:
        // Put p = new Put(Bytes.toBytes("rowKey"));
        // p.addColumn(Bytes.toBytes("columnFamilyName"), Bytes.toBytes("columnName"), Bytes.toBytes("Some Value"));
        t1.put(putList);
        t1.close();
        conn.close();
    } catch (IOException e) {
        System.out.println("Error HBase " + e.getMessage());
    }
}
From source file:github.ananthc.sampleapps.apex.kuduoutput.KafkaToKuduOutputApplicationTest.java
@Test
public void testApplication() throws Exception {
    try {
        LocalMode lma = LocalMode.newInstance();
        Configuration conf = new Configuration(false);
        conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
        lma.prepareDAG(new github.ananthc.sampleapps.apex.kuduoutput.KafkaToKuduOutputApplication(), conf);
        LocalMode.Controller lc = lma.getController();
        lc.run(1000000); // run the DAG locally for 1,000,000 ms, then shut down
    } catch (ConstraintViolationException e) {
        Assert.fail("constraint violations: " + e.getConstraintViolations());
    }
}
From source file:gobblin.compaction.HdfsIO.java
License:Open Source License
private static void addHadoopConfigPropertiesToConf(Configuration conf) {
    Set<String> propertyNames = CompactionRunner.properties.stringPropertyNames();
    for (String propertyName : propertyNames) {
        if (propertyName.startsWith(HADOOP_CONFIGFILE_)) {
            String hadoopConfigFile = CompactionRunner.properties.getProperty(propertyName);
            conf.addResource(hadoopConfigFile);
            LOG.info("Added Hadoop Config File: " + hadoopConfigFile);
        }
    }
}
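A note on the example above: the String overload used here, addResource(String name), looks the name up on the classpath, while addResource(Path file) reads the local filesystem directly. If a HADOOP_CONFIGFILE_ property held an on-disk path rather than a classpath resource name, a Path-based variant would be needed. A sketch of that variant, with a hypothetical file location:

// Hypothetical variant of the loop body above: treat the property value as a
// local filesystem path instead of a classpath resource name.
String hadoopConfigFile = "/etc/hadoop/conf/core-site.xml"; // assumed on-disk location
conf.addResource(new org.apache.hadoop.fs.Path(hadoopConfigFile));
LOG.info("Added Hadoop config file from local FS: " + hadoopConfigFile);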