List of usage examples for org.apache.hadoop.conf.Configuration.set

public void set(String name, String value)

Sets the value of the name property.
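Before the examples from individual source files, a minimal sketch of setting a property and reading it back (not taken from any of the files below; the property name my.example.key is illustrative):

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // "my.example.key" is an illustrative key, not a property Hadoop itself uses.
        conf.set("my.example.key", "some-value");
        // get(name, defaultValue) returns the value stored above, or the default if the key is absent.
        System.out.println(conf.get("my.example.key", "default-value"));
    }
}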
From source file:MStress_Client.java
License:Open Source License
public static void main(String args[]) {
    parseOptions(args);

    int result = 0;
    try {
        Configuration conf = new Configuration(true);
        String confSet = "hdfs://" + dfsServer_ + ":" + dfsPort_;
        conf.set("fs.default.name", confSet);
        conf.set("fs.trash.interval", "0");
        InetSocketAddress inet = new InetSocketAddress(dfsServer_, dfsPort_);
        dfsClient_ = new DFSClient(inet, conf);

        if (parsePlanFile() < 0) {
            System.exit(-1);
        }

        if (testName_.equals("create")) {
            result = createDFSPaths();
        } else if (testName_.equals("stat")) {
            result = statDFSPaths();
        } else if (testName_.equals("readdir")) {
            result = listDFSPaths();
        } else if (testName_.equals("delete")) {
            result = removeDFSPaths();
        } else {
            System.out.printf("Error: unrecognized test '%s'\n", testName_);
            System.exit(-1);
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(-1);
    }

    if (result != 0) {
        System.exit(-1);
    }
    return;
}
From source file:PopulateBseData.java
License:Apache License
public PopulateBseData() throws IOException {
    Configuration conf = new Configuration();
    conf.set("hbase.zookeeper.quorum", "localhost");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    hTable = new HTable(conf, "stockData1");
}
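The HTable constructor used above belongs to the older HBase client API. As a hedged sketch of an equivalent setup with the Connection-based client (quorum, port, and table name carried over from the example above; this is not part of the original source file):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;

public class PopulateBseDataSketch {
    public static Table openStockTable() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "localhost");          // same quorum as the example above
        conf.set("hbase.zookeeper.property.clientPort", "2181");  // same client port as the example above
        Connection connection = ConnectionFactory.createConnection(conf);
        return connection.getTable(TableName.valueOf("stockData1"));
    }
}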
From source file:ImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // set filter
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }

            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString()
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }

            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
        }
    }
    return job;
}
From source file:HBaseBloomFilterSemiJoinSystemTest.java
License:Apache License
public static void setUp() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
    util = new HBaseTestingUtility(conf);
}
From source file:RunPageRankSchimmy.java
License:Apache License
private float phase1(String path, int i, int j, int n, boolean useCombiner, boolean useInmapCombiner,
        boolean useRange) throws Exception {
    Configuration conf = getConf();

    String in = path + "/iter" + FORMAT.format(i);
    String out = path + "/iter" + FORMAT.format(j) + "t";
    String outm = out + "-mass";

    FileSystem fs = FileSystem.get(conf);

    // We need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(conf).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;
        }
    }

    conf.setInt("NodeCount", n);

    Partitioner<IntWritable, Writable> p = null;
    if (useRange) {
        p = new RangePartitioner();
        ((Configurable) p).setConf(conf);
    } else {
        p = new HashPartitioner<IntWritable, Writable>();
    }

    // This is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
    // key.hash % #reducer) is arbitrary... so this means that we need to
    // open up each partition, peek inside to find out.
    IntWritable key = new IntWritable();
    PageRankNode value = new PageRankNode();
    FileStatus[] status = fs.listStatus(new Path(in));

    StringBuilder sb = new StringBuilder();
    for (FileStatus f : status) {
        if (!f.getPath().getName().contains("part-")) {
            continue;
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(f.getPath()));
        reader.next(key, value);
        int np = p.getPartition(key, value, numPartitions);
        reader.close();

        LOG.info(f.getPath() + "\t" + np);
        sb.append(np + "=" + f.getPath() + ";");
    }

    LOG.info(sb.toString().trim());

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + n);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInmapCombiner);
    LOG.info(" - numPartitions: " + numPartitions);
    LOG.info(" - useRange: " + useRange);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    //conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.set("PageRankMassPath", outm);
    conf.set("BasePath", in);
    conf.set("PartitionMapping", sb.toString().trim());
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");
    job.setJarByClass(RunPageRankSchimmy.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useInmapCombiner) {
        job.setMapperClass(MapWithInMapperCombiningClass.class);
    } else {
        job.setMapperClass(MapClass.class);
    }

    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    job.setReducerClass(ReduceClass.class);

    FileSystem.get(conf).delete(new Path(out), true);
    FileSystem.get(conf).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    float mass = Float.NEGATIVE_INFINITY;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        mass = sumLogProbs(mass, fin.readFloat());
        fin.close();
    }

    return mass;
}
From source file:TaskSearchWords.java
public static void main(String[] args) throws Exception {

    String hadoopServer = "ip-172-31-13-245.ap-southeast-1.compute.internal";

    Configuration conf = new Configuration();

    // this should be like defined in your mapred-site.xml
    conf.set("mapred.job.tracker", hadoopServer + ":54311");
    // like defined in hdfs-site.xml
    conf.set("fs.default.name", "hdfs://" + hadoopServer + ":9000");

    // setting mapred classes for HDFS to know which classes to process
    conf.set("mapreduce.map.class", "TokenizerMapper");
    conf.set("mapreduce.reduce.class", "IntSumReducer");

    // to prevent classdefnotfound exception
    conf.set("mapred.jar", "C:\\GitRepos\\OCR\\HadoopTasks\\dist\\HadoopTasks.jar");

    // to pass parameters to mapred classes
    conf.set("RAWOCRCLOB",
            "Omeprazole_Cap E/C 10mg\n" + "Dressit Ster esDress\n" + "Flaminal Forte 15g\n"
                    + "Co-Magaldrox_Susp 195mg/220mg/5ml S/F\n" + "Antacid/Oxetacaine_Oral Susp S/F\n"
                    + "Simeticone_Susp 40mg/ml S/F\n" + "Infacol_Susp 40mg/ml S/F");

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(TaskSearchWords.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path("/user/ubuntu/MedicinesProcessed.csv"));

    FileSystem fs = FileSystem.get(conf);
    Path out = new Path("/user/ubuntu/processed/");
    fs.delete(out, true);

    // finally set the empty out path
    FileOutputFormat.setOutputPath(job, out);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
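Values stored with conf.set in the driver above are only useful once a task reads them back from the job configuration. A minimal, hypothetical TokenizerMapper sketch (the real mapper from this source file is not shown; the matching logic here is illustrative only):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenizerMapperSketch extends Mapper<LongWritable, Text, Text, IntWritable> {
    private String rawOcrClob;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Read back the value the driver stored with conf.set("RAWOCRCLOB", ...).
        rawOcrClob = context.getConfiguration().get("RAWOCRCLOB");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit a count of 1 for every input line that appears in the configured text (illustrative only).
        if (rawOcrClob != null && rawOcrClob.contains(value.toString())) {
            context.write(new Text(value.toString()), new IntWritable(1));
        }
    }
}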
From source file:CountJob.java
License:Apache License
public static void doJob(String param, String args[], String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        hdfs.delete(tempOutput1, true);
        hdfs.delete(tempOutput2, true);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(CountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempOutput1);
    job.waitForCompletion(true);

    Job sortJob1 = new Job(conf);
    sortJob1.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob1, tempOutput1);
    sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
    sortJob1.setMapperClass(InverseMapper.class);
    // write a single file
    sortJob1.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
    // sort by decreasing freq
    sortJob1.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    sortJob1.waitForCompletion(true);
    hdfs.delete(tempOutput1, true);
}
From source file:BuildPersonalizedPageRankRecords.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    // parsing more than 1 integer later;
    options.addOption(
            OptionBuilder.withArgName("src").hasArg().withDescription("source of pagerank").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(SOURCES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    // Change to array later
    String src = cmdline.getOptionValue(SOURCES);

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    // more to be set later;
    conf.set(NODE_SRC, src);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
From source file:RunPersonalizedPageRankBasic.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(COMBINER, "use combiner"));
    options.addOption(new Option(INMAPPER_COMBINER, "user in-mapper combiner"));
    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("base path").create(BASE));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("start iteration").create(START));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("end iteration").create(END));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("sources").hasArg()
            .withDescription("personalized source nodes").create(SRC_NODES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(BASE) || !cmdline.hasOption(START) || !cmdline.hasOption(END)
            || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String basePath = cmdline.getOptionValue(BASE);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int s = Integer.parseInt(cmdline.getOptionValue(START));
    int e = Integer.parseInt(cmdline.getOptionValue(END));
    boolean useCombiner = cmdline.hasOption(COMBINER);
    boolean useInmapCombiner = cmdline.hasOption(INMAPPER_COMBINER);
    boolean useRange = cmdline.hasOption(RANGE);
    String src = cmdline.getOptionValue(SRC_NODES);

    LOG.info("Tool name: RunPageRank");
    LOG.info(" - base path: " + basePath);
    LOG.info(" - num nodes: " + n);
    LOG.info(" - start iteration: " + s);
    LOG.info(" - end iteration: " + e);
    LOG.info(" - use combiner: " + useCombiner);
    LOG.info(" - use in-mapper combiner: " + useInmapCombiner);
    LOG.info(" - user range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.set(NODE_SRC, src);

    // Iterate PageRank.
    for (int i = s; i < e; i++) {
        iteratePageRank(i, i + 1, basePath, n, useCombiner, useInmapCombiner);
    }

    return 0;
}
From source file:RecordExtracting.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    /*
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg()
            .withDescription("number of nodes").create(NUM_NODES));
    // parsing more than 1 integer later;
    */
    options.addOption(
            OptionBuilder.withArgName("src").hasArg().withDescription("spamming users").create(SOURCES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(SOURCES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    */

    String inputPath = "xzzqskfinal/reviewsNew.txt"; // cmdline.getOptionValue(INPUT);
    String outputPath = "xzzqskfinal/SpammingRecord"; // cmdline.getOptionValue(OUTPUT);
    // int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    // Change to array later
    String src = cmdline.getOptionValue(SOURCES);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + RecordExtracting.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    // LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");

    // conf.setInt(NODE_CNT_FIELD, n);
    // more to be set later;
    conf.set(NODE_SRC, src);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(RatingSpamming.class.getSimpleName());
    job.setJarByClass(RecordExtracting.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    // Start the timer before running the job so the reported duration covers the run.
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}