List of usage examples for org.apache.hadoop.conf Configuration setStrings
public void setStrings(String name, String... values)
Sets the array of string values for the name property as comma-delimited values.
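Before the project examples, a minimal self-contained sketch of the round trip (the property name recommend.hosts is made up for illustration): the values handed to setStrings are stored as one comma-delimited string, and getStrings splits them back into an array.

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;

public class SetStringsDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Stored internally as the single value "node1,node2,node3".
        conf.setStrings("recommend.hosts", "node1", "node2", "node3");

        // Raw comma-delimited form.
        System.out.println(conf.get("recommend.hosts"));

        // getStrings() splits the value back into an array: [node1, node2, node3]
        System.out.println(Arrays.toString(conf.getStrings("recommend.hosts")));
    }
}

Because the joining is a plain comma with no escaping, individual values should not themselves contain commas; all the examples below pass comma-free tokens.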
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Sort.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "sort");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    Utils.setHeaderMergerSortOrder(conf, SortOrder.coordinate);
    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(Sort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(SortInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("sort :: Job failed.");
            return 4;
        }

        System.out.printf("sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());
    } catch (IOException e) {
        System.err.printf("sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "sort");
        } catch (IOException e) {
            System.err.printf("sort :: Output merging failed: %s\n", e);
            return 5;
        }

    return 0;
}
From source file:FormatStorage.Head.java
License:Open Source License
public void toJobConf(Configuration conf) {
    conf.setInt(ConstVar.HD_var, var);
    conf.setInt(ConstVar.HD_compress, compress);
    conf.setInt(ConstVar.HD_compressStyle, compressStyle);
    conf.setInt(ConstVar.HD_primaryIndex, primaryIndex);
    conf.setInt(ConstVar.HD_encode, encode);
    conf.setInt(ConstVar.HD_encodeStyle, encodeStyle);
    if (key != null) {
        conf.set(ConstVar.HD_key, key);
    }

    if (fieldMap != null) {
        short fieldNum = fieldMap.fieldNum();
        String[] fieldStrings = new String[fieldNum];

        Set<Short> keySet = fieldMap.fields.keySet();
        Iterator<Short> iterator = keySet.iterator();
        int i = 0;
        while (iterator.hasNext()) {
            Field field = fieldMap.fields.get(iterator.next());
            fieldStrings[i++] = field.type + ConstVar.RecordSplit + field.len + ConstVar.RecordSplit
                    + field.index;
        }
        conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);
    }
}
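The matching read side is not part of this listing; the following is a hypothetical sketch of how a consumer could unpack the HD_fieldMap entries written above, assuming the separator passed in is neither a comma nor a problem for a quoted regex (FieldMapReader and printFieldMap are illustrative names, not part of FormatStorage):

import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;

public class FieldMapReader {
    // Each entry has the shape "type<sep>len<sep>index", as built in toJobConf().
    public static void printFieldMap(Configuration conf, String property, String sep) {
        String[] fieldStrings = conf.getStrings(property); // undoes the comma-joining done by setStrings()
        if (fieldStrings == null) {
            return; // nothing was published under this property
        }
        for (String entry : fieldStrings) {
            String[] parts = entry.split(Pattern.quote(sep));
            System.out.printf("type=%s len=%s index=%s%n", parts[0], parts[1], parts[2]);
        }
    }
}

A call would look like printFieldMap(conf, ConstVar.HD_fieldMap, String.valueOf(ConstVar.RecordSplit)); note that the separator must not be a comma, since setStrings already uses commas between array entries.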
From source file:FormatStorage1.IHead.java
License:Open Source License
public void toJobConf(Configuration conf) {
    conf.setInt(ConstVar.HD_magic, magic);
    conf.setInt(ConstVar.HD_var, var);
    conf.setInt(ConstVar.HD_ver, ver);
    conf.setInt(ConstVar.HD_lineindex, lineindex);
    conf.setInt(ConstVar.HD_primaryIndex, primaryIndex);
    conf.setInt(ConstVar.HD_compress, compress);
    conf.setInt(ConstVar.HD_compressStyle, compressStyle);
    conf.setInt(ConstVar.HD_encode, encode);
    conf.setInt(ConstVar.HD_encodeStyle, encodeStyle);

    if (fieldMap != null) {
        int fieldNum = fieldMap.fieldtypes().size();
        String[] fieldStrings = new String[fieldNum];
        int i = 0;
        for (IRecord.IFType ft : this.fieldMap.fieldtypes().values()) {
            fieldStrings[i++] = ft.type() + ConstVar.RecordSplit + ft.len() + ConstVar.RecordSplit + ft.idx();
        }
        conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);
    }

    if (udi != null && udi.infos.size() > 0) {
        String[] udistrs = new String[udi.infos.size()];
        int i = 0;
        for (Map.Entry<Integer, String> en : udi.infos.entrySet()) {
            udistrs[i++] = en.getKey() + ConstVar.RecordSplit + en.getValue();
        }
        conf.setStrings(ConstVar.HD_udi, udistrs);
    }
}
From source file:full_MapReduce.C4_5.java
License:Open Source License
private static void calcAttributesInfo(Map<String, String> conditions) throws Exception {
    Configuration conf = new Configuration();
    for (Entry<String, String> condition : conditions.entrySet()) {
        conf.setStrings(condition.getKey(), condition.getValue());
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(C4_5.class);
    job.setJobName("C4.5_calcAttributesInfo");

    FileInputFormat.addInputPath(job, summarized_data_path);
    FileOutputFormat.setOutputPath(job, calc_attributes_info_path);

    job.setMapperClass(AttributeInfoMapper.class);
    job.setReducerClass(AttributeInfoReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AttributeCounterWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(MapWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.waitForCompletion(false);
}
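Here every conditions entry is written with a single value, so setStrings behaves like a plain set(). A hypothetical sketch of the consuming side, reading one such property back in a mapper's setup() (the class, the property name "attribute", and the key/value types are illustrative and not taken from AttributeInfoMapper):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ConditionAwareMapper extends Mapper<Text, Text, Text, Text> {
    private String condition;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Single-valued property: get() returns exactly what was passed to setStrings().
        condition = conf.get("attribute", "");
    }

    @Override
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        // ... filter or score the record against 'condition' ...
        context.write(key, value);
    }
}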
From source file:gobblin.compaction.HdfsIO.java
License:Open Source License
private static void addResourceToConf(Configuration conf) {
    conf.setStrings(getHdfsUriHadoopPropertyName(), HDFS_URI_DEFAULT);
    addHadoopConfigPropertiesToConf(conf);
    if (CompactionRunner.properties.containsKey(HDFS_URI)) {
        conf.setStrings(getHdfsUriHadoopPropertyName(), CompactionRunner.properties.getProperty(HDFS_URI));
    }
}
From source file:gr.ntua.h2rdf.partialJoin.JoinPlaner.java
License:Open Source License
private static void printNonJoinV(Configuration joinConf, String ret, String[] lines) {
    int s = 0;
    for (int i = 0; i < join_files.length; i++) {
        if (lines[i].contains("|")) {
            if (lines[i].contains("J"))
                System.exit(1);
            String fname = lines[i].substring(0, lines[i].indexOf("|"));
            joinConf.set("input.reduceScans." + s + ".fname", fname);

            int id = Integer.parseInt(lines[i].substring(lines[i].indexOf(":") + 1));
            Scan scan = getScan(id);
            joinConf.set("input.reduceScans." + s + ".startrow", Bytes.toStringBinary(scan.getStartRow()));

            if (scan.hasFamilies()) {
                System.out.println(Bytes.toString(scan.getFamilies()[0]));
                joinConf.set("input.reduceScans." + s + ".columns", Bytes.toString(scan.getFamilies()[0]));
            } else {
                System.out.println("no");
                joinConf.set("input.reduceScans." + s + ".columns", "");
            }
            s++;
        }
    }
    joinConf.setStrings("input.reduceScans", s + "");
}
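A small aside on the final call above: "input.reduceScans" holds a single numeric count, so joinConf.setInt("input.reduceScans", s) with a matching conf.getInt(...) on the read side would state the intent more directly; setStrings is only needed when a property carries multiple values.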
From source file:hk.newsRecommender.TFIDFClassify.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");

    // part1: computeTF ----------------------------------------------------
    Job job1 = Job.getInstance(conf, "computeTF");
    Path outputPath1 = new Path(hdfsUrl + "/data/recommend/class1/tfidf1");
    HadoopUtil.delete(conf, outputPath1);
    job1.setJarByClass(TFIDFClassify.class);
    job1.setMapperClass(Mapper_Part1.class);
    job1.setReducerClass(Reduce_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setPartitionerClass(MyPartitoner.class);
    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/data3.txt"));
    FileOutputFormat.setOutputPath(job1, outputPath1);
    job1.waitForCompletion(true);

    // part2: computIDF ----------------------------------------
    Job job2 = Job.getInstance(conf, "computIDF");
    Path outputPath2 = new Path(hdfsUrl + "/data/recommend/class1/tfidf2");
    HadoopUtil.delete(conf, outputPath2);
    job2.setJarByClass(TFIDFClassify.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    FileInputFormat.setInputPaths(job2, new Path(hdfsUrl + "/data/recommend/class1/tfidf1"));
    FileOutputFormat.setOutputPath(job2, outputPath2);
    job2.waitForCompletion(true);

    // part3: sortByTFIDFDec ----------------------------------------
    Job job3 = Job.getInstance(conf, "sortByTFIDFDec");
    Path outputPath3 = new Path(hdfsUrl + "/data/recommend/class1/tfidf3");
    HadoopUtil.delete(conf, outputPath3);
    job3.setMapperClass(Mapper_Part3.class);
    job3.setReducerClass(Reduce_Part3.class);
    job3.setMapOutputKeyClass(CustomKey.class);
    job3.setMapOutputValueClass(NullWritable.class);
    job3.setOutputKeyClass(CustomKey.class);
    job3.setOutputValueClass(NullWritable.class);
    job3.setGroupingComparatorClass(CustomGroupComparator.class);
    job3.setPartitionerClass(CustomPartitioner.class);
    FileInputFormat.addInputPath(job3, new Path(hdfsUrl + "/data/recommend/class1/tfidf2"));
    FileOutputFormat.setOutputPath(job3, outputPath3);
    job3.waitForCompletion(true);

    // part4: siftKeywords (left commented out) -------------------------
    // Job job4 = Job.getInstance(conf, "siftKeywords");
    // Path outputPath4 = new Path(hdfsUrl + "/data/recommend/class1/matrix1");
    // HadoopUtil.delete(conf, outputPath4);
    // job4.setJarByClass(TFIDF.class);
    // job4.setMapperClass(Mapper_Part4.class);
    // job4.setReducerClass(Reduce_Part4.class);
    // job4.setMapOutputKeyClass(Text.class);
    // job4.setMapOutputValueClass(Text.class);
    // job4.setOutputKeyClass(Text.class);
    // job4.setOutputValueClass(Text.class);
    // job4.setPartitionerClass(CustomPartitioner.class);
    // FileInputFormat.addInputPath(job4, new Path(hdfsUrl + "/data/recommend/class1/tfidf3"));
    // FileOutputFormat.setOutputPath(job4, outputPath4);
    // job4.waitForCompletion(true);

    // part5: generateMatrix ----------------------------------------
    FileSystem fsopen = FileSystem.get(conf);
    FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/matrix1/part-r-00000"));
    Scanner scan = new Scanner(in);
    List<String> keywordList = new ArrayList<String>();
    while (scan.hasNext()) {
        keywordList.add(scan.next());
    }
    // Must be set on the Configuration before the Job is created.
    conf.setStrings("keyword", keywordList.toArray(new String[keywordList.size()]));

    Job job5 = Job.getInstance(conf, "generateMatrix");
    Path outputPath5 = new Path(hdfsUrl + "/data/recommend/class1/matrix2");
    HadoopUtil.delete(conf, outputPath5);
    job5.setJarByClass(TFIDF.class);
    job5.setMapperClass(Mapper_Part5.class);
    job5.setReducerClass(Reduce_Part5.class);
    job5.setMapOutputKeyClass(Text.class);
    job5.setMapOutputValueClass(Text.class);
    job5.setOutputKeyClass(Text.class);
    job5.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job5, new Path(hdfsUrl + "/data/recommend/class1/tfidf3"));
    FileOutputFormat.setOutputPath(job5, outputPath5);
    job5.waitForCompletion(true);
}
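Mapper_Part5 is not shown in this listing; a hypothetical sketch of how the "keyword" array set above is typically recovered inside the task (class name, key/value types, and field names are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class KeywordMatrixMapper extends Mapper<LongWritable, Text, Text, Text> {
    private String[] keywords;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Returns the same array passed to conf.setStrings("keyword", ...), or null if unset.
        keywords = conf.getStrings("keyword");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        if (keywords == null) {
            return;
        }
        // ... score 'value' against each keyword to build one matrix row ...
    }
}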
From source file:hk.newsRecommender.UserTag.java
License:Open Source License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String hdfsUrl = conf.get("fs.defaultFS"); FileSystem fs = FileSystem.get(conf); Job job1 = Job.getInstance(conf, "generateUserNewsMapping"); Path output1Path = new Path(hdfsUrl + "/data/recommend/user1"); HadoopUtil.delete(conf, output1Path); job1.setJarByClass(TFIDF.class); job1.setMapperClass(Mapper_Part1.class); job1.setMapOutputKeyClass(Text.class); job1.setMapOutputValueClass(Text.class); job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/data2.txt")); FileOutputFormat.setOutputPath(job1, output1Path); job1.waitForCompletion(true);/* ww w . jav a 2 s . c o m*/ Job job2 = Job.getInstance(conf, "generateUserNewsCatMapping"); Path output2Path = new Path(hdfsUrl + "/data/recommend/user2"); HadoopUtil.delete(conf, output2Path); job2.setJarByClass(UserTag.class); job2.setMapperClass(Mapper_Part2.class); job2.setReducerClass(Reduce_Part2.class); job2.setMapOutputKeyClass(Text.class); job2.setMapOutputValueClass(Text.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job2, new Path(hdfsUrl + "/data/recommend/user1")); FileInputFormat.addInputPath(job2, new Path(hdfsUrl + "/data/recommend/ClusterPointsInfo.txt")); FileOutputFormat.setOutputPath(job2, output2Path); job2.waitForCompletion(true); Job job3 = Job.getInstance(conf, "countUserNewsCatMapping"); Path output3Path = new Path(hdfsUrl + "/data/recommend/user3"); HadoopUtil.delete(conf, output3Path); job3.setMapperClass(Mapper_Part3.class); job3.setReducerClass(Reduce_Part3.class); job3.setMapOutputKeyClass(Text.class); job3.setMapOutputValueClass(Text.class); job3.setOutputKeyClass(Text.class); job3.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job3, new Path(hdfsUrl + "/data/recommend/user2")); FileOutputFormat.setOutputPath(job3, output3Path); job3.waitForCompletion(true); Job job4 = Job.getInstance(conf, "generateClusterUniqueRecord"); Path output4Path = new Path(hdfsUrl + "/data/recommend/user4"); HadoopUtil.delete(conf, output4Path); job4.setMapperClass(Mapper_Part4.class); job4.setReducerClass(Reduce_Part4.class); job4.setMapOutputKeyClass(Text.class); job4.setMapOutputValueClass(Text.class); job4.setOutputKeyClass(Text.class); job4.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job4, new Path(hdfsUrl + "/data/recommend/ClusterPointsInfo.txt")); FileOutputFormat.setOutputPath(job4, output4Path); job4.waitForCompletion(true); FileSystem fsopen = FileSystem.get(conf); FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/user4/part-r-00000")); Scanner scan = new Scanner(in); List<String> keywordList = new ArrayList<String>(); while (scan.hasNext()) { keywordList.add(scan.next()); } conf.setStrings("category", keywordList.toArray(new String[keywordList.size()])); Path outPath4 = new Path(hdfsUrl + "/data/recommend/user5"); if (fs.exists(outPath4)) { fs.delete(outPath4, true); System.out.println("???"); } Job job5 = Job.getInstance(conf, "generateUserPreferableMatrix"); job5.setMapperClass(Mapper_Part5.class); job5.setReducerClass(Reduce_Part5.class); job5.setMapOutputKeyClass(Text.class); job5.setMapOutputValueClass(Text.class); job5.setOutputKeyClass(Text.class); job5.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job5, new Path(hdfsUrl + "/data/recommend/user3")); FileOutputFormat.setOutputPath(job5, new Path(hdfsUrl + 
"/data/recommend/user5")); job5.waitForCompletion(true); }
From source file:hydrograph.engine.cascading.scheme.TextDelimitedAndFixedWidth.java
License:Apache License
@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf))))
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", DelimitedAndFixedWidthInputFormat.class, InputFormat.class);
    conf.set("charsetName", charsetName);
    conf.set("quote", quote);
    conf.set("lengthsAndDelimiters", DelimitedAndFixedWidthHelper.arrayToString(lengthsAndDelimiters));
    conf.setStrings("lengthsAndDelimitersType", lengthsAndDelimitersType);
}
From source file:importToNewTable.createDataJob.java
public void importData(String filePath, String[] selectedFamilies, String[] keys) throws Exception {
    // Use the family class to get column names.
    family familyName = new family();
    String[] column = familyName.columnfamily(filePath);

    // Use the input file name (without extension) as the table name.
    File f = new File(filePath);
    String fname = f.getName();
    String vcfName = FilenameUtils.removeExtension(fname);

    createTable ht = new createTable();
    ht.table(vcfName, column, selectedFamilies);

    // MapReduce job.
    Configuration conf = new Configuration();
    conf.setStrings("column", column);
    conf.setStrings("keys", keys);
    conf.setStrings("selectedFamilies", selectedFamilies);
    conf.set("tableName", vcfName);

    Job job = Job.getInstance(conf, "VCF");
    job.setJarByClass(importToNewTable.createDataJob.class);
    job.setMapperClass(importToNewTable.createDataMapper.class);
    job.setReducerClass(org.apache.hadoop.hbase.mapreduce.PutSortReducer.class);

    // Output types.
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    // Input and output directories (not files).
    FileInputFormat.setInputPaths(job, new Path(filePath));
    File temp = new File("/tmp/HVCF/output");
    this.deleteDir(temp);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/HVCF/output"));

    Configuration hbconf = HBaseConfiguration.create();
    HTable table = new HTable(hbconf, vcfName);
    HFileOutputFormat.configureIncrementalLoad(job, table);

    if (!job.waitForCompletion(true)) {
        return;
    }

    // Bulk-load the HFiles into the HBase table.
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(hbconf);
    loader.doBulkLoad(new Path("/tmp/HVCF/output"), table);
}
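Inside createDataMapper (not shown here), the arrays written above are presumably read back with context.getConfiguration().getStrings("column"), getStrings("keys"), and getStrings("selectedFamilies"), with the single-valued table name retrieved via get("tableName").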