Example usage for org.apache.hadoop.conf Configuration setStrings

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration setStrings.

Prototype

public void setStrings(String name, String... values) 

Document

Sets the array of string values for the name property as comma-delimited values.
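
As a quick orientation before the examples, here is a minimal, self-contained sketch (the property name my.sample.property is made up for illustration). setStrings joins its arguments into a single comma-delimited value, and the matching getStrings call splits that value back into an array, so values that themselves contain commas will be split apart on read.

import org.apache.hadoop.conf.Configuration;

public class SetStringsRoundTrip {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // setStrings joins the values into one comma-delimited property value.
        conf.setStrings("my.sample.property", "alpha", "beta", "gamma");

        // The raw stored value is "alpha,beta,gamma".
        System.out.println(conf.get("my.sample.property"));

        // getStrings splits the stored value back into an array on the read side.
        for (String v : conf.getStrings("my.sample.property")) {
            System.out.println(v);
        }
    }
}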

Usage

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Sort.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("sort :: WORKDIR not given.");
        return 3;
    }
    if (args.size() == 1) {
        System.err.println("sort :: INPATH not given.");
        return 3;
    }
    if (!cacheAndSetProperties(parser))
        return 3;

    final SAMFileReader.ValidationStringency stringency = Utils.toStringency(parser.getOptionValue(
            stringencyOpt, SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY.toString()), "sort");
    if (stringency == null)
        return 3;

    Path wrkDir = new Path(args.get(0));

    final List<String> strInputs = args.subList(1, args.size());
    final List<Path> inputs = new ArrayList<Path>(strInputs.size());
    for (final String in : strInputs)
        inputs.add(new Path(in));

    final Configuration conf = getConf();

    Utils.setHeaderMergerSortOrder(conf, SortOrder.coordinate);
    conf.setStrings(Utils.HEADERMERGER_INPUTS_PROPERTY, strInputs.toArray(new String[0]));

    if (stringency != null)
        conf.set(SAMHeaderReader.VALIDATION_STRINGENCY_PROPERTY, stringency.toString());

    // Used by Utils.getMergeableWorkFile() to name the output files.
    final String intermediateOutName = (outPath == null ? inputs.get(0) : outPath).getName();
    conf.set(Utils.WORK_FILENAME_PROPERTY, intermediateOutName);

    final Timer t = new Timer();
    try {
        // Required for path ".", for example.
        wrkDir = wrkDir.getFileSystem(conf).makeQualified(wrkDir);

        Utils.configureSampling(wrkDir, intermediateOutName, conf);

        final Job job = new Job(conf);

        job.setJarByClass(Sort.class);
        job.setMapperClass(Mapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(SAMRecordWritable.class);

        job.setInputFormatClass(SortInputFormat.class);
        job.setOutputFormatClass(CLIMergingAnySAMOutputFormat.class);

        for (final Path in : inputs)
            FileInputFormat.addInputPath(job, in);

        FileOutputFormat.setOutputPath(job, wrkDir);

        job.setPartitionerClass(TotalOrderPartitioner.class);

        System.out.println("sort :: Sampling...");
        t.start();

        InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job,
                new InputSampler.RandomSampler<LongWritable, SAMRecordWritable>(0.01, 10000,
                        Math.max(100, reduceTasks)));

        System.out.printf("sort :: Sampling complete in %d.%03d s.\n", t.stopS(), t.fms());

        job.submit();

        System.out.println("sort :: Waiting for job completion...");
        t.start();

        if (!job.waitForCompletion(verbose)) {
            System.err.println("sort :: Job failed.");
            return 4;
        }

        System.out.printf("sort :: Job complete in %d.%03d s.\n", t.stopS(), t.fms());

    } catch (IOException e) {
        System.err.printf("sort :: Hadoop error: %s\n", e);
        return 4;
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

    if (outPath != null)
        try {
            Utils.mergeSAMInto(outPath, wrkDir, "", "", samFormat, conf, "sort");
        } catch (IOException e) {
            System.err.printf("sort :: Output merging failed: %s\n", e);
            return 5;
        }
    return 0;
}

From source file:FormatStorage.Head.java

License:Open Source License

public void toJobConf(Configuration conf) {
    conf.setInt(ConstVar.HD_var, var);
    conf.setInt(ConstVar.HD_compress, compress);
    conf.setInt(ConstVar.HD_compressStyle, compressStyle);
    conf.setInt(ConstVar.HD_primaryIndex, primaryIndex);
    conf.setInt(ConstVar.HD_encode, encode);
    conf.setInt(ConstVar.HD_encodeStyle, encodeStyle);
    if (key != null) {
        conf.set(ConstVar.HD_key, key);
    }

    if (fieldMap != null) {
        short fieldNum = fieldMap.fieldNum();
        String[] fieldStrings = new String[fieldNum];

        Set<Short> keySet = fieldMap.fields.keySet();
        Iterator<Short> iterator = keySet.iterator();

        int i = 0;
        while (iterator.hasNext()) {
            Field field = fieldMap.fields.get(iterator.next());

            fieldStrings[i++] = field.type + ConstVar.RecordSplit + field.len + ConstVar.RecordSplit
                    + field.index;
        }
        conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);
    }

}
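
The matching read side does not appear in this snippet. Below is a plausible sketch of how a consumer could recover the field descriptions; only Configuration.getStrings is the real Hadoop call, and it assumes ConstVar.RecordSplit is a plain separator string containing no comma or regex metacharacters.

    // Hypothetical read side for the fieldMap property written above;
    // only conf.getStrings(...) is the real Hadoop API, the parsing is a sketch.
    String[] fieldStrings = conf.getStrings(ConstVar.HD_fieldMap);
    if (fieldStrings != null) {
        for (String fieldString : fieldStrings) {
            // Each entry was written as type + RecordSplit + len + RecordSplit + index.
            String[] parts = fieldString.split(ConstVar.RecordSplit);
            String type = parts[0];
            String len = parts[1];
            String index = parts[2];
            // ... rebuild the Field / fieldMap entries from these pieces ...
        }
    }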

From source file:FormatStorage1.IHead.java

License:Open Source License

public void toJobConf(Configuration conf) {
    conf.setInt(ConstVar.HD_magic, magic);
    conf.setInt(ConstVar.HD_var, var);
    conf.setInt(ConstVar.HD_ver, ver);
    conf.setInt(ConstVar.HD_lineindex, lineindex);
    conf.setInt(ConstVar.HD_primaryIndex, primaryIndex);
    conf.setInt(ConstVar.HD_compress, compress);
    conf.setInt(ConstVar.HD_compressStyle, compressStyle);
    conf.setInt(ConstVar.HD_encode, encode);
    conf.setInt(ConstVar.HD_encodeStyle, encodeStyle);

    if (fieldMap != null) {
        int fieldNum = fieldMap.fieldtypes().size();
        String[] fieldStrings = new String[fieldNum];

        int i = 0;
        for (IRecord.IFType ft : this.fieldMap.fieldtypes().values()) {
            fieldStrings[i++] = ft.type() + ConstVar.RecordSplit + ft.len() + ConstVar.RecordSplit + ft.idx();
        }
        conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);
    }

    if (udi != null && udi.infos.size() > 0) {
        String[] udistrs = new String[udi.infos.size()];
        int i = 0;
        for (Map.Entry<Integer, String> en : udi.infos.entrySet()) {
            udistrs[i++] = en.getKey() + ConstVar.RecordSplit + en.getValue();
        }
        conf.setStrings(ConstVar.HD_udi, udistrs);
    }
}

From source file:full_MapReduce.C4_5.java

License:Open Source License

private static void calcAttributesInfo(Map<String, String> conditions) throws Exception {
    Configuration conf = new Configuration();
    for (Entry<String, String> condition : conditions.entrySet()) {
        conf.setStrings(condition.getKey(), condition.getValue());
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(C4_5.class);
    job.setJobName("C4.5_calcAttributesInfo");

    FileInputFormat.addInputPath(job, summarized_data_path);
    FileOutputFormat.setOutputPath(job, calc_attributes_info_path);

    job.setMapperClass(AttributeInfoMapper.class);
    job.setReducerClass(AttributeInfoReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AttributeCounterWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(MapWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.waitForCompletion(false);
}

From source file:gobblin.compaction.HdfsIO.java

License:Open Source License

private static void addResourceToConf(Configuration conf) {
    conf.setStrings(getHdfsUriHadoopPropertyName(), HDFS_URI_DEFAULT);
    addHadoopConfigPropertiesToConf(conf);
    if (CompactionRunner.properties.containsKey(HDFS_URI)) {
        conf.setStrings(getHdfsUriHadoopPropertyName(), CompactionRunner.properties.getProperty(HDFS_URI));
    }
}

From source file:gr.ntua.h2rdf.partialJoin.JoinPlaner.java

License:Open Source License

private static void printNonJoinV(Configuration joinConf, String ret, String[] lines) {
    //try {
    int s = 0;
    for (int i = 0; i < join_files.length; i++) {
        if (lines[i].contains("|")) {
            if (lines[i].contains("J"))
                System.exit(1);
            String fname = lines[i].substring(0, lines[i].indexOf("|"));
            joinConf.set("input.reduceScans." + s + ".fname", fname);
            //Bytes.writeByteArray(v, Bytes.toBytes(fname));
            int id = Integer.parseInt(lines[i].substring(lines[i].indexOf(":") + 1));//String.valueOf(lines[i].charAt(lines[i].length()-1)));
            Scan scan = getScan(id);
            joinConf.set("input.reduceScans." + s + ".startrow", Bytes.toStringBinary(scan.getStartRow()));
            //Bytes.writeByteArray(v, scan.getStartRow());
            if (scan.hasFamilies()) {
                System.out.println(Bytes.toString(scan.getFamilies()[0]));
                joinConf.set("input.reduceScans." + s + ".columns", Bytes.toString(scan.getFamilies()[0]));
                //Bytes.writeByteArray(v, scan.getFamilies()[0]);//Bytes.toBytes(getScan(id).getInputColumns()));
            } else {
                System.out.println("no");
                joinConf.set("input.reduceScans." + s + ".columns", "");
                //Bytes.writeByteArray(v, Bytes.toBytes(""));//Bytes.toBytes(getScan(id).getInputColumns()));
            }
            s++;
        }
    }
    joinConf.setStrings("input.reduceScans", s + "");
    //Bytes.writeByteArray(joinConf, Bytes.toBytes("end"));
    //} catch (IOException e) {
    //   e.printStackTrace();
    //}
}

From source file:hk.newsRecommender.TFIDFClassify.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");

    //      part1----------------------------------------------------
    Job job1 = Job.getInstance(conf, "computeTF");
    Path outputPath1 = new Path(hdfsUrl + "/data/recommend/class1/tfidf1");
    HadoopUtil.delete(conf, outputPath1);
    job1.setJarByClass(TFIDFClassify.class);
    job1.setMapperClass(Mapper_Part1.class);
    job1.setReducerClass(Reduce_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setPartitionerClass(MyPartitoner.class); // MyPartitoner
    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/data3.txt"));
    FileOutputFormat.setOutputPath(job1, outputPath1);
    job1.waitForCompletion(true);

    // part2----------------------------------------
    Job job2 = Job.getInstance(conf, "computIDF");
    Path outputPath2 = new Path(hdfsUrl + "/data/recommend/class1/tfidf2");
    HadoopUtil.delete(conf, outputPath2);
    job2.setJarByClass(TFIDFClassify.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    FileInputFormat.setInputPaths(job2, new Path(hdfsUrl + "/data/recommend/class1/tfidf1"));
    FileOutputFormat.setOutputPath(job2, outputPath2);
    job2.waitForCompletion(true);

    //      part3----------------------------------------
    Job job3 = Job.getInstance(conf, "sortByTFIDFDec");
    Path outputPath3 = new Path(hdfsUrl + "/data/recommend/class1/tfidf3");
    HadoopUtil.delete(conf, outputPath3);
    job3.setMapperClass(Mapper_Part3.class);
    job3.setReducerClass(Reduce_Part3.class);
    job3.setMapOutputKeyClass(CustomKey.class);
    job3.setMapOutputValueClass(NullWritable.class);
    job3.setOutputKeyClass(CustomKey.class);
    job3.setOutputValueClass(NullWritable.class);
    job3.setGroupingComparatorClass(CustomGroupComparator.class);
    job3.setPartitionerClass(CustomPartitioner.class);
    FileInputFormat.addInputPath(job3, new Path(hdfsUrl + "/data/recommend/class1/tfidf2"));
    FileOutputFormat.setOutputPath(job3, outputPath3);
    job3.waitForCompletion(true);

    //      part4 (siftKeywords, commented out)----------------------------------------
    //      Job job4 = Job.getInstance(conf, "siftKeywords");
    //      Path outputPath4=new Path(hdfsUrl + "/data/recommend/class1/matrix1");
    //      HadoopUtil.delete(conf, outputPath4);
    //      job4.setJarByClass(TFIDF.class);
    //      job4.setMapperClass(Mapper_Part4.class);
    //      job4.setReducerClass(Reduce_Part4.class);
    //      job4.setMapOutputKeyClass(Text.class);
    //      job4.setMapOutputValueClass(Text.class);
    //      job4.setOutputKeyClass(Text.class);
    //      job4.setOutputValueClass(Text.class);
    //      job4.setPartitionerClass(CustomPartitioner.class);
    //      FileInputFormat.addInputPath(job4, new Path(hdfsUrl + "/data/recommend/class1/tfidf3"));
    //      FileOutputFormat.setOutputPath(job4, outputPath4);
    //      job4.waitForCompletion(true);

    //      part5----------------------------------------
    FileSystem fsopen = FileSystem.get(conf);
    FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/matrix1/part-r-00000"));
    Scanner scan = new Scanner(in);
    List<String> keywordList = new ArrayList<String>();
    while (scan.hasNext()) {
        keywordList.add(scan.next());
    }
    //      Must be set before the Job is created so tasks can read the keyword list.
    conf.setStrings("keyword", keywordList.toArray(new String[keywordList.size()]));
    Job job5 = Job.getInstance(conf, "generateMatrix");
    Path outputPath5 = new Path(hdfsUrl + "/data/recommend/class1/matrix2");
    HadoopUtil.delete(conf, outputPath5);
    job5.setJarByClass(TFIDF.class);
    job5.setMapperClass(Mapper_Part5.class);
    job5.setReducerClass(Reduce_Part5.class);
    job5.setMapOutputKeyClass(Text.class);
    job5.setMapOutputValueClass(Text.class);
    job5.setOutputKeyClass(Text.class);
    job5.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job5, new Path(hdfsUrl + "/data/recommend/class1/tfidf3"));
    FileOutputFormat.setOutputPath(job5, outputPath5);
    job5.waitForCompletion(true);

}
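
Note how the keyword list is written with conf.setStrings("keyword", ...) before job5 is created. A hedged sketch of how a mapper such as Mapper_Part5 could read it back is shown below; the class and field names are made up for illustration (assuming the usual org.apache.hadoop.mapreduce imports), and only the getStrings call is the real API.

// Hypothetical mapper showing the read side of conf.setStrings("keyword", ...);
// the project's actual Mapper_Part5 implementation is not shown on this page.
public static class KeywordMatrixMapper extends Mapper<LongWritable, Text, Text, Text> {
    private String[] keywords;

    @Override
    protected void setup(Context context) {
        // getStrings splits the comma-delimited value written by setStrings.
        keywords = context.getConfiguration().getStrings("keyword", new String[0]);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // ... compare the input record against the keywords array here ...
    }
}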

From source file:hk.newsRecommender.UserTag.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");
    FileSystem fs = FileSystem.get(conf);

    Job job1 = Job.getInstance(conf, "generateUserNewsMapping");
    Path output1Path = new Path(hdfsUrl + "/data/recommend/user1");
    HadoopUtil.delete(conf, output1Path);
    job1.setJarByClass(TFIDF.class);
    job1.setMapperClass(Mapper_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/data2.txt"));
    FileOutputFormat.setOutputPath(job1, output1Path);
    job1.waitForCompletion(true);

    Job job2 = Job.getInstance(conf, "generateUserNewsCatMapping");
    Path output2Path = new Path(hdfsUrl + "/data/recommend/user2");
    HadoopUtil.delete(conf, output2Path);
    job2.setJarByClass(UserTag.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job2, new Path(hdfsUrl + "/data/recommend/user1"));
    FileInputFormat.addInputPath(job2, new Path(hdfsUrl + "/data/recommend/ClusterPointsInfo.txt"));
    FileOutputFormat.setOutputPath(job2, output2Path);
    job2.waitForCompletion(true);

    Job job3 = Job.getInstance(conf, "countUserNewsCatMapping");
    Path output3Path = new Path(hdfsUrl + "/data/recommend/user3");
    HadoopUtil.delete(conf, output3Path);
    job3.setMapperClass(Mapper_Part3.class);
    job3.setReducerClass(Reduce_Part3.class);
    job3.setMapOutputKeyClass(Text.class);
    job3.setMapOutputValueClass(Text.class);
    job3.setOutputKeyClass(Text.class);
    job3.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job3, new Path(hdfsUrl + "/data/recommend/user2"));
    FileOutputFormat.setOutputPath(job3, output3Path);
    job3.waitForCompletion(true);

    Job job4 = Job.getInstance(conf, "generateClusterUniqueRecord");
    Path output4Path = new Path(hdfsUrl + "/data/recommend/user4");
    HadoopUtil.delete(conf, output4Path);
    job4.setMapperClass(Mapper_Part4.class);
    job4.setReducerClass(Reduce_Part4.class);
    job4.setMapOutputKeyClass(Text.class);
    job4.setMapOutputValueClass(Text.class);
    job4.setOutputKeyClass(Text.class);
    job4.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job4, new Path(hdfsUrl + "/data/recommend/ClusterPointsInfo.txt"));
    FileOutputFormat.setOutputPath(job4, output4Path);
    job4.waitForCompletion(true);

    FileSystem fsopen = FileSystem.get(conf);
    FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/user4/part-r-00000"));
    Scanner scan = new Scanner(in);
    List<String> keywordList = new ArrayList<String>();
    while (scan.hasNext()) {
        keywordList.add(scan.next());
    }
    conf.setStrings("category", keywordList.toArray(new String[keywordList.size()]));
    Path outPath4 = new Path(hdfsUrl + "/data/recommend/user5");
    if (fs.exists(outPath4)) {
        fs.delete(outPath4, true);
        System.out.println("???");
    }
    Job job5 = Job.getInstance(conf, "generateUserPreferableMatrix");
    job5.setMapperClass(Mapper_Part5.class);
    job5.setReducerClass(Reduce_Part5.class);
    job5.setMapOutputKeyClass(Text.class);
    job5.setMapOutputValueClass(Text.class);
    job5.setOutputKeyClass(Text.class);
    job5.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job5, new Path(hdfsUrl + "/data/recommend/user3"));
    FileOutputFormat.setOutputPath(job5, new Path(hdfsUrl + "/data/recommend/user5"));
    job5.waitForCompletion(true);

}

From source file:hydrograph.engine.cascading.scheme.TextDelimitedAndFixedWidth.java

License:Apache License

@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf))))
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));

    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", DelimitedAndFixedWidthInputFormat.class, InputFormat.class);
    conf.set("charsetName", charsetName);
    conf.set("quote", quote);
    conf.set("lengthsAndDelimiters", DelimitedAndFixedWidthHelper.arrayToString(lengthsAndDelimiters));
    conf.setStrings("lengthsAndDelimitersType", lengthsAndDelimitersType);
}

From source file:importToNewTable.createDataJob.java

public void importData(String filePath, String[] selectedFamilies, String[] keys) throws Exception {

    //Use the family class to get the column names.
    family familyName = new family();
    String[] column = familyName.columnfamily(filePath);
    //Get input file name to be used as table name.
    File f = new File(filePath);
    String fname = f.getName();
    String vcfName = FilenameUtils.removeExtension(fname);

    createTable ht = new createTable();
    ht.table(vcfName, column, selectedFamilies);

    //Mapreduce job.
    Configuration conf = new Configuration();
    conf.setStrings("column", column);
    conf.setStrings("keys", keys);
    conf.setStrings("selectedFamilies", selectedFamilies);
    conf.set("tableName", vcfName);
    Job job = Job.getInstance(conf, "VCF");
    job.setJarByClass(importToNewTable.createDataJob.class);
    job.setMapperClass(importToNewTable.createDataMapper.class);

    job.setReducerClass(org.apache.hadoop.hbase.mapreduce.PutSortReducer.class);

    // TODO: specify output types
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    // TODO: specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(job, new Path(filePath));
    File temp = new File("/tmp/HVCF/output");
    this.deleteDir(temp);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/HVCF/output"));

    Configuration hbconf = HBaseConfiguration.create();
    HTable table = new HTable(hbconf, vcfName);
    HFileOutputFormat.configureIncrementalLoad(job, table);

    if (!job.waitForCompletion(true)) {
        return;
    }

    //Bulkload Hfiles into HBase table.
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(hbconf);
    loader.doBulkLoad(new Path("/tmp/HVCF/output"), table);

}