Example usage for the org.apache.hadoop.conf.Configuration constructor Configuration()

Introduction

On this page you can find example usages of the org.apache.hadoop.conf.Configuration constructor Configuration().

Prototype

public Configuration()

Source Link

Document

A new configuration.

Usage

From source file:WriteFDFPerformance.java

License:Open Source License

/**
 * Creates the file {@code txt/txt} on the file system obtained from a default
 * {@code Configuration} and writes the line {@code 127,1000} (terminated by CRLF)
 * to it three times.
 *
 * @throws IOException if the file cannot be created, written or closed
 */
static void writetxt() throws IOException {
    FSDataOutputStream fos = FileSystem.get(new Configuration()).create(new Path("txt/txt"));
    try {
        String line = String.valueOf(127) + "," + String.valueOf(1000) + "\r\n";
        for (int i = 0; i < 3; i++) {
            fos.writeBytes(line);
        }
    } finally {
        // Close even when a write fails so the stream is not leaked.
        fos.close();
    }
}

From source file:FDFRW.java

License:Open Source License

/**
 * Opens the named format data file and shows 50 consecutive records,
 * starting with the record at line 8000000.
 *
 * @param filename name handed to {@code FormatDataFile.open}
 * @throws Exception if opening the file, fetching a record or closing the file fails
 */
static void read(String filename) throws Exception {
    FormatDataFile fdf = new FormatDataFile(new Configuration());
    fdf.open(filename);
    try {
        for (int i = 0; i < 50; i++) {
            Record rec = fdf.getRecordByLine(8000000 + i);
            rec.show();
        }
    } finally {
        // Release the file even if a record lookup fails part-way through.
        fdf.close();
    }
}

From source file:LookupPostings.java

License:Apache License

/**
 * Runs this tool./*  www  .  j  ava2 s. c o m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostings.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));
    BufferedReader d = new BufferedReader(new InputStreamReader(collection));

    Text key = new Text();
    PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value = new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();

    System.out.println("Looking up postings for the term \"starcross'd\"");
    key.set("starcross'd");

    reader.get(key, value);

    ArrayListWritable<PairOfInts> postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        System.out.println(pair);
        collection.seek(pair.getLeftElement());
        System.out.println(d.readLine());
    }

    key.set("gold");
    reader.get(key, value);
    System.out.println("Complete postings list for 'gold': " + value);

    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        goldHist.increment(pair.getRightElement());
    }

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    key.set("silver");
    reader.get(key, value);
    System.out.println("Complete postings list for 'silver': " + value);

    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        silverHist.increment(pair.getRightElement());
    }

    System.out.println("histogram of tf values for silver");
    for (PairOfInts pair : silverHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    key.set("bronze");
    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");
    }

    collection.close();
    reader.close();

    return 0;
}

From source file:TestFDF.java

License:Open Source License

/**
 * Creates the format data file {@code testtesttesttest} with a seven-field head
 * (byte, short, int, long, float, double, string), adds a single record to it, then
 * reopens the file and shows the record at line 0.
 *
 * @param args not used
 * @throws Exception if creating, writing, reopening or reading the file fails
 */
public static void main(String[] args) throws Exception {
    String file = "testtesttesttest";
    Head head = new Head();
    FieldMap fieldMap = new FieldMap();

    fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
    fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
    fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
    fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
    fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
    fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
    fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));

    head.setFieldMap(fieldMap);

    Configuration conf = new Configuration();

    // Write phase: one record whose values are derived from i.
    FormatDataFile fd = new FormatDataFile(conf);
    fd.create(file, head);
    try {
        Record record = new Record((short) 7);
        int i = 1;
        record.addValue(new FieldValue((byte) (1 + i), (short) 0));
        record.addValue(new FieldValue((short) (2 + i), (short) 1));
        record.addValue(new FieldValue((int) (3 + i), (short) 2));
        record.addValue(new FieldValue((long) (4 + i), (short) 3));
        record.addValue(new FieldValue((float) (5.5 + i), (short) 4));
        record.addValue(new FieldValue((double) (6.6 + i), (short) 5));
        // str and r are declared outside this method; the string field gets a
        // randomly chosen slice of str prefixed with i.
        record.addValue(new FieldValue(i + " "
                + str.substring(r.nextInt(str.length() / 2), str.length() / 2 - 1 + r.nextInt(str.length() / 2)),
                (short) 6));

        fd.addRecord(record);
    } finally {
        // Close the writer even if building or adding the record fails.
        fd.close();
    }

    // Read phase: reopen the same file and show its first record.
    fd = new FormatDataFile(conf);
    fd.open(file);
    try {
        Record rec = fd.getRecordByLine(0);
        rec.show();
    } finally {
        fd.close();
    }
}

From source file:Importer.java

License:Open Source License

/**
 * Converts one text file into a block-compressed sequence file named
 * {@code <file name>.seq} under {@code outDir}.
 *
 * <p>Each input line is appended, followed by one space, to the current document. A line
 * starting with {@code ---END.OF.DOCUMENT---} ends the document, which is then written as a
 * record keyed by {@code hash(content)}. Content left over after the last line is written
 * as a final record when its length exceeds 5. Input whose name ends in {@code .bz2} is
 * read through a {@code CBZip2InputStream} after its first two bytes are skipped.
 *
 * <p>Updates {@code totalBytes} and {@code totalRecords} and prints a throughput summary.
 *
 * @param file the input file to convert
 * @throws Exception if the input cannot be read or the sequence file cannot be written
 */
public static void copyFile(File file) throws Exception {
    File destFile = new File(outDir, file.getName() + ".seq");
    Path dest = new Path(destFile.getAbsolutePath());

    Configuration conf = new Configuration();
    FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")),
            conf);
    CompressionCodec codec = new DefaultCodec();
    fileSys.mkdirs(dest.getParent());
    FSDataOutputStream outputStr = fileSys.create(dest);
    try {
        seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class,
                SequenceFile.CompressionType.BLOCK, codec);
        try {
            String filename = file.getName();
            InputStream in = new BufferedInputStream(new FileInputStream(file));
            try {
                if (filename.endsWith(".bz2")) {
                    in.read();
                    in.read(); //snarf header
                    in = new CBZip2InputStream(in);
                }
                BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII"));

                System.out.println("working on file " + file);
                int records = 0;
                long bytes = 0;
                long startTime = System.currentTimeMillis();
                String s = null;
                Text content = new Text();
                while ((s = br.readLine()) != null) {
                    if (s.startsWith("---END.OF.DOCUMENT---")) {
                        Text name = new Text(hash(content));
                        seqFileWriter.append(name, content);
                        records++;
                        content = new Text();
                    } else {
                        // Text holds UTF-8, so encode explicitly instead of relying on the
                        // platform default charset.
                        byte[] lineAsBytes = (s + " ").getBytes("UTF-8");
                        for (byte b : lineAsBytes) {
                            // Java bytes are signed: a set high bit shows up as a negative
                            // value, so "b < 128" could never fail.
                            assert b >= 0 : "found an unexpected high-bit set";
                        }

                        content.append(lineAsBytes, 0, lineAsBytes.length);
                        bytes += lineAsBytes.length;
                    }
                } //end while
                if (content.getLength() > 5) {
                    Text name = new Text(hash(content));
                    seqFileWriter.append(name, content);
                    records++;
                }
                totalBytes += bytes;
                totalRecords += records;
                // "+ 1" keeps the divisor non-zero for files processed in under a second.
                long time = (System.currentTimeMillis() - startTime) / 1000 + 1;
                long kbSec = bytes / 1024 / time;
                System.out.println(new java.util.Date());
                System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in " + time
                        + " seconds (" + kbSec + " KB/sec).");
            } finally {
                in.close();
            }
        } finally {
            seqFileWriter.close();
        }
    } finally {
        // Closed last, after the input and the writer, as in the original close order.
        outputStr.close();
    }
}

From source file:Vectors.java

License:Apache License

/**
 * Calls {@code read} once for the path {@code preference/part-r-00000} with a default
 * {@code Configuration} and prints the returned vector to standard output.
 *
 * @param args not used
 * @throws IOException declared by the original method; propagated unchanged
 */
public static void main(String args[]) throws IOException {
    final Path source = new Path("preference/part-r-00000");
    final Configuration configuration = new Configuration();

    // Exactly one read-and-print pass.
    final Vector loaded = read(source, configuration);
    System.out.println(loaded);
}

From source file:GetUserInfoGivenMovieId.java

/**
 * Runs two chained jobs.
 *
 * <p>The first job ("GetUserInfoGivenMovieId") reads {@code args[0]}, has {@code args[2]}
 * stored in its configuration under the key {@code movieId}, and writes to {@code args[3]}.
 * When it succeeds, the second job ("UserInfo") reads {@code args[3]}, has {@code args[1]}
 * added as a cache file, and writes to {@code args[4]}.
 *
 * <p>Exits with status 2 when fewer than five arguments are given, 1 when either job fails,
 * and 0 when both jobs succeed.
 *
 * @param args {@code <in> <cache file> <movieId> <intermediate out> <out>}
 * @throws Exception if a job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    if (args.length < 5) {
        // Every index up to args[4] is used below; fail with a usage message, not an
        // ArrayIndexOutOfBoundsException.
        System.err.println(
                "Usage: GetUserInfoGivenMovieId <in> <cache file> <movieId> <intermediate out> <out>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    conf.set("movieId", args[2]);
    Job job = Job.getInstance(conf, "GetUserInfoGivenMovieId");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setJarByClass(GetUserInfoGivenMovieId.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[3]));

    if (!job.waitForCompletion(true)) {
        // Without the first job's output there is nothing for the second job to read.
        System.exit(1);
    }

    Job job2 = Job.getInstance(new Configuration(), "UserInfo");
    job2.addCacheFile(new URI(args[1]));
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setJarByClass(GetUserInfoGivenMovieId.class);
    job2.setMapperClass(MapWithJoin.class);
    job2.setReducerClass(ReduceFinal.class);

    job2.setInputFormatClass(TextInputFormat.class);
    job2.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job2, new Path(args[3]));
    FileOutputFormat.setOutputPath(job2, new Path(args[4]));

    // Report the second job's outcome to the caller instead of discarding it.
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}

From source file:CrimenCasosTotales.java

/**
 * Configures and runs the "casostotales" job, reading from {@code args[0]} and writing
 * to {@code args[1]}.
 *
 * <p>Exits with status 2 when fewer than two arguments are given, 0 when the job succeeds
 * and 1 when it fails.
 *
 * @param args {@code <in> <out>}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String args[]) throws Exception {
    if (args.length < 2) {
        // Both paths are required; fail with a usage message, not an
        // ArrayIndexOutOfBoundsException.
        System.err.println("Usage: casostotales <in> <out>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "casostotales");
    // NOTE(review): WordCount.class looks like a copy-paste leftover in a file named
    // CrimenCasosTotales.java - confirm which class should locate the job jar.
    job.setJarByClass(WordCount.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:WordCount_NoCombiner.java

License:Apache License

/**
 * Configures and runs the "word count" job without a combiner.
 *
 * <p>Generic options are consumed by {@code GenericOptionsParser}; exactly two remaining
 * arguments are expected, the input path followed by the output path. Otherwise a usage
 * line is printed and the process exits with status 2. The process exits with 0 when the
 * job succeeds and 1 when it fails.
 *
 * @param args command-line arguments, including any generic options
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final String[] paths = new GenericOptionsParser(configuration, args).getRemainingArgs();

    final boolean hasInputAndOutput = paths.length == 2;
    if (!hasInputAndOutput) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    final Job wordCount = new Job(configuration, "word count");
    wordCount.setJarByClass(WordCount_NoCombiner.class);

    // Map, partition and reduce stages. No combiner is registered:
    // setCombinerClass(IntSumReducer.class) is deliberately left out.
    wordCount.setMapperClass(TokenizerMapper.class);
    wordCount.setPartitionerClass(WordPartitioner.class);
    wordCount.setNumReduceTasks(5);
    wordCount.setReducerClass(IntSumReducer.class);

    // Output record types.
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);

    final Path input = new Path(paths[0]);
    FileInputFormat.addInputPath(wordCount, input);
    final Path output = new Path(paths[1]);
    FileOutputFormat.setOutputPath(wordCount, output);

    final boolean succeeded = wordCount.waitForCompletion(true);
    if (succeeded) {
        System.exit(0);
    } else {
        System.exit(1);
    }
}

From source file:Outlinks.java

License:Apache License

/**
 * Configures and runs the job named "inlinks" with ten reduce tasks.
 *
 * <p>Generic options are consumed by {@code GenericOptionsParser}. Of the remaining
 * arguments, every one except the last is added as an input path and the last one is the
 * output path. With fewer than two remaining arguments a usage line is printed and the
 * process exits with status 2. The process exits with 0 when the job succeeds and 1 when
 * it fails.
 *
 * @param args command-line arguments, including any generic options
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final String[] paths = new GenericOptionsParser(configuration, args).getRemainingArgs();

    if (paths.length < 2) {
        System.err.println("Usage: inlinks <in> [<in>...] <out>");
        System.exit(2);
    }

    final Job linkJob = new Job(configuration, "inlinks");
    linkJob.setJarByClass(Outlinks.class);
    linkJob.setMapperClass(TokenizerMapper.class);
    linkJob.setReducerClass(IdentityReducer.class);
    linkJob.setNumReduceTasks(10);
    linkJob.setOutputKeyClass(IntWritable.class);
    linkJob.setOutputValueClass(IntWritable.class);

    // The last remaining argument is the output; everything before it is an input.
    final int outputIndex = paths.length - 1;
    int next = 0;
    while (next < outputIndex) {
        FileInputFormat.addInputPath(linkJob, new Path(paths[next]));
        next++;
    }
    FileOutputFormat.setOutputPath(linkJob, new Path(paths[outputIndex]));

    final boolean succeeded = linkJob.waitForCompletion(true);
    if (succeeded) {
        System.exit(0);
    } else {
        System.exit(1);
    }
}