Example usage for org.apache.hadoop.mapred Reporter NULL


Introduction

On this page you can find example usages of org.apache.hadoop.mapred Reporter.NULL.

Prototype

Reporter NULL


Document

A constant of Reporter type that does nothing.
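
Before the source-file examples under Usage below, here is a minimal sketch of the typical pattern (not taken from any of the projects listed on this page; the input path and method name are placeholders): Reporter.NULL is handy whenever an org.apache.hadoop.mapred component such as a RecordReader is driven directly, outside a running MapReduce task, and no progress reporting is needed. Imports from org.apache.hadoop.fs, org.apache.hadoop.io and org.apache.hadoop.mapred are assumed.

public static void readWithNullReporter() throws IOException {
    JobConf conf = new JobConf();
    FileInputFormat.setInputPaths(conf, new Path("input.txt")); // placeholder path

    // TextInputFormat is JobConf's default input format
    TextInputFormat input = (TextInputFormat) conf.getInputFormat();
    InputSplit[] splits = input.getSplits(conf, 1);

    for (InputSplit split : splits) {
        // Reporter.NULL satisfies the Reporter parameter without reporting anything
        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, conf, Reporter.NULL);
        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        while (reader.next(key, value)) {
            // process the current line held in 'value'
        }
        reader.close();
    }
}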

Usage

From source file:org.apache.mahout.df.mapred.partial.PartialSequentialBuilder.java

License:Apache License

@Override
protected void runJob(JobConf job) throws IOException {
    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, job.getNumMapTasks());
    log.debug("Nb splits : {}", splits.length);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(job); // total number of trees

    firstOutput = new PartialOutputCollector(numTrees);
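    // the mappers are driven directly here, outside a real MapReduce task, so the do-nothing Reporter is enough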
    Reporter reporter = Reporter.NULL;

    firstIds = new int[splits.length];
    sizes = new int[splits.length];

    // to compute firstIds, process the splits in file order
    int firstId = 0;
    long slowest = 0; // duration of slowest map
    for (InputSplit split : splits) {
        int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();

        Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, splits.length,
                numTrees);

        long time = System.currentTimeMillis();

        firstIds[hp] = firstId;

        while (reader.next(key, value)) {
            mapper.map(key, value, firstOutput, reporter);
            firstId++;
            sizes[hp]++;
        }

        mapper.close();

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
}

From source file:org.apache.mahout.df.mapred.partial.PartialSequentialBuilder.java

License:Apache License

/**
 * The second step uses the trees to predict the rest of the instances outside
 * their own partition.
 * 
 * @throws IOException
 * 
 */
void secondStep(JobConf job, Path forestPath, PredictionCallback callback) throws IOException {
    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, job.getNumMapTasks());
    log.debug("Nb splits : {}", splits.length);

    Builder.sortSplits(splits);

    int numTrees = Builder.getNbTrees(job); // total number of trees

    // compute the expected number of outputs
    int total = 0;
    for (int p = 0; p < splits.length; p++) {
        total += Step2Mapper.nbConcerned(splits.length, numTrees, p);
    }

    secondOutput = new PartialOutputCollector(total);
    Reporter reporter = Reporter.NULL;
    long slowest = 0; // duration of slowest map

    for (int partition = 0; partition < splits.length; partition++) {
        InputSplit split = splits[partition];
        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();

        // load the output of the 1st step
        int nbConcerned = Step2Mapper.nbConcerned(splits.length, numTrees, partition);
        TreeID[] fsKeys = new TreeID[nbConcerned];
        Node[] fsTrees = new Node[nbConcerned];

        FileSystem fs = forestPath.getFileSystem(job);
        int numInstances = InterResults.load(fs, forestPath, splits.length, numTrees, partition, fsKeys,
                fsTrees);

        Step2Mapper mapper = new Step2Mapper();
        mapper.configure(partition, dataset, fsKeys, fsTrees, numInstances);

        long time = System.currentTimeMillis();

        while (reader.next(key, value)) {
            mapper.map(key, value, secondOutput, reporter);
        }

        mapper.close();

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
}

From source file:org.apache.mahout.df.mapred.partial.Step0JobTest.java

License:Apache License

public void testStep0Mapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, numAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);
    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);

    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Step0OutputCollector collector = new Step0OutputCollector(numMaps);
    Reporter reporter = Reporter.NULL;

    for (int p = 0; p < numMaps; p++) {
        InputSplit split = sorted[p];
        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();

        Step0Mapper mapper = new Step0Mapper();
        mapper.configure(p);

        Long firstKey = null;
        int size = 0;

        while (reader.next(key, value)) {
            if (firstKey == null) {
                firstKey = key.get();
            }

            mapper.map(key, value, collector, reporter);

            size++;
        }

        mapper.close();

        // validate the mapper's output
        assertEquals(p, collector.keys[p]);
        assertEquals(firstKey.longValue(), collector.values[p].getFirstId());
        assertEquals(size, collector.values[p].getSize());
    }

}

From source file:org.apache.mahout.df.mapred.partial.Step0JobTest.java

License:Apache License

public void testProcessOutput() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, numAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, numInstances);

    // each instance label is its index in the dataset
    int labelId = Utils.findLabel(descriptor);
    for (int index = 0; index < numInstances; index++) {
        source[index][labelId] = index;
    }

    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);
    FileInputFormat.setInputPaths(job, dataPath);

    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);

    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);

    Reporter reporter = Reporter.NULL;

    int[] keys = new int[numMaps];
    Step0Output[] values = new Step0Output[numMaps];

    int[] expectedIds = new int[numMaps];

    for (int p = 0; p < numMaps; p++) {
        InputSplit split = sorted[p];
        RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();

        Long firstKey = null;
        int size = 0;

        while (reader.next(key, value)) {
            if (firstKey == null) {
                firstKey = key.get();
                expectedIds[p] = converter.convert(0, value.toString()).label;
            }

            size++;
        }

        keys[p] = p;
        values[p] = new Step0Output(firstKey, size);
    }

    Step0Output[] partitions = Step0Job.processOutput(keys, values);

    int[] actualIds = Step0Output.extractFirstIds(partitions);

    assertTrue("Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds),
            Arrays.equals(expectedIds, actualIds));
}

From source file:org.apache.mahout.df.mapred.partial.Step1MapperTest.java

License:Apache License

public void testMapper() throws Exception {
    Long seed = null;
    Random rng = RandomUtils.getRandom();

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    MockTreeBuilder treeBuilder = new MockTreeBuilder();

    LongWritable key = new LongWritable();
    Text value = new Text();

    int treeIndex = 0;

    for (int partition = 0; partition < nbMappers; partition++) {
        String[] split = splits[partition];
        treeBuilder.setExpected(DataLoader.loadData(dataset, split));

        // expected number of trees that this mapper will build
        int mapNbTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

        PartialOutputCollector output = new PartialOutputCollector(mapNbTrees);

        MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, seed, partition, nbMappers, nbTrees);

        // make sure the mapper computed firstTreeId correctly
        assertEquals(treeIndex, mapper.getFirstTreeId());

        for (int index = 0; index < split.length; index++) {
            key.set(index);
            value.set(split[index]);
            mapper.map(key, value, output, Reporter.NULL);
        }

        mapper.close();

        // make sure the mapper built all its trees
        assertEquals(mapNbTrees, output.nbOutputs());

        // check the returned keys
        for (TreeID k : output.getKeys()) {
            assertEquals(partition, k.partition());
            assertEquals(treeIndex, k.treeId());

            treeIndex++;
        }
    }
}

From source file:org.apache.mahout.df.mapred.partial.Step2MapperTest.java

License:Apache License

public void testMapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    // prepare first step output
    TreeID[] keys = new TreeID[nbTrees];
    Node[] trees = new Node[nbTrees];
    int[] sizes = new int[nbMappers];

    int treeIndex = 0;
    for (int partition = 0; partition < nbMappers; partition++) {
        int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

        for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
            keys[treeIndex] = new TreeID(partition, treeIndex);
            // put the partition in the leaf's label
            // this way we can track the outputs
            trees[treeIndex] = new Leaf(partition);
        }

        sizes[partition] = splits[partition].length;
    }

    // store the first step outputs in a file
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path forestPath = new Path("testdata/Step2MapperTest.forest");
    InterResults.store(fs, forestPath, keys, trees, sizes);

    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int partition = 0; partition < nbMappers; partition++) {
        String[] split = splits[partition];

        // number of trees that will be handled by the mapper
        int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

        PartialOutputCollector output = new PartialOutputCollector(nbConcerned);

        // load the current mapper's (key, tree) pairs
        TreeID[] curKeys = new TreeID[nbConcerned];
        Node[] curTrees = new Node[nbConcerned];
        InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

        // simulate the job
        MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

        for (int index = 0; index < split.length; index++) {
            key.set(index);
            value.set(split[index]);
            mapper.map(key, value, output, Reporter.NULL);
        }

        mapper.close();

        // make sure the mapper did not return its own trees
        assertEquals(nbConcerned, output.nbOutputs());

        // check the returned results
        int current = 0;
        for (int index = 0; index < nbTrees; index++) {
            if (keys[index].partition() == partition) {
                // should not be part of the results
                continue;
            }

            TreeID k = output.getKeys()[current];

            // the tree should receive the partition's index
            assertEquals(partition, k.partition());

            // make sure all the trees of the other partitions are handled in the
            // correct order
            assertEquals(index, k.treeId());

            int[] predictions = output.getValues()[current].getPredictions();

            // all the instances of the partition should be classified
            assertEquals(split.length, predictions.length);
            assertEquals("at least one instance of the partition was not classified", -1,
                    ArrayUtils.indexOf(predictions, -1));

            // the tree must not belong to the mapper's partition
            int treePartition = predictions[0];
            assertFalse("Step2Mapper returned a tree from its own partition", partition == treePartition);

            current++;
        }
    }
}

From source file:org.apache.orc.bench.ColumnProjectionBenchmark.java

License:Apache License

@Benchmark
public void parquet(ExtraCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    if ("taxi".equals(dataset)) {
        conf.set("columns", "vendor_id,pickup_time");
        conf.set("columns.types", "int,timestamp");
    } else if ("sales".equals(dataset)) {
        conf.set("columns", "sales_id,customer_id");
        conf.set("columns.types", "bigint,bigint");
    } else if ("github".equals(dataset)) {
        conf.set("columns", "actor,created_at");
        conf.set("columns.types",
                "struct<avatar_url:string,gravatar_id:string," + "id:int,login:string,url:string>,timestamp");
    } else {
        throw new IllegalArgumentException("Unknown data set " + dataset);
    }
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);

    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
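    // the reader is created directly in the benchmark (no task context), so Reporter.NULL fills the Reporter argument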
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader = new ParquetRecordReaderWrapper(
            inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.records += 1;
    }
    recordReader.close();
    counters.bytesRead += statistics.getBytesRead();
    counters.reads += statistics.getReadOps();
    counters.invocations += 1;
}

From source file:org.apache.orc.bench.convert.parquet.ParquetReader.java

License:Apache License

public ParquetReader(Path path, TypeDescription schema, Configuration conf) throws IOException {
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    JobConf jobConf = new JobConf(conf);
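    // Reporter.NULL is passed because the reader is used standalone, not inside a MapReduce task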
    reader = new MapredParquetInputFormat().getRecordReader(split, jobConf, Reporter.NULL);
    value = reader.createValue();
    converters = new Converter[schema.getChildren().size()];
    List<TypeDescription> children = schema.getChildren();
    for (int c = 0; c < converters.length; ++c) {
        converters[c] = createConverter(children.get(c));
    }
}

From source file:org.apache.orc.bench.convert.parquet.ParquetWriter.java

License:Apache License

public ParquetWriter(Path path, TypeDescription schema, Configuration conf, CompressionKind compression)
        throws IOException {
    JobConf jobConf = new JobConf(conf);
    Properties tableProperties = Utilities.convertSchemaToHiveConfig(schema);
    this.schema = schema;
    jobConf.set(ParquetOutputFormat.COMPRESSION, getCodec(compression).name());
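    // the do-nothing Reporter.NULL serves as the progress callback when obtaining the record writer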
    writer = new MapredParquetOutputFormat().getHiveRecordWriter(jobConf, path, ParquetHiveRecord.class,
            compression != CompressionKind.NONE, tableProperties, Reporter.NULL);
    record = new ParquetHiveRecord(null, OrcBenchmarkUtilities.createObjectInspector(schema));
}

From source file:org.apache.orc.bench.FullReadBenchmark.java

License:Apache License

@Benchmark
public void parquet(ExtraCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);

    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
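    // as above, Reporter.NULL stands in for a real Reporter when constructing the reader outside a task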
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader = new ParquetRecordReaderWrapper(
            inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.records += 1;
    }
    recordReader.close();
    counters.bytesRead += statistics.getBytesRead();
    counters.reads += statistics.getReadOps();
    counters.invocations += 1;
}