Example usage for org.apache.hadoop.mapred Reporter NULL


Introduction

This page collects example usages of the org.apache.hadoop.mapred Reporter.NULL field, drawn from open source projects.

Prototype

public static final Reporter NULL

Click the source link to view the source code for org.apache.hadoop.mapred Reporter.NULL.

Document

A constant of Reporter type that does nothing.
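For orientation, here is a minimal, self-contained sketch of the pattern the examples below share: passing Reporter.NULL to the old mapred API when no real task context (and therefore no real Reporter) is available. The input path and the choice of TextInputFormat are assumptions for illustration only, not taken from any of the listed projects.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        // Hypothetical input path, used for illustration only.
        FileInputFormat.setInputPaths(conf, new Path("/tmp/input.txt"));

        TextInputFormat format = new TextInputFormat();
        format.configure(conf);

        LongWritable key = new LongWritable();
        Text value = new Text();

        for (InputSplit split : format.getSplits(conf, 1)) {
            // Reporter.NULL satisfies the getRecordReader() signature without
            // reporting any progress or counters.
            RecordReader<LongWritable, Text> reader = format.getRecordReader(split, conf, Reporter.NULL);
            try {
                while (reader.next(key, value))
                    System.out.println(value);
            } finally {
                reader.close();
            }
        }
    }
}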

Usage

From source file: cascading.tap.hadoop.io.TapOutputCollector.java

License: Open Source License

private Reporter getReporter() {
    Reporter reporter = Reporter.NULL;

    if (flowProcess instanceof MapRed)
        reporter = ((MapRed) flowProcess).getReporter(); // may return Reporter.NULL

    return reporter;
}

From source file: cascading.tap.hadoop.TapCollector.java

License: Open Source License

private void initialize() throws IOException {
    tap.sinkInit(conf); // tap should not delete if called within a task

    OutputFormat outputFormat = conf.getOutputFormat();

    isFileOutputFormat = outputFormat instanceof FileOutputFormat;

    if (isFileOutputFormat) {
        Hadoop18TapUtil.setupJob(conf);

        if (prefix != null)
            filename = String.format(filenamePattern, prefix, "/", conf.getInt("mapred.task.partition", 0));
        else
            filename = String.format(filenamePattern, "", "", conf.getInt("mapred.task.partition", 0));

        Hadoop18TapUtil.setupTask(conf);
    }

    writer = outputFormat.getRecordWriter(null, conf, filename, Reporter.NULL);
}

From source file: cascading.tap.hadoop.TapIterator.java

License: Open Source License

private RecordReader makeReader(int currentSplit) throws IOException {
    if (LOG.isDebugEnabled())
        LOG.debug("reading split: " + currentSplit);

    return inputFormat.getRecordReader(splits[currentSplit], conf, Reporter.NULL);
}

From source file: cascading.tap.hadoop.ZipInputFormatTest.java

License: Open Source License

public void testSplits() throws Exception {
    JobConf job = new JobConf();
    FileSystem currentFs = FileSystem.get(job);

    Path file = new Path(workDir, "test.zip");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);
    FileInputFormat.setInputPaths(job, file);

    for (int entries = 1; entries < MAX_ENTRIES; entries += random.nextInt(MAX_ENTRIES / 10) + 1) {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        ZipOutputStream zos = new ZipOutputStream(byteArrayOutputStream);
        long length = 0;

        LOG.debug("creating; zip file with entries = " + entries);

        // for each entry in the zip file
        for (int entryCounter = 0; entryCounter < entries; entryCounter++) {
            // construct zip entries splitting MAX_LENGTH between entries
            long entryLength = MAX_LENGTH / entries;
            ZipEntry zipEntry = new ZipEntry("/entry" + entryCounter + ".txt");
            zipEntry.setMethod(ZipEntry.DEFLATED);
            zos.putNextEntry(zipEntry);

            for (length = entryCounter * entryLength; length < (entryCounter + 1) * entryLength; length++) {
                zos.write(Long.toString(length).getBytes());
                zos.write("\n".getBytes());
            }

            zos.flush();
            zos.closeEntry();
        }

        zos.flush();
        zos.close();

        currentFs.delete(file, true);

        OutputStream outputStream = currentFs.create(file);

        byteArrayOutputStream.writeTo(outputStream);
        outputStream.close();

        ZipInputFormat format = new ZipInputFormat();
        format.configure(job);
        LongWritable key = new LongWritable();
        Text value = new Text();
        InputSplit[] splits = format.getSplits(job, 100);

        BitSet bits = new BitSet((int) length);
        for (int j = 0; j < splits.length; j++) {
            LOG.debug("split[" + j + "]= " + splits[j]);
            RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);

            try {
                int count = 0;

                while (reader.next(key, value)) {
                    int v = Integer.parseInt(value.toString());
                    LOG.debug("read " + v);

                    if (bits.get(v))
                        LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());

                    assertFalse("key in multiple partitions.", bits.get(v));
                    bits.set(v);
                    count++;
                }

                LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
            } finally {
                reader.close();
            }
        }

        assertEquals("some keys in no partition.", length, bits.cardinality());
    }
}

From source file: cn.scala.es.ReportingUtils.java

License: Apache License

@SuppressWarnings({ "rawtypes" })
static void report(Progressable progressable, Stats stats) {
    progressable = (Progressable) CompatHandler.unwrap(progressable);

    if (progressable == null || progressable == Reporter.NULL) {
        return;
    }

    if (progressable instanceof Reporter) {
        Reporter reporter = (Reporter) progressable;
        for (Counter count : Counter.ALL) {
            oldApiCounter(reporter, count, count.get(stats));
        }
    }

    if (progressable instanceof org.apache.hadoop.mapreduce.TaskInputOutputContext) {
        TaskInputOutputContext compatTioc = CompatHandler
                .taskInputOutputContext((org.apache.hadoop.mapreduce.TaskInputOutputContext) progressable);
        for (Counter count : Counter.ALL) {
            newApiCounter(compatTioc, count, count.get(stats));
        }
    }
}

From source file: com.bigdata.diane.MiniTestDFSIO.java

License: Apache License

private static void sequentialTest(FileSystem fs, int testType, int fileSize, int nrFiles) throws Exception {
    IOStatMapper<Long> ioer = null;
    if (testType == TEST_TYPE_READ)
        ioer = new ReadMapper();
    else if (testType == TEST_TYPE_WRITE)
        ioer = new WriteMapper();
    else
        return;
    for (int i = 0; i < nrFiles; i++)
        ioer.doIO(Reporter.NULL, BASE_FILE_NAME + Integer.toString(i), MEGA * fileSize);
}

From source file: com.cloudera.knittingboar.io.TestInputRecordsSplit.java

License: Apache License

/**
 * create an InputRecordSplit and then read some records
 *
 * - make sure we maintain split discipline
 * @throws IOException 
 * 
 */
public void testReadSplitViaInputRecordsSplit() throws IOException {

    // InputRecordsSplit(JobConf jobConf, InputSplit split)

    // needs to get a jobConf from somewhere, under the hood

    // needs a split calculated from the aforementioned jobConf

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "testReadSplitViaInputRecordsSplit.txt");

    int tmp_file_size = 2000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        for (int i = 0; i < tmp_file_size; i++) {
            writer.write(
                    "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 1, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 99");
            writer.write("\n");
        }
    } finally {
        writer.close();
    }

    System.out.println("file write complete, wrote " + tmp_file_size + " recs");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    System.out.println("> setting splits for: " + workDir);

    //    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, file);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got =        " + splits.length);

    System.out.println("---- debug splits --------- ");

    //InputSplit test_split = null;

    int total_read = 0;

    for (int x = 0; x < splits.length; x++) {

        System.out.println("> Split [" + x + "]: " + splits[x].getLength());

        int count = 0;
        InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[x]);
        while (custom_reader.next(value)) {

            count++;
            //

        }

        System.out.println("read: " + count + " records for split " + x);

        total_read += count;

    } // for each split

    System.out.println("--------- total read across all splits: " + total_read);

    assertEquals(tmp_file_size, total_read);

}

From source file: com.cloudera.knittingboar.io.TestInputRecordsSplit.java

License: Apache License

public void testReadSplitViaInputRecordsSplit_SplitReset() throws IOException {

    // InputRecordsSplit(JobConf jobConf, InputSplit split)

    // needs to get a jobConf from somewhere, under the hood

    // needs a split calculated from the aforementioned jobConf

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "testReadSplitViaInputRecordsSplit_SplitReset");

    int tmp_file_size = 2000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        for (int i = 0; i < tmp_file_size; i++) {
            writer.write(
                    "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 1, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 99");
            writer.write("\n");
        }
    } finally {
        writer.close();
    }

    System.out.println("file write complete, wrote " + tmp_file_size + " recs");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    //    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, file);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got =        " + splits.length);

    System.out.println("---- testReadSplitViaInputRecordsSplit_SplitReset: debug splits --------- ");

    int total_read = 0;

    System.out.println("> Split [0]: " + splits[0].getLength());

    int count = 0;
    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);
    while (custom_reader.next(value)) {

        count++;

    }

    System.out.println("read: " + count + " records for split " + 0);

    int count_reset = 0;
    custom_reader.ResetToStartOfSplit();
    while (custom_reader.next(value)) {

        count_reset++;

    }

    System.out.println("read: " + count_reset + " records for split after reset " + 0);

    assertEquals(count, count_reset);

}

From source file: com.cloudera.knittingboar.io.TestSplitCalcs.java

License: Apache License

/**
 *
 * - use the TextInputFormat.getSplits() to test pulling split info
 * @throws IOException 
 * 
 */
public void testGetSplits() throws IOException {

    TextInputFormat input = new TextInputFormat();

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "testGetSplits.txt");

    int tmp_file_size = 200000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        for (int i = 0; i < tmp_file_size; i++) {
            writer.write(
                    "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 1, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 99");
            writer.write("\n");
        }
    } finally {
        writer.close();
    }

    System.out.println("file write complete");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    //    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, file);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got =        " + splits.length);

    assertEquals(2, splits.length);

    System.out.println("---- debug splits --------- ");

    for (int x = 0; x < splits.length; x++) {

        System.out.println("> Split [" + x + "]: " + splits[x].getLength() + ", " + splits[x].toString() + ", "
                + splits[x].getLocations()[0]);

        RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[x], job, reporter);
        try {
            int count = 0;
            while (reader.next(key, value)) {

                if (count == 0) {
                    System.out.println("first: " + value.toString());
                    assertTrue(value.toString().contains("a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p"));
                }

                count++;
            }

            System.out.println("last: " + value.toString());

            assertTrue(value.toString().contains("a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p"));

        } finally {
            reader.close();
        }

    } // for each split

}

From source file: com.cloudera.knittingboar.records.TestTwentyNewsgroupsRecordFactory.java

License: Apache License

public void testRecordFactoryOnDatasetShard() throws Exception {

    TwentyNewsgroupsRecordFactory rec_factory = new TwentyNewsgroupsRecordFactory("\t");
    //rec_factory.setClassSplitString("\t");

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "20news-part-0.txt");

    int tmp_file_size = 200000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    FileInputFormat.setInputPaths(job, workDir);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got =        " + splits.length);

    System.out.println("---- debug splits --------- ");

    rec_factory.Debug();

    int total_read = 0;

    long ts_start = System.currentTimeMillis();

    for (int x = 0; x < splits.length; x++) {

        System.out.println("> Split [" + x + "]: " + splits[x].getLength());

        int count = 0;
        InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[x]);
        while (custom_reader.next(value)) {

            Vector v = new RandomAccessSparseVector(TwentyNewsgroupsRecordFactory.FEATURES);
            rec_factory.processLine(value.toString(), v);

            count++;
            //break;

        }

        System.out.println("read: " + count + " records for split " + x);

        total_read += count;

    } // for each split

    long ts_total = System.currentTimeMillis() - ts_start;

    double vectors_per_sec = (double) total_read / ((double) ts_total / 1000);

    System.out.println("Time: " + ts_total);

    System.out.println("total recs read across all splits: " + total_read);

    System.out.println("Vectors converted / sec: " + vectors_per_sec);

    assertEquals(11314, total_read);

    rec_factory.Debug();

}