Example usage for org.apache.hadoop.mapred Reporter NULL

List of usage examples for org.apache.hadoop.mapred Reporter NULL

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.Reporter.NULL.

Prototype

public static final Reporter NULL

Document

A constant of Reporter type that does nothing.
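
As the usage examples below illustrate, Reporter.NULL is typically passed wherever the older org.apache.hadoop.mapred API requires a Reporter but no progress reporting is needed, for example when reading or writing files outside of a running MapReduce task. The following is a minimal sketch of that pattern; the class name and input path are illustrative assumptions, not taken from the examples below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

// Hypothetical standalone reader: prints every line of a text input using the
// mapred InputFormat API, with Reporter.NULL standing in for a real Reporter
// because no MapReduce task context exists here.
public class ReporterNullSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        FileInputFormat.addInputPath(job, new Path(args[0]));
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        for (InputSplit split : informat.getSplits(job, 1)) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            LongWritable key = new LongWritable();
            Text value = new Text();
            try {
                while (reader.next(key, value)) {
                    System.out.println(value);
                }
            } finally {
                reader.close();
            }
        }
    }
}

The examples that follow use the same pattern with Parquet, ORC, and text record readers and writers.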

Usage

From source file: org.apache.orc.bench.hive.ColumnProjectionBenchmark.java

License: Apache License

@Benchmark
public void parquet(ReadCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    if ("taxi".equals(dataset)) {
        conf.set("columns", "vendor_id,pickup_time");
        conf.set("columns.types", "int,timestamp");
    } else if ("sales".equals(dataset)) {
        conf.set("columns", "sales_id,customer_id");
        conf.set("columns.types", "bigint,bigint");
    } else if ("github".equals(dataset)) {
        conf.set("columns", "actor,created_at");
        conf.set("columns.types",
                "struct<avatar_url:string,gravatar_id:string," + "id:int,login:string,url:string>,timestamp");
    } else {
        throw new IllegalArgumentException("Unknown data set " + dataset);
    }
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);

    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader = new ParquetRecordReaderWrapper(
            inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.addRecords(1);
    }
    recordReader.close();
    counters.addBytes(statistics.getReadOps(), statistics.getBytesRead());
    counters.addInvocation();
}

From source file: org.apache.orc.bench.hive.FullReadBenchmark.java

License: Apache License

@Benchmark
public void parquet(ReadCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);

    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader = new ParquetRecordReaderWrapper(
            inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.addRecords(1);
    }
    recordReader.close();
    counters.addBytes(statistics.getReadOps(), statistics.getBytesRead());
    counters.addInvocation();
}

From source file: org.apache.orc.mapred.TestOrcOutputFormat.java

License: Apache License

@Test
public void testAllTypes() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    final String typeStr = "struct<b1:binary,b2:boolean,b3:tinyint,"
            + "c:char(10),d1:date,d2:decimal(20,5),d3:double,fff:float,int:int,"
            + "l:array<bigint>,map:map<smallint,string>,"
            + "str:struct<u:uniontype<timestamp,varchar(100)>>,ts:timestamp>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);

    // build a row object
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    ((BytesWritable) row.getFieldValue(0)).set(new byte[] { 1, 2, 3, 4 }, 0, 4);
    ((BooleanWritable) row.getFieldValue(1)).set(true);
    ((ByteWritable) row.getFieldValue(2)).set((byte) 23);
    ((Text) row.getFieldValue(3)).set("aaabbbcccddd");
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    ((DateWritable) row.getFieldValue(4)).set(DateWritable.millisToDays(format.parse("2016-04-01").getTime()));
    ((HiveDecimalWritable) row.getFieldValue(5)).set(new HiveDecimalWritable("1.23"));
    ((DoubleWritable) row.getFieldValue(6)).set(1.5);
    ((FloatWritable) row.getFieldValue(7)).set(4.5f);
    ((IntWritable) row.getFieldValue(8)).set(31415);
    OrcList<LongWritable> longList = (OrcList<LongWritable>) row.getFieldValue(9);
    longList.add(new LongWritable(123));
    longList.add(new LongWritable(456));
    OrcMap<ShortWritable, Text> map = (OrcMap<ShortWritable, Text>) row.getFieldValue(10);
    map.put(new ShortWritable((short) 1000), new Text("aaaa"));
    map.put(new ShortWritable((short) 123), new Text("bbbb"));
    OrcStruct struct = (OrcStruct) row.getFieldValue(11);
    OrcUnion union = (OrcUnion) struct.getFieldValue(0);
    union.set((byte) 1, new Text("abcde"));
    ((OrcTimestamp) row.getFieldValue(12)).set("1996-12-11 15:00:00");
    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, OrcStruct> writer = new OrcOutputFormat<OrcStruct>().getRecordWriter(fs, conf,
            "all.orc", Reporter.NULL);
    for (int r = 0; r < 10; ++r) {
        row.setFieldValue(8, new IntWritable(r * 10));
        writer.write(nada, row);
    }
    union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
    for (int r = 0; r < 10; ++r) {
        row.setFieldValue(8, new IntWritable(r * 10 + 100));
        writer.write(nada, row);
    }
    OrcStruct row2 = new OrcStruct(type);
    writer.write(nada, row2);
    row.setFieldValue(8, new IntWritable(210));
    writer.write(nada, row);
    writer.close(Reporter.NULL);

    FileSplit split = new FileSplit(new Path(workDir, "all.orc"), 0, 100000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().getRecordReader(split, conf,
            Reporter.NULL);
    nada = reader.createKey();
    row = reader.createValue();
    for (int r = 0; r < 22; ++r) {
        assertEquals(true, reader.next(nada, row));
        if (r == 20) {
            for (int c = 0; c < 12; ++c) {
                assertEquals(null, row.getFieldValue(c));
            }
        } else {
            assertEquals(new BytesWritable(new byte[] { 1, 2, 3, 4 }), row.getFieldValue(0));
            assertEquals(new BooleanWritable(true), row.getFieldValue(1));
            assertEquals(new ByteWritable((byte) 23), row.getFieldValue(2));
            assertEquals(new Text("aaabbbcccd"), row.getFieldValue(3));
            assertEquals(new DateWritable(DateWritable.millisToDays(format.parse("2016-04-01").getTime())),
                    row.getFieldValue(4));
            assertEquals(new HiveDecimalWritable("1.23"), row.getFieldValue(5));
            assertEquals(new DoubleWritable(1.5), row.getFieldValue(6));
            assertEquals(new FloatWritable(4.5f), row.getFieldValue(7));
            assertEquals(new IntWritable(r * 10), row.getFieldValue(8));
            assertEquals(longList, row.getFieldValue(9));
            assertEquals(map, row.getFieldValue(10));
            if (r < 10) {
                union.set((byte) 1, new Text("abcde"));
            } else {
                union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
            }
            assertEquals("row " + r, struct, row.getFieldValue(11));
            assertEquals("row " + r, new OrcTimestamp("1996-12-11 15:00:00"), row.getFieldValue(12));
        }
    }
    assertEquals(false, reader.next(nada, row));
}

From source file: org.apache.orc.mapred.TestOrcOutputFormat.java

License: Apache License

/**
 * Test the case where the top level isn't a struct, but a long.
 */
@Test
public void testLongRoot() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    conf.set(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setInt(OrcConf.BUFFER_SIZE.getAttribute(), 64 * 1024);
    conf.set(OrcConf.WRITE_FORMAT.getAttribute(), "0.11");
    final String typeStr = "bigint";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);
    LongWritable value = new LongWritable();
    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, LongWritable> writer = new OrcOutputFormat<LongWritable>().getRecordWriter(fs,
            conf, "long.orc", Reporter.NULL);
    for (long lo = 0; lo < 2000; ++lo) {
        value.set(lo);
        writer.write(nada, value);
    }
    writer.close(Reporter.NULL);

    Path path = new Path(workDir, "long.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(CompressionKind.SNAPPY, file.getCompressionKind());
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(1000, file.getRowIndexStride());
    assertEquals(64 * 1024, file.getCompressionSize());
    assertEquals(OrcFile.Version.V_0_11, file.getFileVersion());
    FileSplit split = new FileSplit(path, 0, 100000, new String[0]);
    RecordReader<NullWritable, LongWritable> reader = new OrcInputFormat<LongWritable>().getRecordReader(split,
            conf, Reporter.NULL);
    nada = reader.createKey();
    value = reader.createValue();
    for (long lo = 0; lo < 2000; ++lo) {
        assertEquals(true, reader.next(nada, value));
        assertEquals(lo, value.get());
    }
    assertEquals(false, reader.next(nada, value));
}

From source file: org.apache.orc.mapred.TestOrcOutputFormat.java

License: Apache License

/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.set("mapreduce.task.attempt.id", "attempt_jt0_0_m_0_0");
    String TYPE_STRING = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setOutputCommitter(NullOutputCommitter.class);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcKey key = new OrcKey(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(fs, conf, "key.orc",
            Reporter.NULL);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 2000; ++r) {
        ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
        writer.write(nada, key);
    }
    writer.close(Reporter.NULL);
    Path path = new Path(workDir, "key.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file: org.apache.orc.mapred.TestOrcOutputFormat.java

License: Apache License

/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.set("mapreduce.task.attempt.id", "attempt_jt0_0_m_0_0");
    String TYPE_STRING = "struct<i:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setOutputCommitter(NullOutputCommitter.class);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcValue value = new OrcValue(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(fs, conf, "value.orc",
            Reporter.NULL);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        ((OrcStruct) value.value).setAllFields(new IntWritable(r));
        writer.write(nada, value);
    }
    writer.close(Reporter.NULL);
    Path path = new Path(workDir, "value.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(3000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file: org.apache.phoenix.hive.mapreduce.PhoenixRecordWriter.java

License: Apache License

@Override
public void close(boolean abort) throws IOException {
    close(Reporter.NULL);
}

From source file: org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerLocal.java

License: Apache License

private void partitionTextCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen,
        int brlen, int bclen) throws DMLRuntimeException {
    long row = -1;
    long col = -1;

    try {
        //STEP 1: read matrix from HDFS and write blocks to local staging area
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        InputSplit[] splits = informat.getSplits(job, 1);

        LinkedList<Cell> buffer = new LinkedList<>();
        LongWritable key = new LongWritable();
        Text value = new Text();
        FastStringTokenizer st = new FastStringTokenizer(' ');

        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value)) {
                    st.reset(value.toString()); //reset tokenizer
                    row = st.nextLong();
                    col = st.nextLong();
                    double lvalue = st.nextDouble();
                    Cell tmp = new Cell(row, col, lvalue);

                    buffer.addLast(tmp);
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) //periodic flush
                    {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }

                //final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerTextCell(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }

            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeTextCellFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        //post-mortem error handling and bounds checking
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else
            throw new DMLRuntimeException("Unable to partition text cell matrix.", e);
    }
}

From source file: org.apache.sysml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java

License: Apache License

/**
 * Get the list of hostnames where the input split is located.
 */
@Override
public String[] getLocations() throws IOException {
    //Timing time = new Timing();
    //time.start();

    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = IOUtilFunctions.getFileSystem(getPath(), job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = null;
    try {
        reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
        reader.next(key, value);
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<>();

    if (t.getType() == TaskType.SET) {
        for (IntObject val : t.getIterations()) {
            String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    } else //TaskType.RANGE
    {
        //since this is a serial process, we use just the first iteration
        //as a heuristic for location information
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    }

    //majority consensus on top host
    return getTopHosts(hosts);
}

From source file: org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile.java

License: Apache License

private static void mergeTextCellWithoutComp(String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO)
        throws DMLRuntimeException {
    try {
        //delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);

        if (ALLOW_COPY_CELLFILES) {
            copyAllFiles(fnameNew, inMO);
            return; //we're done
        }

        //actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameNew);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        String valueStr = null;

        try {
            for (MatrixObject in : inMO) //read/write all inputs
            {
                if (LOG.isTraceEnabled())
                    LOG.trace("ResultMerge (local, file): Merge input " + in.hashCode() + " (fname="
                            + in.getFileName() + ") via stream merge");

                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                FileInputFormat.addInputPath(tmpJob, tmpPath);
                TextInputFormat informat = new TextInputFormat();
                informat.configure(tmpJob);
                InputSplit[] splits = informat.getSplits(tmpJob, 1);

                LongWritable key = new LongWritable();
                Text value = new Text();

                for (InputSplit split : splits) {
                    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, tmpJob,
                            Reporter.NULL);
                    try {
                        while (reader.next(key, value)) {
                            valueStr = value.toString().trim();
                            out.write(valueStr + "\n");
                        }
                    } finally {
                        IOUtilFunctions.closeSilently(reader);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Unable to merge text cell results.", ex);
    }
}