List of usage examples for org.apache.hadoop.mapred.Reporter.NULL

Reporter.NULL is a no-op Reporter instance. Callers pass it to the old-API ("mapred") RecordReader and RecordWriter factory methods when they read or write files outside a running MapReduce task and therefore have no progress or counters to report. The examples below are grouped by source file.
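Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: an InputFormat method expects a Reporter argument, and Reporter.NULL stands in for the framework-provided one. This sketch is not taken from any of the projects listed below; the ReporterNullExample class name and the command-line input path are placeholders.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullExample {
    public static void main(String[] args) throws IOException {
        // Illustrative setup: read a plain text file whose path is given on the command line.
        JobConf conf = new JobConf();
        FileInputFormat.addInputPath(conf, new Path(args[0]));

        TextInputFormat format = new TextInputFormat();
        format.configure(conf);

        long lines = 0;
        for (InputSplit split : format.getSplits(conf, 1)) {
            // No task context here, so Reporter.NULL is used: progress calls are simply ignored.
            RecordReader<LongWritable, Text> reader = format.getRecordReader(split, conf, Reporter.NULL);
            LongWritable key = reader.createKey();
            Text value = reader.createValue();
            try {
                while (reader.next(key, value)) {
                    lines++;
                }
            } finally {
                reader.close();
            }
        }
        System.out.println("lines read: " + lines);
    }
}

The same idea applies on the write side: OutputFormat.getRecordWriter and RecordWriter.close also take a Reporter, and Reporter.NULL is passed when no task is running, as several of the ORC examples below show.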
From source file:org.apache.orc.bench.hive.ColumnProjectionBenchmark.java
License:Apache License
@Benchmark
public void parquet(ReadCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    if ("taxi".equals(dataset)) {
        conf.set("columns", "vendor_id,pickup_time");
        conf.set("columns.types", "int,timestamp");
    } else if ("sales".equals(dataset)) {
        conf.set("columns", "sales_id,customer_id");
        conf.set("columns.types", "bigint,bigint");
    } else if ("github".equals(dataset)) {
        conf.set("columns", "actor,created_at");
        conf.set("columns.types", "struct<avatar_url:string,gravatar_id:string,"
                + "id:int,login:string,url:string>,timestamp");
    } else {
        throw new IllegalArgumentException("Unknown data set " + dataset);
    }
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);
    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader =
            new ParquetRecordReaderWrapper(inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.addRecords(1);
    }
    recordReader.close();
    counters.addBytes(statistics.getReadOps(), statistics.getBytesRead());
    counters.addInvocation();
}
From source file:org.apache.orc.bench.hive.FullReadBenchmark.java
License:Apache License
@Benchmark
public void parquet(ReadCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);
    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader =
            new ParquetRecordReaderWrapper(inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.addRecords(1);
    }
    recordReader.close();
    counters.addBytes(statistics.getReadOps(), statistics.getBytesRead());
    counters.addInvocation();
}
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
@Test
public void testAllTypes() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    final String typeStr = "struct<b1:binary,b2:boolean,b3:tinyint,"
            + "c:char(10),d1:date,d2:decimal(20,5),d3:double,fff:float,int:int,"
            + "l:array<bigint>,map:map<smallint,string>,"
            + "str:struct<u:uniontype<timestamp,varchar(100)>>,ts:timestamp>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);

    // build a row object
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    ((BytesWritable) row.getFieldValue(0)).set(new byte[] { 1, 2, 3, 4 }, 0, 4);
    ((BooleanWritable) row.getFieldValue(1)).set(true);
    ((ByteWritable) row.getFieldValue(2)).set((byte) 23);
    ((Text) row.getFieldValue(3)).set("aaabbbcccddd");
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    ((DateWritable) row.getFieldValue(4)).set(DateWritable.millisToDays(format.parse("2016-04-01").getTime()));
    ((HiveDecimalWritable) row.getFieldValue(5)).set(new HiveDecimalWritable("1.23"));
    ((DoubleWritable) row.getFieldValue(6)).set(1.5);
    ((FloatWritable) row.getFieldValue(7)).set(4.5f);
    ((IntWritable) row.getFieldValue(8)).set(31415);
    OrcList<LongWritable> longList = (OrcList<LongWritable>) row.getFieldValue(9);
    longList.add(new LongWritable(123));
    longList.add(new LongWritable(456));
    OrcMap<ShortWritable, Text> map = (OrcMap<ShortWritable, Text>) row.getFieldValue(10);
    map.put(new ShortWritable((short) 1000), new Text("aaaa"));
    map.put(new ShortWritable((short) 123), new Text("bbbb"));
    OrcStruct struct = (OrcStruct) row.getFieldValue(11);
    OrcUnion union = (OrcUnion) struct.getFieldValue(0);
    union.set((byte) 1, new Text("abcde"));
    ((OrcTimestamp) row.getFieldValue(12)).set("1996-12-11 15:00:00");

    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, OrcStruct> writer =
            new OrcOutputFormat<OrcStruct>().getRecordWriter(fs, conf, "all.orc", Reporter.NULL);
    for (int r = 0; r < 10; ++r) {
        row.setFieldValue(8, new IntWritable(r * 10));
        writer.write(nada, row);
    }
    union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
    for (int r = 0; r < 10; ++r) {
        row.setFieldValue(8, new IntWritable(r * 10 + 100));
        writer.write(nada, row);
    }
    OrcStruct row2 = new OrcStruct(type);
    writer.write(nada, row2);
    row.setFieldValue(8, new IntWritable(210));
    writer.write(nada, row);
    writer.close(Reporter.NULL);

    FileSplit split = new FileSplit(new Path(workDir, "all.orc"), 0, 100000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader =
            new OrcInputFormat<OrcStruct>().getRecordReader(split, conf, Reporter.NULL);
    nada = reader.createKey();
    row = reader.createValue();
    for (int r = 0; r < 22; ++r) {
        assertEquals(true, reader.next(nada, row));
        if (r == 20) {
            for (int c = 0; c < 12; ++c) {
                assertEquals(null, row.getFieldValue(c));
            }
        } else {
            assertEquals(new BytesWritable(new byte[] { 1, 2, 3, 4 }), row.getFieldValue(0));
            assertEquals(new BooleanWritable(true), row.getFieldValue(1));
            assertEquals(new ByteWritable((byte) 23), row.getFieldValue(2));
            assertEquals(new Text("aaabbbcccd"), row.getFieldValue(3));
            assertEquals(new DateWritable(DateWritable.millisToDays(format.parse("2016-04-01").getTime())),
                    row.getFieldValue(4));
            assertEquals(new HiveDecimalWritable("1.23"), row.getFieldValue(5));
            assertEquals(new DoubleWritable(1.5), row.getFieldValue(6));
            assertEquals(new FloatWritable(4.5f), row.getFieldValue(7));
            assertEquals(new IntWritable(r * 10), row.getFieldValue(8));
            assertEquals(longList, row.getFieldValue(9));
            assertEquals(map, row.getFieldValue(10));
            if (r < 10) {
                union.set((byte) 1, new Text("abcde"));
            } else {
                union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
            }
            assertEquals("row " + r, struct, row.getFieldValue(11));
            assertEquals("row " + r, new OrcTimestamp("1996-12-11 15:00:00"), row.getFieldValue(12));
        }
    }
    assertEquals(false, reader.next(nada, row));
}
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
/**
 * Test the case where the top level isn't a struct, but a long.
 */
@Test
public void testLongRoot() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    conf.set(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setInt(OrcConf.BUFFER_SIZE.getAttribute(), 64 * 1024);
    conf.set(OrcConf.WRITE_FORMAT.getAttribute(), "0.11");
    final String typeStr = "bigint";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);
    LongWritable value = new LongWritable();
    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, LongWritable> writer =
            new OrcOutputFormat<LongWritable>().getRecordWriter(fs, conf, "long.orc", Reporter.NULL);
    for (long lo = 0; lo < 2000; ++lo) {
        value.set(lo);
        writer.write(nada, value);
    }
    writer.close(Reporter.NULL);

    Path path = new Path(workDir, "long.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(CompressionKind.SNAPPY, file.getCompressionKind());
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(1000, file.getRowIndexStride());
    assertEquals(64 * 1024, file.getCompressionSize());
    assertEquals(OrcFile.Version.V_0_11, file.getFileVersion());
    FileSplit split = new FileSplit(path, 0, 100000, new String[0]);
    RecordReader<NullWritable, LongWritable> reader =
            new OrcInputFormat<LongWritable>().getRecordReader(split, conf, Reporter.NULL);
    nada = reader.createKey();
    value = reader.createValue();
    for (long lo = 0; lo < 2000; ++lo) {
        assertEquals(true, reader.next(nada, value));
        assertEquals(lo, value.get());
    }
    assertEquals(false, reader.next(nada, value));
}
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.set("mapreduce.task.attempt.id", "attempt_jt0_0_m_0_0");
    String TYPE_STRING = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setOutputCommitter(NullOutputCommitter.class);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcKey key = new OrcKey(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer =
            new OrcOutputFormat<>().getRecordWriter(fs, conf, "key.orc", Reporter.NULL);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 2000; ++r) {
        ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
        writer.write(nada, key);
    }
    writer.close(Reporter.NULL);
    Path path = new Path(workDir, "key.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.set("mapreduce.task.attempt.id", "attempt_jt0_0_m_0_0");
    String TYPE_STRING = "struct<i:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setOutputCommitter(NullOutputCommitter.class);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcValue value = new OrcValue(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer =
            new OrcOutputFormat<>().getRecordWriter(fs, conf, "value.orc", Reporter.NULL);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        ((OrcStruct) value.value).setAllFields(new IntWritable(r));
        writer.write(nada, value);
    }
    writer.close(Reporter.NULL);
    Path path = new Path(workDir, "value.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(3000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.phoenix.hive.mapreduce.PhoenixRecordWriter.java
License:Apache License
@Override
public void close(boolean abort) throws IOException {
    close(Reporter.NULL);
}
From source file:org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerLocal.java
License:Apache License
private void partitionTextCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen,
        int brlen, int bclen) throws DMLRuntimeException {
    long row = -1;
    long col = -1;
    try {
        //STEP 1: read matrix from HDFS and write blocks to local staging area
        //check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        InputSplit[] splits = informat.getSplits(job, 1);

        LinkedList<Cell> buffer = new LinkedList<>();
        LongWritable key = new LongWritable();
        Text value = new Text();
        FastStringTokenizer st = new FastStringTokenizer(' ');

        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value)) {
                    st.reset(value.toString()); //reset tokenizer
                    row = st.nextLong();
                    col = st.nextLong();
                    double lvalue = st.nextDouble();
                    Cell tmp = new Cell(row, col, lvalue);
                    buffer.addLast(tmp);
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) //periodic flush
                    {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }
                //final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }

        //STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerTextCell(job, fnameNew, fnameStaging,
                        fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeTextCellFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        //post-mortem error handling and bounds checking
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else
            throw new DMLRuntimeException("Unable to partition text cell matrix.", e);
    }
}
From source file:org.apache.sysml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java
License:Apache License
/**
 * Get the list of hostnames where the input split is located.
 */
@Override
public String[] getLocations() throws IOException {
    //Timing time = new Timing();
    //time.start();

    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = IOUtilFunctions.getFileSystem(getPath(), job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = null;
    try {
        reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
        reader.next(key, value);
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<>();
    if (t.getType() == TaskType.SET) {
        for (IntObject val : t.getIterations()) {
            String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    } else //TaskType.RANGE
    {
        //since this is a serial process, we use just the first iteration
        //as a heuristic for location information
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    }

    //majority consensus on top host
    return getTopHosts(hosts);
}
From source file:org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Apache License
private static void mergeTextCellWithoutComp(String fnameNew, MatrixObject outMo,
        ArrayList<MatrixObject> inMO) throws DMLRuntimeException {
    try {
        //delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);

        if (ALLOW_COPY_CELLFILES) {
            copyAllFiles(fnameNew, inMO);
            return; //we're done
        }

        //actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameNew);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        String valueStr = null;

        try {
            for (MatrixObject in : inMO) //read/write all inputs
            {
                if (LOG.isTraceEnabled())
                    LOG.trace("ResultMerge (local, file): Merge input " + in.hashCode() + " (fname="
                            + in.getFileName() + ") via stream merge");

                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                FileInputFormat.addInputPath(tmpJob, tmpPath);
                TextInputFormat informat = new TextInputFormat();
                informat.configure(tmpJob);
                InputSplit[] splits = informat.getSplits(tmpJob, 1);

                LongWritable key = new LongWritable();
                Text value = new Text();

                for (InputSplit split : splits) {
                    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, tmpJob, Reporter.NULL);
                    try {
                        while (reader.next(key, value)) {
                            valueStr = value.toString().trim();
                            out.write(valueStr + "\n");
                        }
                    } finally {
                        IOUtilFunctions.closeSilently(reader);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Unable to merge text cell results.", ex);
    }
}