List of usage examples for org.apache.hadoop.mapred.Reporter.NULL

Reporter.NULL is a constant, do-nothing implementation of the Reporter interface. It is typically passed to InputFormat.getRecordReader() and OutputFormat.getRecordWriter() from tests or standalone code that runs outside a MapReduce task and therefore has no real progress or counter reporting to do. The examples below show how open-source projects use it; each entry names the source file and license, followed by the relevant code.
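Before the project-specific examples, here is a minimal sketch of the most common pattern: reading a text file through the old mapred API with Reporter.NULL standing in for a real task reporter. The class name and input path are illustrative assumptions, not taken from any project below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullSketch {

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();

        // Hypothetical input path; substitute a real file when running.
        FileInputFormat.setInputPaths(conf, new Path("/tmp/example-input.txt"));

        TextInputFormat format = new TextInputFormat();
        format.configure(conf);

        // Outside a running task there is no task attempt to report to, so the
        // do-nothing Reporter.NULL satisfies the getRecordReader() signature.
        for (InputSplit split : format.getSplits(conf, 1)) {
            RecordReader<LongWritable, Text> reader = format.getRecordReader(split, conf, Reporter.NULL);
            try {
                LongWritable key = reader.createKey();
                Text value = reader.createValue();

                while (reader.next(key, value))
                    System.out.println(value);
            } finally {
                reader.close();
            }
        }
    }
}

The same substitution appears throughout the examples below wherever a RecordReader or RecordWriter is created outside a live task.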
From source file:cascading.tap.hadoop.io.TapOutputCollector.java
License:Open Source License
private Reporter getReporter() {
    Reporter reporter = Reporter.NULL;

    if (flowProcess instanceof MapRed)
        reporter = ((MapRed) flowProcess).getReporter(); // may return Reporter.NULL

    return reporter;
}
From source file:cascading.tap.hadoop.TapCollector.java
License:Open Source License
private void initalize() throws IOException {
    tap.sinkInit(conf); // tap should not delete if called within a task

    OutputFormat outputFormat = conf.getOutputFormat();

    isFileOutputFormat = outputFormat instanceof FileOutputFormat;

    if (isFileOutputFormat) {
        Hadoop18TapUtil.setupJob(conf);

        if (prefix != null)
            filename = String.format(filenamePattern, prefix, "/", conf.getInt("mapred.task.partition", 0));
        else
            filename = String.format(filenamePattern, "", "", conf.getInt("mapred.task.partition", 0));

        Hadoop18TapUtil.setupTask(conf);
    }

    writer = outputFormat.getRecordWriter(null, conf, filename, Reporter.NULL);
}
From source file:cascading.tap.hadoop.TapIterator.java
License:Open Source License
private RecordReader makeReader(int currentSplit) throws IOException {
    if (LOG.isDebugEnabled())
        LOG.debug("reading split: " + currentSplit);

    return inputFormat.getRecordReader(splits[currentSplit], conf, Reporter.NULL);
}
From source file:cascading.tap.hadoop.ZipInputFormatTest.java
License:Open Source License
public void testSplits() throws Exception {
    JobConf job = new JobConf();
    FileSystem currentFs = FileSystem.get(job);

    Path file = new Path(workDir, "test.zip");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);

    FileInputFormat.setInputPaths(job, file);

    for (int entries = 1; entries < MAX_ENTRIES; entries += random.nextInt(MAX_ENTRIES / 10) + 1) {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        ZipOutputStream zos = new ZipOutputStream(byteArrayOutputStream);
        long length = 0;

        LOG.debug("creating; zip file with entries = " + entries);

        // for each entry in the zip file
        for (int entryCounter = 0; entryCounter < entries; entryCounter++) {
            // construct zip entries splitting MAX_LENGTH between entries
            long entryLength = MAX_LENGTH / entries;
            ZipEntry zipEntry = new ZipEntry("/entry" + entryCounter + ".txt");
            zipEntry.setMethod(ZipEntry.DEFLATED);
            zos.putNextEntry(zipEntry);

            for (length = entryCounter * entryLength; length < (entryCounter + 1) * entryLength; length++) {
                zos.write(Long.toString(length).getBytes());
                zos.write("\n".getBytes());
            }

            zos.flush();
            zos.closeEntry();
        }

        zos.flush();
        zos.close();

        currentFs.delete(file, true);

        OutputStream outputStream = currentFs.create(file);

        byteArrayOutputStream.writeTo(outputStream);
        outputStream.close();

        ZipInputFormat format = new ZipInputFormat();
        format.configure(job);
        LongWritable key = new LongWritable();
        Text value = new Text();
        InputSplit[] splits = format.getSplits(job, 100);

        BitSet bits = new BitSet((int) length);
        for (int j = 0; j < splits.length; j++) {
            LOG.debug("split[" + j + "]= " + splits[j]);
            RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);

            try {
                int count = 0;

                while (reader.next(key, value)) {
                    int v = Integer.parseInt(value.toString());

                    LOG.debug("read " + v);

                    if (bits.get(v))
                        LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());

                    assertFalse("key in multiple partitions.", bits.get(v));
                    bits.set(v);
                    count++;
                }

                LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
            } finally {
                reader.close();
            }
        }

        assertEquals("some keys in no partition.", length, bits.cardinality());
    }
}
From source file:cn.scala.es.ReportingUtils.java
License:Apache License
@SuppressWarnings({ "rawtypes" })
static void report(Progressable progressable, Stats stats) {
    progressable = (Progressable) CompatHandler.unwrap(progressable);

    if (progressable == null || progressable == Reporter.NULL) {
        return;
    }

    if (progressable instanceof Reporter) {
        Reporter reporter = (Reporter) progressable;

        for (Counter count : Counter.ALL) {
            oldApiCounter(reporter, count, count.get(stats));
        }
    }

    if (progressable instanceof org.apache.hadoop.mapreduce.TaskInputOutputContext) {
        TaskInputOutputContext compatTioc = CompatHandler
                .taskInputOutputContext((org.apache.hadoop.mapreduce.TaskInputOutputContext) progressable);

        for (Counter count : Counter.ALL) {
            newApiCounter(compatTioc, count, count.get(stats));
        }
    }
}
From source file:com.bigdata.diane.MiniTestDFSIO.java
License:Apache License
private static void sequentialTest(FileSystem fs, int testType, int fileSize, int nrFiles) throws Exception {
    IOStatMapper<Long> ioer = null;

    if (testType == TEST_TYPE_READ)
        ioer = new ReadMapper();
    else if (testType == TEST_TYPE_WRITE)
        ioer = new WriteMapper();
    else
        return;

    for (int i = 0; i < nrFiles; i++)
        ioer.doIO(Reporter.NULL, BASE_FILE_NAME + Integer.toString(i), MEGA * fileSize);
}
From source file:com.cloudera.knittingboar.io.TestInputRecordsSplit.java
License:Apache License
/**
 * create an InputRecordSplit and then read some records
 *
 * - make sure we maintain split discipline
 *
 * @throws IOException
 */
public void testReadSplitViaInputRecordsSplit() throws IOException {
    // InputRecordsSplit(JobConf jobConf, InputSplit split)
    // needs to get a jobConf from somewhere, under the hood
    // needs a split calculated from the aforementioned jobConf

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "testReadSplitViaInputRecordsSplit.txt");

    int tmp_file_size = 2000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        for (int i = 0; i < tmp_file_size; i++) {
            writer.write(
                    "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 1, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 99");
            writer.write("\n");
        }
    } finally {
        writer.close();
    }

    System.out.println("file write complete, wrote " + tmp_file_size + " recs");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    System.out.println("> setting splits for: " + workDir);

    // localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, file);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got = " + splits.length);

    System.out.println("---- debug splits --------- ");

    //InputSplit test_split = null;

    int total_read = 0;

    for (int x = 0; x < splits.length; x++) {
        System.out.println("> Split [" + x + "]: " + splits[x].getLength());

        int count = 0;
        InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[x]);

        while (custom_reader.next(value)) {
            count++;
        }

        System.out.println("read: " + count + " records for split " + x);

        total_read += count;
    } // for each split

    System.out.println("--------- total read across all splits: " + total_read);

    assertEquals(tmp_file_size, total_read);
}
From source file:com.cloudera.knittingboar.io.TestInputRecordsSplit.java
License:Apache License
public void testReadSplitViaInputRecordsSplit_SplitReset() throws IOException {
    // InputRecordsSplit(JobConf jobConf, InputSplit split)
    // needs to get a jobConf from somewhere, under the hood
    // needs a split calculated from the aforementioned jobConf

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "testReadSplitViaInputRecordsSplit_SplitReset");

    int tmp_file_size = 2000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        for (int i = 0; i < tmp_file_size; i++) {
            writer.write(
                    "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 1, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 99");
            writer.write("\n");
        }
    } finally {
        writer.close();
    }

    System.out.println("file write complete, wrote " + tmp_file_size + " recs");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    // localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, file);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got = " + splits.length);

    System.out.println("---- testReadSplitViaInputRecordsSplit_SplitReset: debug splits --------- ");

    int total_read = 0;

    System.out.println("> Split [0]: " + splits[0].getLength());

    int count = 0;
    InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[0]);

    while (custom_reader.next(value)) {
        count++;
    }

    System.out.println("read: " + count + " records for split " + 0);

    int count_reset = 0;
    custom_reader.ResetToStartOfSplit();

    while (custom_reader.next(value)) {
        count_reset++;
    }

    System.out.println("read: " + count_reset + " records for split after reset " + 0);

    assertEquals(count, count_reset);
}
From source file:com.cloudera.knittingboar.io.TestSplitCalcs.java
License:Apache License
/**
 * - use the TextInputFormat.getSplits() to test pulling split info
 *
 * @throws IOException
 */
public void testGetSplits() throws IOException {
    TextInputFormat input = new TextInputFormat();

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "testGetSplits.txt");

    int tmp_file_size = 200000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    Writer writer = new OutputStreamWriter(localFs.create(file));
    try {
        for (int i = 0; i < tmp_file_size; i++) {
            writer.write(
                    "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 1, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, 99");
            writer.write("\n");
        }
    } finally {
        writer.close();
    }

    System.out.println("file write complete");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    // localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, file);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got = " + splits.length);

    assertEquals(2, splits.length);

    System.out.println("---- debug splits --------- ");

    for (int x = 0; x < splits.length; x++) {
        System.out.println("> Split [" + x + "]: " + splits[x].getLength() + ", " + splits[x].toString() + ", "
                + splits[x].getLocations()[0]);

        RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[x], job, reporter);
        try {
            int count = 0;

            while (reader.next(key, value)) {
                if (count == 0) {
                    System.out.println("first: " + value.toString());
                    assertTrue(value.toString().contains("a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p"));
                }

                count++;
            }

            System.out.println("last: " + value.toString());

            assertTrue(value.toString().contains("a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p"));
        } finally {
            reader.close();
        }
    } // for each split
}
From source file:com.cloudera.knittingboar.records.TestTwentyNewsgroupsRecordFactory.java
License:Apache License
public void testRecordFactoryOnDatasetShard() throws Exception {
    TwentyNewsgroupsRecordFactory rec_factory = new TwentyNewsgroupsRecordFactory("\t");
    //rec_factory.setClassSplitString("\t");

    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "20news-part-0.txt");

    int tmp_file_size = 200000;

    long block_size = localFs.getDefaultBlockSize();

    System.out.println("default block size: " + (block_size / 1024 / 1024) + "MB");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    FileInputFormat.setInputPaths(job, workDir);

    // try splitting the file in a variety of sizes
    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    LongWritable key = new LongWritable();
    Text value = new Text();

    int numSplits = 1;

    InputSplit[] splits = format.getSplits(job, numSplits);

    LOG.info("requested " + numSplits + " splits, splitting: got = " + splits.length);

    System.out.println("---- debug splits --------- ");

    rec_factory.Debug();

    int total_read = 0;

    long ts_start = System.currentTimeMillis();

    for (int x = 0; x < splits.length; x++) {
        System.out.println("> Split [" + x + "]: " + splits[x].getLength());

        int count = 0;
        InputRecordsSplit custom_reader = new InputRecordsSplit(job, splits[x]);

        while (custom_reader.next(value)) {
            Vector v = new RandomAccessSparseVector(TwentyNewsgroupsRecordFactory.FEATURES);
            rec_factory.processLine(value.toString(), v);
            count++;
            //break;
        }

        System.out.println("read: " + count + " records for split " + x);

        total_read += count;
    } // for each split

    long ts_total = System.currentTimeMillis() - ts_start;

    double vectors_per_sec = (double) total_read / ((double) ts_total / 1000);

    System.out.println("Time: " + ts_total);
    System.out.println("total recs read across all splits: " + total_read);
    System.out.println("Vectors converted / sec: " + vectors_per_sec);

    assertEquals(total_read, 11314);

    rec_factory.Debug();
}