List of usage examples for org.apache.hadoop.mapred.Reporter.NULL

Reporter.NULL is the do-nothing Reporter constant in the old (org.apache.hadoop.mapred) MapReduce API. The examples below, each listed with its source file and license, pass it to methods such as InputFormat.getRecordReader and HiveOutputFormat.getHiveRecordWriter when no progress reporting or counters are needed.
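Before the collected examples, here is a minimal, self-contained sketch of the pattern they all share: obtaining a RecordReader from an old-API InputFormat and passing Reporter.NULL because there is no running task to report progress to. The input path and the plain println handling are illustrative assumptions, not taken from any of the source files below.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(job);

        // Hypothetical input path; one split covering the whole file.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(job);
        long length = fs.getFileStatus(path).getLen();
        FileSplit split = new FileSplit(path, 0, length, (String[]) null);

        // Reporter.NULL is the do-nothing Reporter: there is no running task,
        // so progress reports and counter updates are simply discarded.
        RecordReader<LongWritable, Text> reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        while (reader.next(key, value)) {
            System.out.println(key + "\t" + value);
        }
        reader.close();
    }
}

This is the same reason Reporter.NULL appears throughout the tests and standalone drivers collected here: the old-API signatures require a Reporter, but outside a live MapReduce task none of its callbacks matter.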
From source file:crunch.MaxTemperature.java
License:Apache License
private void checkRecordReader(InputFormat<LongWritable, Text> inputFormat, InputSplit split, JobConf job,
        long recordLength, int startLine, int endLine) throws IOException {
    RecordReader<LongWritable, Text> recordReader = inputFormat.getRecordReader(split, job, Reporter.NULL);
    for (int i = startLine; i < endLine; i++) {
        checkRecord(i, recordReader, i * recordLength, line(i, recordLength));
    }
    assertThat(recordReader.next(new LongWritable(), new Text()), is(false));
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.io.hadoop.InputFormatTest.java
License:Apache License
public int readArchiveInSplits(String archiveFile, int bytesPerSplit, InputFormat<Text, CrawlerRecord> inputFormat,
        JobConf job) throws IOException {
    Path filePath = new Path(archiveFile);
    File file = new File(archiveFile);
    int numRecordsRead = 0;
    System.out.println("Reading archive of size " + file.length() + " in splits of size " + bytesPerSplit);
    for (int offset = 0; offset < file.length(); offset += bytesPerSplit) {
        // System.out.println("Read from " + offset + " to " + (offset + bytesPerSplit));
        FileSplit inputSplit = new FileSplit(filePath, offset, bytesPerSplit, (String[]) null);
        RecordReader<Text, CrawlerRecord> recordReader = inputFormat.getRecordReader(inputSplit, job,
                Reporter.NULL);
        numRecordsRead += checkRecordsRemaining(recordReader);
    }
    return numRecordsRead;
}
From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapRed.java
License:Apache License
void waitOutputThreads() {
    try {
        if (outThread_ == null) {
            // This happens only when the reducer has empty input (so reduce() is not
            // called at all in this task). If the reducer still generates output,
            // which is very uncommon, we may not have to support this case.
            // So we don't write this output to HDFS, but we consume/collect
            // this output just to avoid the reducer hanging forever.
            OutputCollector collector = new OutputCollector() {
                public void collect(Object key, Object value) throws IOException {
                    // just consume it, no need to write the record anywhere
                }
            };
            Reporter reporter = Reporter.NULL; // dummy reporter
            startOutputThreads(collector, reporter);
        }
        int exitVal = sim.waitFor();
        // how'd it go?
        if (exitVal != 0) {
            if (nonZeroExitIsFailure_) {
                throw new RuntimeException(
                        "PipeMapRed.waitOutputThreads(): subprocess failed with code " + exitVal);
            } else {
                logprintln("PipeMapRed.waitOutputThreads(): subprocess exited with code " + exitVal + " in "
                        + PipeMapRed.class.getName());
            }
        }
        if (outThread_ != null) {
            outThread_.join(joinDelay_);
        }
        if (errThread_ != null) {
            errThread_.join(joinDelay_);
        }
        if (outerrThreadsThrowable != null) {
            throw new RuntimeException(outerrThreadsThrowable);
        }
    } catch (InterruptedException e) {
        // ignore
    }
}
From source file:edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final InputSplit[] inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                JobConf conf = confFactory.getConf();
                conf.setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                writer.open();
                parser.open(writer);
                InputFormat inputFormat = conf.getInputFormat();
                for (int i = 0; i < inputSplits.length; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read;
                         * synchronize among simultaneous partitions on the same machine
                         */
                        synchronized (executed) {
                            if (executed[i] == false) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }
                        /**
                         * read the split
                         */
                        RecordReader reader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                        Object key = reader.createKey();
                        Object value = reader.createValue();
                        while (reader.next(key, value) == true) {
                            parser.parse(key, value, writer, inputSplits[i].toString());
                        }
                    }
                }
                parser.close(writer);
                writer.close();
            } catch (Exception e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
From source file:gobblin.source.extractor.hadoop.OldApiHadoopFileInputSource.java
License:Apache License
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
    if (!workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY)) {
        throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
    }

    JobConf jobConf = new JobConf(new Configuration());
    for (String key : workUnitState.getPropertyNames()) {
        jobConf.set(key, workUnitState.getProp(key));
    }

    String fileSplitBytesStr = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
    FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
    FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, jobConf);
    RecordReader<K, V> recordReader = fileInputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
    boolean readKeys = workUnitState.getPropAsBoolean(HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY,
            HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS);
    return getExtractor(workUnitState, recordReader, fileSplit, readKeys);
}
From source file:io.prestosql.plugin.hive.HiveUtil.java
License:Apache License
public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start,
        long length, Properties schema, List<HiveColumnHandle> columns) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList
            .copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList
            .copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));

    // Tell hive the columns we would like to read; this lets hive optimize reading column-oriented files
    setReadColumns(configuration, readHiveColumnIndexes);

    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = toJobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);

    // propagate serialization configuration to getRecordReader
    schema.stringPropertyNames().stream().filter(name -> name.startsWith("serialization."))
            .forEach(name -> jobConf.set(name, schema.getProperty(name)));

    // add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
    List<String> codecs = newArrayList(
            Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
    if (!codecs.contains(LzoCodec.class.getName())) {
        codecs.add(0, LzoCodec.class.getName());
    }
    if (!codecs.contains(LzopCodec.class.getName())) {
        codecs.add(0, LzopCodec.class.getName());
    }
    jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));

    try {
        RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat
                .getRecordReader(fileSplit, jobConf, Reporter.NULL);

        int headerCount = getHeaderCount(schema);
        if (headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
        }

        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }

        return recordReader;
    } catch (IOException e) {
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT,
                format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length,
                        getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())),
                e);
    }
}
From source file:io.prestosql.plugin.hive.HiveWriteUtils.java
License:Apache License
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties,
        String outputFormatName, ConnectorSession session) {
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        if (outputFormatName.equals(RCFileOutputFormat.class.getName())) {
            return createRcFileWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, compress, session);
        }
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress,
                properties, Reporter.NULL);
    } catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
From source file:it.crs4.pydoop.pipes.PipesReducer.java
License:Apache License
/**
 * Handle the end of the input by closing down the application.
 */
public void close() throws IOException {
    // if we haven't started the application, we have nothing to do
    if (isOk) {
        OutputCollector<K3, V3> nullCollector = new OutputCollector<K3, V3>() {
            public void collect(K3 key, V3 value) throws IOException {
                // NULL
            }
        };
        startApplication(nullCollector, Reporter.NULL);
    }
    try {
        if (isOk) {
            application.getDownlink().endOfInput();
        } else {
            // send the abort to the application and let it clean up
            application.getDownlink().abort();
        }
        LOG.info("waiting for finish");
        application.waitForFinish();
        LOG.info("got done");
    } catch (Throwable t) {
        application.abort(t);
    } finally {
        application.cleanup();
    }
}
From source file:net.iponweb.hadoop.streaming.io.ByKeyOutputFormat.java
License:Apache License
@Override
public RecordWriter<Text, Text> getRecordWriter(final FileSystem fs, final JobConf job, String name,
        final Progressable progressable) throws IOException {
    initialize(job);
    return new RecordWriter<Text, Text>() {
        private RecordWriter<Text, Text> currentWriter;
        private String currentTextey;
        private TreeMap<String, RecordWriter<Text, Text>> recordWriterByTexteys = new TreeMap<String, RecordWriter<Text, Text>>();

        @Override
        public void write(Text key, Text value) throws IOException {
            String fileName = generateFileNameForTexteyTextalue(key, value);
            if (assumeFileNamesSorted) {
                if (!fileName.equals(currentTextey)) {
                    if (currentWriter != null) {
                        currentWriter.close(Reporter.NULL);
                    }
                    currentWriter = getBaseRecordWriter(fs, job, fileName, progressable);
                    currentTextey = fileName;
                }
                currentWriter.write(key, value);
            } else {
                RecordWriter<Text, Text> writer = recordWriterByTexteys.get(fileName);
                if (writer == null) {
                    writer = getBaseRecordWriter(fs, job, fileName, progressable);
                    recordWriterByTexteys.put(fileName, writer);
                }
                writer.write(key, value);
            }
            progressable.progress();
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            if (currentWriter != null) {
                currentWriter.close(reporter);
            }
            for (RecordWriter<Text, Text> writer : recordWriterByTexteys.values()) {
                writer.close(reporter);
            }
        }
    };
}
From source file:org.apache.asterix.external.input.HDFSDataSourceFactory.java
License:Apache License
@Override
public void configure(Map<String, String> configuration) throws AsterixException {
    try {
        init();
        this.configuration = configuration;
        JobConf conf = HDFSUtils.configureHDFSJobConf(configuration);
        confFactory = new ConfFactory(conf);
        clusterLocations = getPartitionConstraint();
        int numPartitions = clusterLocations.getLocations().length;
        // if files list was set, we restrict the splits to the list
        InputSplit[] inputSplits;
        if (files == null) {
            inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
        } else {
            inputSplits = HDFSUtils.getSplits(conf, files);
        }
        if (indexingOp) {
            readSchedule = indexingScheduler.getLocationConstraints(inputSplits);
        } else {
            readSchedule = hdfsScheduler.getLocationConstraints(inputSplits);
        }
        inputSplitsFactory = new InputSplitsFactory(inputSplits);
        read = new boolean[readSchedule.length];
        Arrays.fill(read, false);
        String formatString = configuration.get(ExternalDataConstants.KEY_FORMAT);
        if (formatString == null || formatString.equals(ExternalDataConstants.FORMAT_HDFS_WRITABLE)) {
            RecordReader<?, ?> reader = conf.getInputFormat().getRecordReader(inputSplits[0], conf,
                    Reporter.NULL);
            this.recordClass = reader.createValue().getClass();
            reader.close();
        } else {
            format = StreamRecordReaderProvider.getReaderFormat(configuration);
            this.recordClass = char[].class;
        }
    } catch (IOException e) {
        throw new AsterixException(e);
    }
}