Example usage for org.apache.hadoop.mapred Reporter NULL

Introduction

This page lists example usages of the org.apache.hadoop.mapred Reporter.NULL constant.

Prototype

public static final Reporter NULL

Document

A constant of Reporter type that does nothing.
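
The snippet below is a minimal standalone sketch, not taken from the projects listed under Usage, showing the typical pattern: Reporter.NULL stands in for a live Reporter when a split is read outside a running MapReduce task, for example in unit tests. The path input.txt and the split length 1024 are placeholder values.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(conf);

        // input.txt and the split length are placeholders for illustration only
        FileSplit split = new FileSplit(new Path("input.txt"), 0, 1024, (String[]) null);

        // Reporter.NULL discards all progress and counter updates,
        // so it is safe to pass when no task context exists
        RecordReader<LongWritable, Text> reader = inputFormat.getRecordReader(split, conf, Reporter.NULL);
        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        while (reader.next(key, value)) {
            System.out.println(key + "\t" + value);
        }
        reader.close();
    }
}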

Usage

From source file:crunch.MaxTemperature.java

License:Apache License

private void checkRecordReader(InputFormat<LongWritable, Text> inputFormat, InputSplit split, JobConf job,
            long recordLength, int startLine, int endLine) throws IOException {
        RecordReader<LongWritable, Text> recordReader = inputFormat.getRecordReader(split, job, Reporter.NULL);
        for (int i = startLine; i < endLine; i++) {
            checkRecord(i, recordReader, i * recordLength, line(i, recordLength));
        }
        assertThat(recordReader.next(new LongWritable(), new Text()), is(false));
    }

From source file:de.tudarmstadt.ukp.dkpro.bigdata.io.hadoop.InputFormatTest.java

License:Apache License

public int readArchiveInSplits(String archiveFile, int bytesPerSplit,
        InputFormat<Text, CrawlerRecord> inputFormat, JobConf job) throws IOException {
    Path filePath = new Path(archiveFile);
    File file = new File(archiveFile);

    int numRecordsRead = 0;

    System.out.println("Reading archive of size " + file.length() + " in splits of size " + bytesPerSplit);
    for (int offset = 0; offset < file.length(); offset += bytesPerSplit) {
        //         System.out.println("Read from " + offset + " to " + (offset + bytesPerSplit));
        FileSplit inputSplit = new FileSplit(filePath, offset, bytesPerSplit, (String[]) null);
        RecordReader<Text, CrawlerRecord> recordReader = inputFormat.getRecordReader(inputSplit, job,
                Reporter.NULL);
        numRecordsRead += checkRecordsRemaining(recordReader);
    }

    return numRecordsRead;
}

From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapRed.java

License:Apache License

void waitOutputThreads() {
    try {
        if (outThread_ == null) {
            // This happens only when the reducer has empty input (so reduce() is
            // never called in this task). The reducer may still generate output,
            // which is very uncommon and does not need to be written to HDFS,
            // so we just consume/collect that output to keep the reducer from
            // hanging forever.

            OutputCollector collector = new OutputCollector() {
                public void collect(Object key, Object value) throws IOException {
                    //just consume it, no need to write the record anywhere
                }
            };
            Reporter reporter = Reporter.NULL; // dummy reporter
            startOutputThreads(collector, reporter);
        }
        int exitVal = sim.waitFor();
        // how'd it go?
        if (exitVal != 0) {
            if (nonZeroExitIsFailure_) {
                throw new RuntimeException(
                        "PipeMapRed.waitOutputThreads(): subprocess failed with code " + exitVal);
            } else {
                logprintln("PipeMapRed.waitOutputThreads(): subprocess exited with code " + exitVal + " in "
                        + PipeMapRed.class.getName());
            }
        }
        if (outThread_ != null) {
            outThread_.join(joinDelay_);
        }
        if (errThread_ != null) {
            errThread_.join(joinDelay_);
        }
        if (outerrThreadsThrowable != null) {
            throw new RuntimeException(outerrThreadsThrowable);
        }
    } catch (InterruptedException e) {
        //ignore
    }
}

From source file:edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor.java

License:Apache License

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final InputSplit[] inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                JobConf conf = confFactory.getConf();
                conf.setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                writer.open();
                parser.open(writer);
                InputFormat inputFormat = conf.getInputFormat();
                for (int i = 0; i < inputSplits.length; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read
                         * synchronize among simultaneous partitions in the same machine
                         */
                        synchronized (executed) {
                            if (!executed[i]) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }

                        /**
                         * read the split
                         */
                        RecordReader reader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                        Object key = reader.createKey();
                        Object value = reader.createValue();
                        while (reader.next(key, value)) {
                            parser.parse(key, value, writer, inputSplits[i].toString());
                        }
                    }
                }
                parser.close(writer);
                writer.close();
            } catch (Exception e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}

From source file:gobblin.source.extractor.hadoop.OldApiHadoopFileInputSource.java

License:Apache License

@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
    if (!workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY)) {
        throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
    }

    JobConf jobConf = new JobConf(new Configuration());
    for (String key : workUnitState.getPropertyNames()) {
        jobConf.set(key, workUnitState.getProp(key));
    }

    String fileSplitBytesStr = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
    FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
    FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, jobConf);
    RecordReader<K, V> recordReader = fileInputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
    boolean readKeys = workUnitState.getPropAsBoolean(HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY,
            HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS);
    return getExtractor(workUnitState, recordReader, fileSplit, readKeys);
}

From source file:io.prestosql.plugin.hive.HiveUtil.java

License:Apache License

public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start,
        long length, Properties schema, List<HiveColumnHandle> columns) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList
            .copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList
            .copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));

    // Tell Hive which columns we would like to read; this lets Hive optimize reads of column-oriented files
    setReadColumns(configuration, readHiveColumnIndexes);

    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = toJobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);

    // propagate serialization configuration to getRecordReader
    schema.stringPropertyNames().stream().filter(name -> name.startsWith("serialization."))
            .forEach(name -> jobConf.set(name, schema.getProperty(name)));

    // add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
    List<String> codecs = newArrayList(
            Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
    if (!codecs.contains(LzoCodec.class.getName())) {
        codecs.add(0, LzoCodec.class.getName());
    }
    if (!codecs.contains(LzopCodec.class.getName())) {
        codecs.add(0, LzopCodec.class.getName());
    }
    jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));

    try {
        RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat
                .getRecordReader(fileSplit, jobConf, Reporter.NULL);

        int headerCount = getHeaderCount(schema);
        if (headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(),
                    recordReader.createValue());
        }

        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }

        return recordReader;
    } catch (IOException e) {
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }

        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT,
                format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length,
                        getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())),
                e);
    }
}

From source file:io.prestosql.plugin.hive.HiveWriteUtils.java

License:Apache License

public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties,
        String outputFormatName, ConnectorSession session) {
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        if (outputFormatName.equals(RCFileOutputFormat.class.getName())) {
            return createRcFileWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, compress, session);
        }
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress,
                properties, Reporter.NULL);
    } catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}

From source file:it.crs4.pydoop.pipes.PipesReducer.java

License:Apache License

/**
 * Handle the end of the input by closing down the application.
 */
public void close() throws IOException {
    // if we haven't started the application, we have nothing to do
    if (isOk) {
        OutputCollector<K3, V3> nullCollector = new OutputCollector<K3, V3>() {
            public void collect(K3 key, V3 value) throws IOException {
                // NULL
            }
        };
        startApplication(nullCollector, Reporter.NULL);
    }
    try {
        if (isOk) {
            application.getDownlink().endOfInput();
        } else {
            // send the abort to the application and let it clean up
            application.getDownlink().abort();
        }
        LOG.info("waiting for finish");
        application.waitForFinish();
        LOG.info("got done");
    } catch (Throwable t) {
        application.abort(t);
    } finally {
        application.cleanup();
    }
}

From source file:net.iponweb.hadoop.streaming.io.ByKeyOutputFormat.java

License:Apache License

@Override
public RecordWriter<Text, Text> getRecordWriter(final FileSystem fs, final JobConf job, String name,
        final Progressable progressable) throws IOException {
    initialize(job);
    return new RecordWriter<Text, Text>() {
        private RecordWriter<Text, Text> currentWriter;
        private String currentKey;
        private TreeMap<String, RecordWriter<Text, Text>> recordWriterByKeys = new TreeMap<String, RecordWriter<Text, Text>>();

        @Override
        public void write(Text key, Text value) throws IOException {
            String fileName = generateFileNameForKeyValue(key, value);
            if (assumeFileNamesSorted) {
                if (!fileName.equals(currentKey)) {
                    if (currentWriter != null) {
                        currentWriter.close(Reporter.NULL);
                    }
                    currentWriter = getBaseRecordWriter(fs, job, fileName, progressable);
                    currentKey = fileName;
                }
                currentWriter.write(key, value);
            } else {
                RecordWriter<Text, Text> writer = recordWriterByKeys.get(fileName);
                if (writer == null) {
                    writer = getBaseRecordWriter(fs, job, fileName, progressable);
                    recordWriterByKeys.put(fileName, writer);
                }
                writer.write(key, value);
            }
            progressable.progress();
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            if (currentWriter != null) {
                currentWriter.close(reporter);
            }
            for (RecordWriter<Text, Text> writer : recordWriterByKeys.values()) {
                writer.close(reporter);
            }
        }
    };
}

From source file:org.apache.asterix.external.input.HDFSDataSourceFactory.java

License:Apache License

@Override
public void configure(Map<String, String> configuration) throws AsterixException {
    try {
        init();
        this.configuration = configuration;
        JobConf conf = HDFSUtils.configureHDFSJobConf(configuration);
        confFactory = new ConfFactory(conf);
        clusterLocations = getPartitionConstraint();
        int numPartitions = clusterLocations.getLocations().length;
        // if files list was set, we restrict the splits to the list
        InputSplit[] inputSplits;
        if (files == null) {
            inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
        } else {
            inputSplits = HDFSUtils.getSplits(conf, files);
        }
        if (indexingOp) {
            readSchedule = indexingScheduler.getLocationConstraints(inputSplits);
        } else {
            readSchedule = hdfsScheduler.getLocationConstraints(inputSplits);
        }
        inputSplitsFactory = new InputSplitsFactory(inputSplits);
        read = new boolean[readSchedule.length];
        Arrays.fill(read, false);
        String formatString = configuration.get(ExternalDataConstants.KEY_FORMAT);
        if (formatString == null || formatString.equals(ExternalDataConstants.FORMAT_HDFS_WRITABLE)) {
            RecordReader<?, ?> reader = conf.getInputFormat().getRecordReader(inputSplits[0], conf,
                    Reporter.NULL);
            this.recordClass = reader.createValue().getClass();
            reader.close();
        } else {
            format = StreamRecordReaderProvider.getReaderFormat(configuration);
            this.recordClass = char[].class;
        }
    } catch (IOException e) {
        throw new AsterixException(e);
    }
}