com.github.bskaggs.mapreduce.flowfile.AbstractFlowFileV3RecordReader.java Source code

Java tutorial

Introduction

Here is the source code for com.github.bskaggs.mapreduce.flowfile.AbstractFlowFileV3RecordReader.java

Source

package com.github.bskaggs.mapreduce.flowfile;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Base {@link RecordReader} that iterates over records stored back-to-back in
 * FlowFile-v3 framing.
 *
 * <p>Each record, as parsed by this class, is laid out as:
 * <ol>
 *   <li>the {@link #MAGIC_HEADER} bytes,</li>
 *   <li>an attribute count followed by that many key/value strings, each string
 *       prefixed with its byte length (see {@link #readFieldLength}),</li>
 *   <li>an 8-byte content length,</li>
 *   <li>the content bytes, which are handed to {@link #nextValue}.</li>
 * </ol>
 *
 * <p>The key exposed to the job is the attribute map of the current record. The
 * same map instance is cleared and refilled for every record, so callers must
 * copy it if they need to retain it past the next {@link #nextKeyValue()} call.
 *
 * @param <V> type of value produced from the content section of a record
 */
public abstract class AbstractFlowFileV3RecordReader<V> extends RecordReader<Map<String, String>, V> {
    /** Magic bytes ("NiFiFF3") that must start every record. */
    public static final byte[] MAGIC_HEADER = { 'N', 'i', 'F', 'i', 'F', 'F', '3' };

    /** Scratch buffer the magic bytes of each record are read into. */
    private final byte[] headerBuffer = new byte[MAGIC_HEADER.length];

    /** Open stream over the split's file; {@code null} until {@link #initialize} succeeds. */
    private FSDataInputStream fileStream;

    /** Attribute map of the current record; reused between records. */
    private final Map<String, String> key = new HashMap<>();

    /** Value produced by {@link #nextValue} for the current record. */
    protected V value;

    /** Offset of the first byte of the split. */
    private long startPos;
    /** Offset at which the next record header is expected. */
    private long nextPos;
    /** Offset one past the last byte of the split. */
    private long lastPos;

    /** Length of the split in bytes. */
    private long length;

    /**
     * Reads the attribute section of a record into {@code attributes}.
     *
     * @param attributes map that receives the key/value pairs (existing entries are kept)
     * @param in stream positioned at the attribute count
     * @return the same {@code attributes} instance
     * @throws IOException if the attribute count is zero, a length field is invalid,
     *         or the stream cannot be read
     */
    protected Map<String, String> readAttributes(Map<String, String> attributes, final FSDataInputStream in)
            throws IOException {
        final int numAttributes = readFieldLength(in); // read number of attributes
        if (numAttributes == 0) {
            throw new IOException("flow files cannot have zero attributes");
        }
        for (int i = 0; i < numAttributes; i++) { // read each attribute as key/value pair
            final String attributeName = readString(in);
            final String attributeValue = readString(in);
            attributes.put(attributeName, attributeValue);
        }
        return attributes;
    }

    /**
     * Reads one length-prefixed UTF-8 string.
     *
     * @param in stream positioned at the string's length field
     * @return the decoded string
     * @throws IOException if the length field is invalid or the stream ends early
     */
    protected String readString(final FSDataInputStream in) throws IOException {
        final int numBytes = readFieldLength(in);
        if (numBytes < 0) {
            // Guards against an overriding readFieldLength that lets a negative value through;
            // without this the array allocation would fail with an unchecked exception.
            throw new IOException("Invalid FlowFile-v3 string length: " + numBytes);
        }
        final byte[] bytes = new byte[numBytes];
        in.readFully(bytes);
        return new String(bytes, StandardCharsets.UTF_8);
    }

    /**
     * Reads a length/count field: an unsigned 16-bit value, or, when that value is
     * {@code 0xFFFF}, the 32-bit integer that follows it.
     *
     * @param in stream positioned at the field
     * @return the non-negative field value
     * @throws IOException if the extended 32-bit value is negative or the stream cannot be read
     */
    protected int readFieldLength(final FSDataInputStream in) throws IOException {
        int fieldLength = in.readUnsignedShort();
        if (fieldLength == 0xFFFF) {
            fieldLength = in.readInt();
            if (fieldLength < 0) {
                // A negative length can only come from corrupt data; report it as an I/O error
                // instead of letting callers allocate a negative-sized array or skip their loops.
                throw new IOException("Invalid FlowFile-v3 field length: " + fieldLength);
            }
        }
        return fieldLength;
    }

    /**
     * Advances to the next record in the split, populating the current key and value.
     *
     * @return {@code true} if a record was read, {@code false} once the split is exhausted
     * @throws IOException if the data is not in FlowFile-v3 format, a length field is
     *         invalid, or the stream cannot be read
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (nextPos >= lastPos) {
            return false;
        }

        // nextValue may have consumed only part (or none) of the previous content,
        // so reposition explicitly at the start of the next record.
        if (fileStream.getPos() != nextPos) {
            fileStream.seek(nextPos);
        }

        fileStream.readFully(headerBuffer);
        if (!Arrays.equals(headerBuffer, MAGIC_HEADER)) {
            throw new IOException("Not in FlowFile-v3 format");
        }

        key.clear();
        readAttributes(key, fileStream);

        final long byteLength = fileStream.readLong();
        final long contentStart = fileStream.getPos();
        if (byteLength < 0 || byteLength > Long.MAX_VALUE - contentStart) {
            // A negative or overflowing length would move nextPos backwards, causing the
            // same bytes to be re-read or a seek to a negative offset.
            throw new IOException("Invalid FlowFile-v3 content length: " + byteLength);
        }
        nextPos = contentStart + byteLength;

        value = nextValue(fileStream, byteLength);
        return true;
    }

    /**
     * Produces the value for the current record from its content section.
     *
     * @param fileStream stream positioned at the first content byte
     * @param byteLength number of content bytes belonging to the record
     * @return the value to expose through {@link #getCurrentValue()}
     * @throws IOException if the content cannot be read
     */
    protected abstract V nextValue(FSDataInputStream fileStream, long byteLength) throws IOException;

    /**
     * Opens the file behind {@code split} and records the byte range to read.
     *
     * @param split the split to read; must be a {@link FileSplit}
     * @param context task context supplying the configuration used to resolve the file system
     * @throws IOException if the file cannot be opened
     */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        FileSplit fileSplit = (FileSplit) split;

        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(context.getConfiguration());
        fileStream = fs.open(file);

        startPos = fileSplit.getStart();
        nextPos = startPos;
        length = fileSplit.getLength();
        lastPos = nextPos + length;
    }

    /**
     * Returns the attribute map of the current record.
     *
     * @return the reused attribute map; it is overwritten by the next {@link #nextKeyValue()} call
     */
    @Override
    public Map<String, String> getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    /**
     * Returns the value produced for the current record.
     *
     * @return the result of the most recent {@link #nextValue} call
     */
    @Override
    public V getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    /**
     * Reports how far the stream has advanced through the split.
     *
     * @return a fraction between {@code 0.0} and {@code 1.0}; {@code 0.0} for an empty split
     * @throws IOException if the stream position cannot be determined
     */
    @Override
    public float getProgress() throws IOException {
        if (length == 0) {
            return 0.0f;
        }
        final float progress = ((float) (fileStream.getPos() - startPos)) / length;
        // Clamp: the stream sits before startPos until the first record is read, and the
        // last record may extend past the end of the split.
        return Math.min(1.0f, Math.max(0.0f, progress));
    }

    /**
     * Closes the underlying stream, if one was opened.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        // initialize may have failed before the stream was opened.
        if (fileStream != null) {
            fileStream.close();
        }
    }
}