org.apache.asterix.external.input.stream.HDFSInputStream.java Source code

Introduction

Here is the source code for org.apache.asterix.external.input.stream.HDFSInputStream.java. The class implements AsterixDB's AsterixInputStream on top of a set of Hadoop (mapred API) input splits: it reads the splits scheduled for the local node one record at a time, exposes them as a byte stream with an LF appended after each record, and, by implementing IIndexingDatasource, gives an external indexer access to the current split and record reader.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.asterix.external.input.stream;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.asterix.common.exceptions.AsterixException;
import org.apache.asterix.external.api.AsterixInputStream;
import org.apache.asterix.external.api.IExternalIndexer;
import org.apache.asterix.external.api.IIndexingDatasource;
import org.apache.asterix.external.indexing.ExternalFile;
import org.apache.asterix.external.input.record.reader.hdfs.EmptyRecordReader;
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hyracks.api.exceptions.HyracksDataException;

public class HDFSInputStream extends AsterixInputStream implements IIndexingDatasource {

    private RecordReader<Object, Text> reader;
    private Text value = null;
    private Object key = null;
    private int currentSplitIndex = 0;
    private boolean read[];
    private InputFormat<?, Text> inputFormat;
    private InputSplit[] inputSplits;
    private String[] readSchedule;
    private String nodeName;
    private JobConf conf;
    // Indexing variables
    private final IExternalIndexer indexer;
    private final List<ExternalFile> snapshot;
    private final FileSystem hdfs;
    private int pos = 0;

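    /**
     * Builds a stream over the given splits. The {@code read} flags are shared
     * with other partitions on the same node and mark which splits have
     * already been picked up; {@code readSchedule} holds the node each split
     * is assigned to. {@code snapshot} and {@code indexer} are only used when
     * the stream feeds an external index and may be null otherwise.
     */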
    @SuppressWarnings("unchecked")
    public HDFSInputStream(boolean read[], InputSplit[] inputSplits, String[] readSchedule, String nodeName,
            JobConf conf, Map<String, String> configuration, List<ExternalFile> snapshot, IExternalIndexer indexer)
            throws IOException, AsterixException {
        this.read = read;
        this.inputSplits = inputSplits;
        this.readSchedule = readSchedule;
        this.nodeName = nodeName;
        this.conf = conf;
        this.inputFormat = conf.getInputFormat();
        this.reader = new EmptyRecordReader<Object, Text>();
        this.snapshot = snapshot;
        this.hdfs = FileSystem.get(conf);
        this.indexer = indexer;
        nextInputSplit();
        this.value = new Text();
        if (snapshot != null) {
            if (currentSplitIndex < snapshot.size()) {
                indexer.reset(this);
            }
        }
    }

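    /**
     * Single-byte read: returns the bytes of the current record, then a
     * synthetic LF once the record is exhausted, then moves on to the next
     * record (possibly from the next input split).
     */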
    @Override
    public int read() throws IOException {
        if (value.getLength() < pos) {
            if (!readMore()) {
                return -1;
            }
        } else if (value.getLength() == pos) {
            pos++;
            return ExternalDataConstants.BYTE_LF;
        }
        return value.getBytes()[pos++];
    }

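    /**
     * Copies as much of the current record as fits into the buffer, appending
     * an LF once the record has been copied completely, and returns the number
     * of bytes written.
     */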
    private int readRecord(byte[] buffer, int offset, int len) {
        int actualLength = value.getLength() + 1;
        if ((actualLength - pos) > len) {
            //copy partial record
            System.arraycopy(value.getBytes(), pos, buffer, offset, len);
            pos += len;
            return len;
        } else {
            int numBytes = value.getLength() - pos;
            System.arraycopy(value.getBytes(), pos, buffer, offset, numBytes);
            buffer[offset + numBytes] = ExternalDataConstants.LF;
            pos += numBytes;
            numBytes++;
            return numBytes;
        }
    }

    @Override
    public int read(byte[] buffer, int offset, int len) throws IOException {
        if (value.getLength() > pos) {
            return readRecord(buffer, offset, len);
        }
        if (!readMore()) {
            return -1;
        }
        return readRecord(buffer, offset, len);
    }

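    /**
     * Resets the read position and advances to the next record, wrapping any
     * checked exception from the Hadoop reader in an IOException.
     */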
    private boolean readMore() throws IOException {
        try {
            pos = 0;
            return HDFSInputStream.this.hasNext();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    @Override
    public boolean stop() throws Exception {
        return false;
    }

    @Override
    public boolean handleException(Throwable th) {
        return false;
    }

    @Override
    public void close() throws IOException {
        reader.close();
    }

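    /**
     * Tries to fetch the next record from the current reader; if the reader is
     * exhausted, walks the remaining splits assigned to this node.
     */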
    private boolean hasNext() throws Exception {
        if (reader.next(key, value)) {
            return true;
        }
        while (nextInputSplit()) {
            if (reader.next(key, value)) {
                return true;
            }
        }
        return false;
    }

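    /**
     * Advances to the next unread split scheduled for this node, skipping
     * files whose modification time no longer matches the snapshot, and opens
     * a reader for it. Returns false when no splits are left.
     */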
    private boolean nextInputSplit() throws IOException {
        for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
            /**
             * read all the partitions scheduled for the current node
             */
            if (readSchedule[currentSplitIndex].equals(nodeName)) {
                /**
                 * pick an unread split to read; synchronize among
                 * simultaneous partitions on the same machine
                 */
                synchronized (read) {
                    if (!read[currentSplitIndex]) {
                        read[currentSplitIndex] = true;
                    } else {
                        continue;
                    }
                }
                if (snapshot != null) {
                    String fileName = ((FileSplit) (inputSplits[currentSplitIndex])).getPath().toUri().getPath();
                    FileStatus fileStatus = hdfs.getFileStatus(new Path(fileName));
                    // Skip if not the same file stored in the files snapshot
                    if (fileStatus.getModificationTime() != snapshot.get(currentSplitIndex).getLastModefiedTime()
                            .getTime()) {
                        continue;
                    }
                }

                reader.close();
                reader = getRecordReader(currentSplitIndex);
                return true;
            }
        }
        return false;
    }

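    /**
     * Opens a reader for the given split, lazily creating the key/value
     * objects, and resets the external indexer (if any) to the new split.
     */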
    @SuppressWarnings("unchecked")
    private RecordReader<Object, Text> getRecordReader(int splitIndex) throws IOException {
        reader = (RecordReader<Object, Text>) inputFormat.getRecordReader(inputSplits[splitIndex], conf,
                Reporter.NULL);
        if (key == null) {
            key = reader.createKey();
            value = reader.createValue();
        }
        if (indexer != null) {
            try {
                indexer.reset(this);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }
        return reader;
    }

    @Override
    public IExternalIndexer getIndexer() {
        return indexer;
    }

    @Override
    public List<ExternalFile> getSnapshot() {
        return snapshot;
    }

    @Override
    public int getCurrentSplitIndex() {
        return currentSplitIndex;
    }

    @Override
    public RecordReader<?, ? extends Writable> getReader() {
        return reader;
    }
}
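
Example usage

The class is normally constructed inside AsterixDB's external-data runtime, but the sketch below shows, purely for illustration, how it could be driven on its own. The TextInputFormat job, the input path, the node name "nc1", and the single-node read schedule are assumptions made for this example; in AsterixDB these values come from the HDFS read scheduler and the dataset configuration. Passing null for both the snapshot and the indexer skips the external-indexing path, which the code above guards for explicitly.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;

import org.apache.asterix.external.input.stream.HDFSInputStream;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class HDFSInputStreamExample {

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setInputFormat(TextInputFormat.class);
        // Hypothetical input location; point this at real HDFS (or local) files.
        FileInputFormat.setInputPaths(conf, "hdfs://localhost:9000/data/input");

        InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1);

        // Nothing has been consumed yet, and every split is scheduled to this
        // (illustrative) node name.
        boolean[] read = new boolean[splits.length];
        String[] readSchedule = new String[splits.length];
        Arrays.fill(readSchedule, "nc1");

        // No external-file snapshot and no indexer: plain streaming of the splits.
        HDFSInputStream in = new HDFSInputStream(read, splits, readSchedule, "nc1", conf,
                Collections.<String, String>emptyMap(), null, null);

        // Each record is exposed followed by an LF, so line-based reading works.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}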