com.alibaba.jstorm.hdfs.spout.TextFileReader.java Source code

Java tutorial

Introduction

Here is the source code for com.alibaba.jstorm.hdfs.spout.TextFileReader.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.jstorm.hdfs.spout;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.List;
import java.util.Map;

// Todo: Track file offsets instead of line number
/**
 * Reads a text file line by line through a {@link BufferedReader} and keeps track of the
 * current position as a character offset plus a line number (see {@link Offset}).
 *
 * <p>Supported configuration keys (both optional):
 * <ul>
 *   <li>{@link #CHARSET} - name of the charset used to decode the file, default {@code UTF-8}</li>
 *   <li>{@link #BUFFER_SIZE} - size of the reader's buffer, default 4096</li>
 * </ul>
 */
public class TextFileReader extends AbstractFileReader {
    public static final String[] defaultFields = { "line" };
    public static final String CHARSET = "hdfsspout.reader.charset";
    public static final String BUFFER_SIZE = "hdfsspout.reader.buffer.bytes";

    private static final int DEFAULT_BUFF_SIZE = 4096;
    private static final String DEFAULT_CHARSET = "UTF-8";
    private static final Logger LOG = LoggerFactory.getLogger(TextFileReader.class);

    private BufferedReader reader;
    private TextFileReader.Offset offset;

    /**
     * Opens {@code file} for reading from its beginning.
     *
     * @param fs   file system used to open the file
     * @param file path of the file to read
     * @param conf configuration map, may be {@code null}
     * @throws IOException if the file cannot be opened
     */
    public TextFileReader(FileSystem fs, Path file, Map conf) throws IOException {
        this(fs, file, conf, new TextFileReader.Offset(0, 0));
    }

    /**
     * Opens {@code file} and resumes reading at a previously recorded offset.
     *
     * @param fs          file system used to open the file
     * @param file        path of the file to read
     * @param conf        configuration map, may be {@code null}
     * @param startOffset textual offset as produced by {@link Offset#toString()}, or {@code "0"}
     * @throws IOException              if the file cannot be opened or positioned
     * @throws IllegalArgumentException if {@code startOffset} is null or cannot be parsed
     */
    public TextFileReader(FileSystem fs, Path file, Map conf, String startOffset) throws IOException {
        this(fs, file, conf, new TextFileReader.Offset(startOffset));
    }

    private TextFileReader(FileSystem fs, Path file, Map conf, TextFileReader.Offset startOffset)
            throws IOException {
        super(fs, file);
        offset = startOffset;

        // Resolve the settings before opening the file so that a bad config value cannot
        // leave an open stream behind. A key mapped to null is treated like a missing key.
        Object charSetConf = (conf == null) ? null : conf.get(CHARSET);
        Object buffSzConf = (conf == null) ? null : conf.get(BUFFER_SIZE);
        String charSet = (charSetConf == null) ? DEFAULT_CHARSET : charSetConf.toString();
        int buffSz = (buffSzConf == null) ? DEFAULT_BUFF_SIZE : Integer.parseInt(buffSzConf.toString());

        FSDataInputStream in = fs.open(file);
        boolean initialized = false;
        try {
            reader = new BufferedReader(new InputStreamReader(in, charSet), buffSz);
            if (offset.charOffset > 0) {
                reader.skip(offset.charOffset);
            }
            initialized = true;
        } finally {
            if (!initialized) {
                // Construction failed (unsupported charset, illegal buffer size, I/O error
                // while skipping): nobody will ever call close(), so release the stream here.
                try {
                    in.close();
                } catch (IOException e) {
                    LOG.warn("Ignoring error when closing file " + file, e);
                }
            }
        }
    }

    /**
     * @return a copy of the current position; later reads do not modify the returned object
     */
    public Offset getFileOffset() {
        return offset.clone();
    }

    /**
     * Reads the next line of the file.
     *
     * @return a single-element list holding the line (without line terminator),
     *         or {@code null} once the end of the file has been reached
     * @throws IOException if reading from the underlying stream fails
     */
    public List<Object> next() throws IOException, ParseException {
        String line = readLineAndTrackOffset(reader);
        if (line != null) {
            return Collections.singletonList((Object) line);
        }
        return null;
    }

    /**
     * Reads characters up to and including the next {@code '\n'} while updating {@link #offset}.
     * Every character consumed (including {@code '\r'} and {@code '\n'}) advances the character
     * offset; {@code '\r'} characters are dropped from the returned text.
     *
     * @return the line text, or {@code null} if the end of file was hit before reading anything
     */
    private String readLineAndTrackOffset(BufferedReader reader) throws IOException {
        StringBuilder sb = new StringBuilder(1000);
        long before = offset.charOffset;
        int ch;
        while ((ch = reader.read()) != -1) {
            ++offset.charOffset;
            if (ch == '\n') {
                ++offset.lineNumber;
                return sb.toString();
            } else if (ch != '\r') {
                sb.append((char) ch);
            }
        }
        if (before == offset.charOffset) { // reached EOF, didn't read anything
            return null;
        }
        // The last line of the file had no terminating '\n'. It is still returned as a line,
        // so it must advance the line number too; otherwise its offset would compare equal to
        // the previous line's offset and would never satisfy Offset.isNextOffset().
        ++offset.lineNumber;
        return sb.toString();
    }

    /**
     * Closes the underlying reader. Failures are logged and otherwise ignored.
     */
    @Override
    public void close() {
        try {
            reader.close();
        } catch (IOException e) {
            LOG.warn("Ignoring error when closing file " + getFilePath(), e);
        }
    }

    /**
     * Position inside a text file: number of characters consumed so far and number of
     * lines read so far.
     *
     * <p>Note: {@link #compareTo(FileOffset)} orders by line number only, whereas
     * {@link #equals(Object)} also takes the character offset into account.
     */
    public static class Offset implements FileOffset {
        long charOffset;
        long lineNumber;

        /**
         * @param byteOffset number of characters already consumed (stored as the char offset)
         * @param lineNumber number of lines already read
         */
        public Offset(long byteOffset, long lineNumber) {
            this.charOffset = byteOffset;
            this.lineNumber = lineNumber;
        }

        /**
         * Parses a textual offset. Accepted inputs are {@code "0"} (start of file), the
         * output of {@link #toString()} (e.g. <code>{char=123:line=5:}</code>) and the same
         * form without the trailing colon (e.g. <code>{char=123:line=5}</code>).
         *
         * @param offset textual offset, must not be {@code null}
         * @throws IllegalArgumentException if {@code offset} is null or malformed
         */
        public Offset(String offset) {
            if (offset == null) {
                throw new IllegalArgumentException("offset cannot be null");
            }
            try {
                String text = offset.trim();
                if (text.equalsIgnoreCase("0")) {
                    this.charOffset = 0;
                    this.lineNumber = 0;
                } else {
                    // Drop the enclosing braces so that a closing '}' directly after the
                    // line number does not end up inside the number being parsed.
                    if (text.startsWith("{")) {
                        text = text.substring(1);
                    }
                    if (text.endsWith("}")) {
                        text = text.substring(0, text.length() - 1);
                    }
                    String[] parts = text.split(":");
                    this.charOffset = Long.parseLong(parts[0].split("=")[1].trim());
                    this.lineNumber = Long.parseLong(parts[1].split("=")[1].trim());
                }
            } catch (Exception e) {
                throw new IllegalArgumentException(
                        "'" + offset + "' cannot be interpreted. It is not in expected format for TextFileReader."
                                + " Format e.g.  {char=123:line=5}",
                        e);
            }
        }

        /**
         * @return the offset in the textual form understood by {@link #Offset(String)}
         */
        @Override
        public String toString() {
            return '{' + "char=" + charOffset + ":line=" + lineNumber + ":}";
        }

        /**
         * @return {@code true} if {@code rhs} is an {@code Offset} exactly one line after this
         *         one and at a greater character offset
         */
        @Override
        public boolean isNextOffset(FileOffset rhs) {
            if (rhs instanceof Offset) {
                Offset other = ((Offset) rhs);
                return other.charOffset > charOffset && other.lineNumber == lineNumber + 1;
            }
            return false;
        }

        /**
         * Orders offsets by line number only.
         *
         * @throws ClassCastException if {@code o} is not an {@code Offset}
         */
        @Override
        public int compareTo(FileOffset o) {
            Offset rhs = ((Offset) o);
            if (lineNumber < rhs.lineNumber) {
                return -1;
            }
            if (lineNumber == rhs.lineNumber) {
                return 0;
            }
            return 1;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof Offset)) {
                return false;
            }

            Offset that = (Offset) o;
            return charOffset == that.charOffset && lineNumber == that.lineNumber;
        }

        @Override
        public int hashCode() {
            int result = (int) (charOffset ^ (charOffset >>> 32));
            result = 31 * result + (int) (lineNumber ^ (lineNumber >>> 32));
            return result;
        }

        /**
         * @return an independent copy of this offset
         */
        @Override
        public Offset clone() {
            return new Offset(charOffset, lineNumber);
        }
    } //class Offset
}