com.asakusafw.runtime.io.line.BasicLineInput.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.runtime.io.line.BasicLineInput.java

Source

/**
 * Copyright 2011-2016 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.io.line;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.text.MessageFormat;

import org.apache.hadoop.io.Text;

import com.asakusafw.runtime.value.StringOption;

/**
 * A simple line reader for text with any charset encoding.
 *
 * <p>
 * The source stream is decoded into characters using the configured charset, split into lines at
 * {@code CR}, {@code LF}, or {@code CR+LF}, and each line is re-encoded with {@code INTERNAL_CHARSET}
 * into the {@link Text} entity of the destination {@link StringOption}.
 * Line terminators are never included in the resulting line.
 * </p>
 * @since 0.7.5
 */
public class BasicLineInput extends LineInput {

    /** An empty text, used to turn a {@code null} option into a non-null (empty) one. */
    private static final Text EMPTY = new Text();

    /** The lower bound of the character buffer size, regardless of the configured buffer size. */
    private static final int MIN_BUFFER_SIZE = 256;

    private final Reader reader;

    private final String path;

    /** Encodes buffered characters into bytes of {@code INTERNAL_CHARSET}. */
    private final CharsetEncoder encoder;

    /** Holds decoded characters which have not been consumed yet; always kept in "read" (flipped) mode. */
    private final CharBuffer charBuffer;

    /** A scratch buffer for the encoded bytes of a line fragment. */
    private final ByteBuffer byteBuffer;

    /** {@code true} iff the last consumed character was CR and the character following it is not yet examined. */
    private boolean sawCr;

    private boolean sawEof;

    /** The number of lines that have been read so far. */
    private long lineNumber;

    /**
     * Creates a new instance.
     * Note that only {@code stream} and {@code configuration} are validated here; {@code path} is stored as is.
     * @param stream the source stream
     * @param path the source path
     * @param configuration the current configuration
     * @throws IllegalArgumentException if some parameters were {@code null}
     * @see #newInstance(InputStream, String, LineConfiguration)
     */
    public BasicLineInput(InputStream stream, String path, LineConfiguration configuration) {
        if (stream == null) {
            throw new IllegalArgumentException("stream must not be null"); //$NON-NLS-1$
        }
        if (configuration == null) {
            throw new IllegalArgumentException("configuration must not be null"); //$NON-NLS-1$
        }
        // NOTE(review): the configured coding error actions are applied to the encoder below, not to the
        // decoding performed by this reader - confirm that this is intended.
        this.reader = new InputStreamReader(stream, configuration.getCharset());
        this.path = path;
        this.encoder = INTERNAL_CHARSET.newEncoder().onMalformedInput(configuration.getMalformedInputAction())
                .onUnmappableCharacter(configuration.getUnmappableCharacterAction());
        this.charBuffer = CharBuffer.wrap(new char[Math.max(MIN_BUFFER_SIZE, configuration.getBufferSize())]);
        // start with an empty buffer in "read" mode, so that the first prepare() fills it
        this.charBuffer.clear();
        this.charBuffer.flip();
        // the byte buffer may be smaller than a fragment: append() drains it repeatedly on overflow
        this.byteBuffer = ByteBuffer.wrap(new byte[Math.max(MIN_BUFFER_SIZE, configuration.getBufferSize()) / 2]);
        this.sawCr = false;
        this.sawEof = false;
        this.lineNumber = 0;
    }

    @Override
    public String getPath() {
        return path;
    }

    /**
     * Returns the number of lines read so far.
     * @return the number of lines read so far, or {@code -1} if no lines have been read yet
     */
    @Override
    public long getLineNumber() {
        if (lineNumber <= 0) {
            return -1;
        }
        return lineNumber;
    }

    /**
     * Reads the next line into the given option.
     * The last line is also returned even if it is not terminated by a line break,
     * as long as it contains at least one character.
     * @param model the destination object
     * @return {@code true} if the next line was read, or {@code false} if there are no more lines
     * @throws IOException if failed to read from the source, or failed to encode the line contents
     */
    @Override
    public boolean readTo(StringOption model) throws IOException {
        if (sawEof) {
            return false;
        }
        boolean changed = false;
        Text entity = null;
        while (true) {
            if (prepare() == false) {
                // reached EOF: the pending characters (if any) become the last line
                if (changed) {
                    lineNumber++;
                    return true;
                }
                return false;
            }
            // the destination is cleared lazily, only after some input is known to be available
            if (entity == null) {
                entity = reset(model);
            }
            assert entity != null;
            State state = appendBufferTo(entity);
            switch (state) {
            case NOTHING:
                continue;
            case CONTINUE:
                changed = true;
                continue;
            case LINE_BREAK:
                lineNumber++;
                return true;
            default:
                throw new AssertionError();
            }
        }
    }

    /**
     * Ensures that the character buffer has at least one character to consume.
     * @return {@code true} if the buffer has some characters, or {@code false} if the source reached EOF
     * @throws IOException if failed to read from the source
     */
    private boolean prepare() throws IOException {
        CharBuffer b = charBuffer;
        if (b.hasRemaining()) {
            // already prepared
            return true;
        }
        b.clear();
        while (true) {
            int read = reader.read(b);
            if (read < 0) {
                sawEof = true;
                return false;
            } else if (read > 0) {
                b.flip();
                break;
            }
            // read == 0: nothing was transferred, try again
        }
        return true;
    }

    /**
     * Makes the given option a non-null empty string, and returns its entity.
     * @param model the target option
     * @return the (empty) entity of the option
     */
    @SuppressWarnings("deprecation")
    private Text reset(StringOption model) {
        if (model.isNull()) {
            model.modify(EMPTY);
        } else {
            model.get().clear();
        }
        return model.get();
    }

    /**
     * Consumes characters in the buffer up to the next line break (or the buffer limit),
     * and appends them into the given entity.
     * @param entity the destination
     * @return {@link State#LINE_BREAK} if a line terminator was consumed,
     *     {@link State#CONTINUE} if some characters were appended but the line is not terminated yet, or
     *     {@link State#NOTHING} if nothing was appended and no line terminator was found
     * @throws IOException if failed to encode the characters
     */
    private State appendBufferTo(Text entity) throws IOException {
        CharBuffer b = charBuffer;
        assert b.hasRemaining();
        char[] cs = charBuffer.array();
        if (sawCr) {
            // The pending CR is resolved by the very first character which follows it, whether or not
            // it is LF. Keeping the flag beyond this point would wrongly swallow a later LF at the head
            // of a subsequent buffer when this scan reaches the buffer limit without a line break.
            sawCr = false;
            // skip LF after CR
            if (cs[b.position()] == '\n') {
                b.position(b.position() + 1);
            }
        }
        boolean eol = false;
        int len = 0;
        // scan buffer until CR/LF/buffer limit
        for (int i = b.position(), n = b.limit(); i < n; i++) {
            char c = cs[i];
            if (c == '\r' || c == '\n') {
                eol = true;
                sawCr = c == '\r';
                break;
            } else {
                len++;
            }
        }
        if (len > 0) {
            // this also advances the buffer position by len
            append(entity, len);
        }
        // skip the line terminator itself (the LF of CR+LF is skipped on the next invocation)
        if (eol) {
            b.position(b.position() + 1);
        }
        if (len == 0) {
            return eol ? State.LINE_BREAK : State.NOTHING;
        } else {
            return eol ? State.LINE_BREAK : State.CONTINUE;
        }
    }

    /**
     * Encodes the next {@code len} characters in the buffer, and appends the resulting bytes into the entity.
     * The buffer position is advanced over the encoded characters.
     * @param entity the destination
     * @param len the number of characters to encode
     * @throws IOException if the encoder reported a malformed or unmappable character
     */
    private void append(Text entity, int len) throws IOException {
        ByteBuffer bs = byteBuffer;
        CharBuffer cs = charBuffer;
        int limit = cs.limit();
        // slice the buffer
        cs.limit(cs.position() + len);
        while (true) {
            bs.clear();
            CoderResult result = encoder.encode(cs, bs, true);
            if (result.isError() == false) {
                bs.flip();
                // Text.append() takes (bytes, start, length)
                entity.append(bs.array(), bs.position(), bs.remaining());
                if (result.isUnderflow()) {
                    // all characters in the slice were encoded
                    break;
                }
                // overflow: the byte buffer was drained above, so continue encoding the rest
            } else {
                assert result.isError();
                try {
                    result.throwException();
                } catch (CharacterCodingException e) {
                    throw new IOException(MessageFormat.format("exception occurred while encoding text: {0}", path),
                            e);
                }
            }
        }
        // restore the original limit
        cs.limit(limit);
    }

    @Override
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Represents the result of consuming the character buffer once.
     */
    private enum State {

        /** No characters were appended, and no line terminator was found. */
        NOTHING,

        /** Some characters were appended, but the line is not terminated yet. */
        CONTINUE,

        /** A line terminator was consumed. */
        LINE_BREAK,
    }
}