com.asakusafw.runtime.directio.hadoop.SequenceFileFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.runtime.directio.hadoop.SequenceFileFormat.java

Source

/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.directio.hadoop;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.MessageFormat;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;

import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;

/**
 * Data model format of {@link InputStream} / {@link OutputStream} .
 * This implementation class must have a public constructor without any parameters.
 * @param <K> the type of raw sequence file key
 * @param <V> the type of raw sequence file value
 * @param <T> the type of target data model
 * @since 0.2.6
 * @version 0.4.0
 */
public abstract class SequenceFileFormat<K, V, T> extends HadoopFileFormat<T> {

    static final Log LOG = LogFactory.getLog(SequenceFileFormat.class);

    static final String KEY_COMPRESSION_CODEC = "com.asakusafw.output.sequencefile.compression.codec"; //$NON-NLS-1$

    static final String VALUE_COMPRESSION_AUTO = "auto"; //$NON-NLS-1$

    @Override
    public long getMinimumFragmentSize() throws IOException, InterruptedException {
        return SequenceFile.SYNC_INTERVAL;
    }

    /**
     * Returns a key object.
     * @return a key object
     */
    protected abstract K createKeyObject();

    /**
     * Returns a value object.
     * @return a value object
     */
    protected abstract V createValueObject();

    /**
     * Copy key and value into the target data model.
     * @param key the source key object
     * @param value the source value object
     * @param model the target data model
     * @throws IOException if failed to copy
     */
    protected abstract void copyToModel(K key, V value, T model) throws IOException;

    /**
     * Copy the data model into the key and value.
     * @param model the source data model
     * @param key the target key object
     * @param value the target value object
     * @throws IOException if failed to copy
     */
    protected abstract void copyFromModel(T model, K key, V value) throws IOException;

    @Override
    public ModelInput<T> createInput(Class<? extends T> dataType, FileSystem fileSystem, Path path, long offset,
            long fragmentSize, Counter counter) throws IOException, InterruptedException {
        long end = offset + fragmentSize;
        K keyBuffer = createKeyObject();
        V valueBuffer = createValueObject();
        SequenceFile.Reader reader;
        try {
            reader = new SequenceFile.Reader(getConf(), SequenceFile.Reader.file(fileSystem.makeQualified(path)));
        } catch (EOFException e) {
            FileStatus status = fileSystem.getFileStatus(path);
            if (status.getLen() == 0L) {
                LOG.warn(MessageFormat.format("Target sequence file is empty: {0}", path));
                return new ModelInput<T>() {
                    @Override
                    public boolean readTo(T model) throws IOException {
                        return false;
                    }

                    @Override
                    public void close() throws IOException {
                        return;
                    }
                };
            }
            throw e;
        }
        boolean succeed = false;
        try {
            if (offset > reader.getPosition()) {
                reader.sync(offset);
            }
            ModelInput<T> result = new ModelInput<T>() {

                private boolean next = reader.getPosition() < end;

                private long lastPosition = reader.getPosition();

                @Override
                public boolean readTo(T model) throws IOException {
                    if (next == false) {
                        return false;
                    }
                    long current = reader.getPosition();
                    @SuppressWarnings("unchecked")
                    K key = (K) reader.next(keyBuffer);
                    if (key == null || (current >= end && reader.syncSeen())) {
                        next = false;
                        return false;
                    } else {
                        reader.getCurrentValue(valueBuffer);
                        SequenceFileFormat.this.copyToModel(keyBuffer, valueBuffer, model);
                        long nextPosition = reader.getPosition();
                        counter.add(nextPosition - lastPosition);
                        lastPosition = nextPosition;
                        return true;
                    }
                }

                @Override
                public void close() throws IOException {
                    reader.close();
                }
            };
            succeed = true;
            return result;
        } finally {
            if (succeed == false) {
                reader.close();
            }
        }
    }

    @Override
    public ModelOutput<T> createOutput(Class<? extends T> dataType, FileSystem fileSystem, Path path,
            Counter counter) throws IOException, InterruptedException {
        K keyBuffer = createKeyObject();
        V valueBuffer = createValueObject();
        CompressionCodec codec = getCompressionCodec(path);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Creating sequence file (path={0}, type={1}, codec={2})", //$NON-NLS-1$
                    path, dataType.getName(), codec));
        }
        configure(codec);
        SequenceFile.Writer writer = SequenceFile.createWriter(getConf(),
                SequenceFile.Writer.file(fileSystem.makeQualified(path)),
                SequenceFile.Writer.keyClass(keyBuffer.getClass()),
                SequenceFile.Writer.valueClass(valueBuffer.getClass()), SequenceFile.Writer
                        .compression(codec == null ? CompressionType.NONE : CompressionType.BLOCK, codec));
        boolean succeed = false;
        try {
            ModelOutput<T> output = new ModelOutput<T>() {

                private long lastPosition = 0;

                @Override
                public void write(T model) throws IOException {
                    copyFromModel(model, keyBuffer, valueBuffer);
                    writer.append(keyBuffer, valueBuffer);
                    long nextPosition = writer.getLength();
                    counter.add(nextPosition - lastPosition);
                    lastPosition = nextPosition;
                }

                @Override
                public void close() throws IOException {
                    writer.close();
                }
            };
            succeed = true;
            return output;
        } finally {
            if (succeed == false) {
                writer.close();
            }
        }
    }

    private void configure(Object object) {
        if (object instanceof Configurable) {
            Configurable configurable = (Configurable) object;
            if (configurable.getConf() == null) {
                configurable.setConf(getConf());
            }
        }
    }

    /**
     * Returns a compression codec for output sequence files.
     * Clients can override this method in subclasses, and return the suitable {@link CompressionCodec} object.
     * @param path target path
     * @return a compression codec used to output, or {@code null} if output will not be compressed
     * @throws IOException if failed to create a compression codec
     * @throws InterruptedException if interrupted
     */
    public CompressionCodec getCompressionCodec(Path path) throws IOException, InterruptedException {
        String codecClassName = getConf().get(KEY_COMPRESSION_CODEC);
        if (codecClassName != null && codecClassName.isEmpty() == false) {
            try {
                Class<?> codecClass = getConf().getClassByName(codecClassName);
                return ReflectionUtils.newInstance(codecClass.asSubclass(CompressionCodec.class), getConf());
            } catch (Exception e) {
                LOG.warn(MessageFormat.format("Failed to load compression codec ({0}={1})", KEY_COMPRESSION_CODEC,
                        codecClassName), e);
                return null;
            }
        }
        return null;
    }
}