com.asakusafw.testdriver.DirectIoUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.testdriver.DirectIoUtil.java

Source

/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.testdriver;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.asakusafw.runtime.directio.BinaryStreamFormat;
import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DataFormat;
import com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil;
import com.asakusafw.runtime.directio.hadoop.HadoopFileFormat;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.testdriver.core.DataModelDefinition;
import com.asakusafw.testdriver.core.DataModelReflection;
import com.asakusafw.testdriver.core.DataModelSink;
import com.asakusafw.testdriver.core.DataModelSinkFactory;
import com.asakusafw.testdriver.core.DataModelSource;
import com.asakusafw.testdriver.core.DataModelSourceFactory;
import com.asakusafw.testdriver.core.IteratorDataModelSource;
import com.asakusafw.testdriver.core.TestContext;

final class DirectIoUtil {

    static final Logger LOG = LoggerFactory.getLogger(DirectIoUtil.class);

    private DirectIoUtil() {
        return;
    }

    /**
     * Builds a source factory from the data located at a URL, using a data format given by its class.
     * <p>
     * The format class is instantiated with {@code newDataFormat}, then the work is delegated to the
     * overload that accepts a format instance and a URL.
     * </p>
     * @param <T> the data model type
     * @param configuration the configuration passed to the format and the delegated loader
     * @param definition the definition of the data model to load
     * @param formatClass the data format class to instantiate
     * @param source the location of the source data
     * @return the source factory produced by the delegated loader
     * @throws IOException propagated from the delegated loader
     * @throws InterruptedException propagated from the delegated loader
     */
    static <T> DataModelSourceFactory load(Configuration configuration, DataModelDefinition<T> definition,
            Class<? extends DataFormat<?>> formatClass, URL source) throws IOException, InterruptedException {
        return load(
                configuration,
                definition,
                DirectIoUtil.<T>newDataFormat(configuration, formatClass),
                source);
    }

    /**
     * Builds a source factory from the data stored in a local file, using a data format given by its class.
     * <p>
     * The format class is instantiated with {@code newDataFormat}, then the work is delegated to the
     * overload that accepts a format instance and a file.
     * </p>
     * @param <T> the data model type
     * @param configuration the configuration passed to the format and the delegated loader
     * @param definition the definition of the data model to load
     * @param formatClass the data format class to instantiate
     * @param source the file holding the source data
     * @return the source factory produced by the delegated loader
     * @throws IOException propagated from the delegated loader
     * @throws InterruptedException propagated from the delegated loader
     */
    static <T> DataModelSourceFactory load(Configuration configuration, DataModelDefinition<T> definition,
            Class<? extends DataFormat<?>> formatClass, File source) throws IOException, InterruptedException {
        return load(
                configuration,
                definition,
                DirectIoUtil.<T>newDataFormat(configuration, formatClass),
                source);
    }

    /**
     * Loads the content of a local file with the given format instance.
     * <p>
     * After verifying that the format supports the data model type, a {@link BinaryStreamFormat} is
     * used as is; any other format is first converted with
     * {@code HadoopDataSourceUtil.toHadoopFileFormat}.
     * </p>
     * @param <T> the data model type
     * @param configuration the configuration used for the Hadoop file format conversion
     * @param definition the definition of the data model to load
     * @param format the data format instance
     * @param source the file holding the source data
     * @return a factory of sources over the loaded data
     * @throws IOException if loading fails with an I/O error
     * @throws InterruptedException propagated from the underlying loader
     * @throws IllegalArgumentException if the format does not support the data model type
     */
    private static <T> DataModelSourceFactory load(Configuration configuration, DataModelDefinition<T> definition,
            DataFormat<? super T> format, File source) throws IOException, InterruptedException {
        checkDataType(definition, format);
        if (!(format instanceof BinaryStreamFormat<?>)) {
            HadoopFileFormat<? super T> hadoopFormat =
                    HadoopDataSourceUtil.toHadoopFileFormat(configuration, format);
            return load0(definition, hadoopFormat, source);
        }
        BinaryStreamFormat<? super T> streamFormat = (BinaryStreamFormat<? super T>) format;
        return load0(definition, streamFormat, source);
    }

    /**
     * Loads the content located at a URL with the given format instance.
     * <p>
     * A {@code file:} URL is converted into a local {@link File} and handled by the file-based
     * overload. When that conversion is not possible, or when the URL uses another protocol, the
     * content is read through the URL itself: a {@link BinaryStreamFormat} is used as is, and any other
     * format is first converted with {@code HadoopDataSourceUtil.toHadoopFileFormat}.
     * </p>
     * @param <T> the data model type
     * @param configuration the configuration used for the Hadoop file format conversion
     * @param definition the definition of the data model to load
     * @param format the data format instance
     * @param source the location of the source data
     * @return a factory of sources over the loaded data
     * @throws IOException if loading fails with an I/O error
     * @throws InterruptedException propagated from the underlying loader
     * @throws IllegalArgumentException if the format does not support the data model type
     */
    private static <T> DataModelSourceFactory load(Configuration configuration, DataModelDefinition<T> definition,
            DataFormat<? super T> format, URL source) throws IOException, InterruptedException {
        checkDataType(definition, format);
        if (source.getProtocol().equals("file")) { //$NON-NLS-1$
            File file = null;
            try {
                file = new File(source.toURI());
            } catch (URISyntaxException | IllegalArgumentException e) {
                // File(URI) rejects syntactically valid URIs that carry an authority, query, or fragment
                // by throwing IllegalArgumentException; in both cases fall back to reading via the URL
                LOG.debug("failed to convert URL into local file path: {}", source, e); //$NON-NLS-1$
            }
            if (file != null) {
                return load(configuration, definition, format, file);
            }
        }
        if (format instanceof BinaryStreamFormat<?>) {
            return load0(definition, (BinaryStreamFormat<? super T>) format, source);
        }
        HadoopFileFormat<? super T> hFormat = HadoopDataSourceUtil.toHadoopFileFormat(configuration, format);
        return load0(definition, hFormat, source);
    }

    /**
     * Creates a new instance of the given data format class through its no-argument constructor.
     * <p>
     * If the created object is {@link Configurable}, the given configuration is set on it before it
     * is returned.
     * </p>
     * @param <T> the data type the caller expects the format to handle (not verified here)
     * @param configuration the configuration to set on {@link Configurable} formats
     * @param formatClass the data format class to instantiate
     * @return the created format
     * @throws IllegalStateException if the class cannot be instantiated reflectively, including the
     *     case where its constructor throws an exception
     */
    private static <T> DataFormat<T> newDataFormat(Configuration configuration,
            Class<? extends DataFormat<?>> formatClass) {
        try {
            // Class.newInstance() is deprecated and rethrows checked constructor exceptions without
            // declaring them; Constructor.newInstance() wraps them in InvocationTargetException,
            // which is a ReflectiveOperationException and is therefore handled below
            @SuppressWarnings("unchecked")
            DataFormat<T> format = (DataFormat<T>) formatClass.getDeclaredConstructor().newInstance();
            if (format instanceof Configurable) {
                ((Configurable) format).setConf(configuration);
            }
            return format;
        } catch (ReflectiveOperationException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Verifies that the type supported by the format is assignable from the data model class.
     * @param definition the data model definition providing the model class
     * @param format the data format providing the supported type
     * @throws IllegalArgumentException if the supported type is not assignable from the model class
     */
    private static void checkDataType(DataModelDefinition<?> definition, DataFormat<?> format) {
        Class<?> dataType = definition.getModelClass();
        Class<?> supportedType = format.getSupportedType();
        if (supportedType.isAssignableFrom(dataType)) {
            return;
        }
        throw new IllegalArgumentException(MessageFormat.format(
                "inconsistent data format: data-type={0}, format-type={1}, supported-type={2}",
                dataType.getName(),
                format.getClass().getName(),
                supportedType.getName()));
    }

    /**
     * Reads a local file through a binary stream format and collects every record.
     * <p>
     * The string form of the file URI is handed to the format as the path, and both the file stream
     * and the model input are closed before this method returns.
     * </p>
     * @param <T> the data model type
     * @param definition the definition of the data model to load
     * @param format the stream format used to decode the file
     * @param source the file to read
     * @return a factory of sources over the collected data
     * @throws IOException if reading fails with an I/O error
     * @throws InterruptedException propagated from the format
     */
    private static <T> DataModelSourceFactory load0(DataModelDefinition<T> definition,
            BinaryStreamFormat<? super T> format, File source) throws IOException, InterruptedException {
        try (InputStream contents = new FileInputStream(source);
                ModelInput<? super T> reader = format.createInput(
                        definition.getModelClass(), source.toURI().toString(), contents)) {
            return collect(definition, reader);
        }
    }

    /**
     * Reads a local file through a Hadoop file format and collects every record.
     * <p>
     * The file is addressed by a Hadoop path built from its URI, on the local file system obtained
     * from the configuration of the format. The model input is closed before this method returns.
     * </p>
     * @param <T> the data model type
     * @param definition the definition of the data model to load
     * @param format the Hadoop file format used to decode the file
     * @param source the file to read
     * @return a factory of sources over the collected data
     * @throws IOException if reading fails with an I/O error
     * @throws InterruptedException propagated from the format
     */
    private static <T> DataModelSourceFactory load0(DataModelDefinition<T> definition,
            HadoopFileFormat<? super T> format, File source) throws IOException, InterruptedException {
        org.apache.hadoop.fs.FileSystem localFs = org.apache.hadoop.fs.FileSystem.getLocal(format.getConf());
        org.apache.hadoop.fs.Path location = new org.apache.hadoop.fs.Path(source.toURI());
        try (ModelInput<? super T> reader = format.createInput(
                definition.getModelClass(), localFs, location, new Counter())) {
            return collect(definition, reader);
        }
    }

    /**
     * Reads the content of a URL through a binary stream format and collects every record.
     * <p>
     * The string form of the URL is handed to the format as the path, and both the URL stream and
     * the model input are closed before this method returns.
     * </p>
     * @param <T> the data model type
     * @param definition the definition of the data model to load
     * @param format the stream format used to decode the content
     * @param source the location to read
     * @return a factory of sources over the collected data
     * @throws IOException if reading fails with an I/O error
     * @throws InterruptedException propagated from the format
     */
    private static <T> DataModelSourceFactory load0(DataModelDefinition<T> definition,
            BinaryStreamFormat<? super T> format, URL source) throws IOException, InterruptedException {
        try (InputStream contents = source.openStream();
                ModelInput<? super T> reader = format.createInput(
                        definition.getModelClass(), source.toString(), contents)) {
            return collect(definition, reader);
        }
    }

    /**
     * Reads the content of a URL through a Hadoop file format and collects every record.
     * <p>
     * The content is first copied into a file inside a fresh temporary directory, and that file is
     * then loaded with the file-based overload. The file is named after the last non-blank segment of
     * the URL path; {@code testing.file} is used when there is no such segment or when the segment is
     * a relative directory reference ({@code .} or {@code ..}), which would otherwise resolve to an
     * existing directory and make the copy fail. The temporary directory is deleted on a best-effort
     * basis before this method returns.
     * </p>
     * @param <T> the data model type
     * @param definition the definition of the data model to load
     * @param format the Hadoop file format used to decode the content
     * @param source the location to read
     * @return a factory of sources over the collected data
     * @throws IOException if copying or reading fails with an I/O error
     * @throws InterruptedException propagated from the format
     */
    private static <T> DataModelSourceFactory load0(DataModelDefinition<T> definition,
            HadoopFileFormat<? super T> format, URL source) throws IOException, InterruptedException {
        List<String> segments = Arrays.stream(source.getPath().split("/")) //$NON-NLS-1$
                .map(String::trim).filter(s -> s.isEmpty() == false).collect(Collectors.toList());
        String name;
        if (segments.isEmpty()) {
            name = "testing.file"; //$NON-NLS-1$
        } else {
            name = segments.get(segments.size() - 1);
            // "." and ".." would resolve to the temporary directory itself or to its parent
            if (name.equals(".") || name.equals("..")) { //$NON-NLS-1$ //$NON-NLS-2$
                name = "testing.file"; //$NON-NLS-1$
            }
        }
        Path tmpdir = Files.createTempDirectory("asakusa-"); //$NON-NLS-1$
        try (InputStream in = source.openStream()) {
            Path target = tmpdir.resolve(name);
            Files.copy(in, target);
            return load0(definition, format, target.toFile());
        } finally {
            // best-effort clean-up: only warn when the directory is still present afterwards
            File dir = tmpdir.toFile();
            if (FileUtils.deleteQuietly(dir) == false && dir.exists()) {
                LOG.warn(MessageFormat.format("failed to delete a temporary file: {0}", tmpdir));
            }
        }
    }

    /**
     * Reads every record from the input and keeps its reflection in memory.
     * <p>
     * A single data model object is created and reused as the read buffer; each successfully read
     * record is converted with {@code definition.toReflection} and appended to a list. The returned
     * factory creates sources that iterate over that list.
     * </p>
     * @param <T> the data model type
     * @param definition the definition used to create the buffer object and the reflections
     * @param input the input to drain (not closed by this method)
     * @return a factory of sources over the collected reflections
     * @throws IOException if reading from the input fails
     */
    private static <T> DataModelSourceFactory collect(DataModelDefinition<T> definition,
            ModelInput<? super T> input) throws IOException {
        final List<DataModelReflection> records = new ArrayList<>();
        final T buffer = newDataObject(definition);
        for (boolean more = input.readTo(buffer); more; more = input.readTo(buffer)) {
            records.add(definition.toReflection(buffer));
        }
        return new DataModelSourceFactory() {
            @Override
            public <U> DataModelSource createSource(DataModelDefinition<U> def, TestContext context) {
                return new IteratorDataModelSource(records.iterator());
            }
        };
    }

    /**
     * Creates a sink factory that writes data models into a local file with the given data format.
     * <p>
     * The format class is instantiated and checked against the data model type up front, and is
     * converted with {@code HadoopDataSourceUtil.toHadoopFileFormat}. Each sink created by the
     * returned factory is a new {@code DirectOutputSink} on the destination path; an
     * {@link InterruptedException} raised while creating it is rethrown as an
     * {@link InterruptedIOException} carrying the original exception as its cause.
     * </p>
     * @param <T> the data model type
     * @param configuration the configuration passed to the format and its conversion
     * @param definition the definition of the data model to write
     * @param formatClass the data format class to instantiate
     * @param destination the file to write into
     * @return the sink factory
     * @throws IOException declared for failures while preparing the format
     * @throws IllegalArgumentException if the format does not support the data model type
     */
    static <T> DataModelSinkFactory dump(Configuration configuration, DataModelDefinition<T> definition,
            Class<? extends DataFormat<?>> formatClass, File destination) throws IOException {
        final DataFormat<? super T> dataFormat = newDataFormat(configuration, formatClass);
        checkDataType(definition, dataFormat);
        final org.apache.hadoop.fs.Path target = new org.apache.hadoop.fs.Path(destination.toURI());
        final HadoopFileFormat<? super T> hadoopFormat =
                HadoopDataSourceUtil.toHadoopFileFormat(configuration, dataFormat);
        return new DataModelSinkFactory() {
            @Override
            public <S> DataModelSink createSink(DataModelDefinition<S> def, TestContext context)
                    throws IOException {
                try {
                    return new DirectOutputSink<>(definition, hadoopFormat, target);
                } catch (InterruptedException e) {
                    InterruptedIOException wrapped = new InterruptedIOException();
                    wrapped.initCause(e);
                    throw wrapped;
                }
            }
        };
    }

    /**
     * Creates a new data model object through the no-argument constructor of the model class.
     * @param <T> the data model type
     * @param definition the definition providing the model class
     * @return the created object
     * @throws IllegalStateException if the model class cannot be instantiated reflectively, including
     *     the case where its constructor throws an exception
     */
    private static <T> T newDataObject(DataModelDefinition<T> definition) {
        try {
            // Class.newInstance() is deprecated and rethrows checked constructor exceptions without
            // declaring them; Constructor.newInstance() wraps them in InvocationTargetException,
            // which is a ReflectiveOperationException and is therefore handled below
            return definition.getModelClass().getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * A sink that turns each reflection back into a data model object and writes it through an output
     * opened by a Hadoop file format on the local file system.
     * @param <T> the data model type
     */
    private static final class DirectOutputSink<T> implements DataModelSink {

        /** Converts incoming reflections into data model objects. */
        private final DataModelDefinition<T> modelDefinition;

        /** The output opened on the destination; closed by {@link #close()}. */
        private final ModelOutput<? super T> writer;

        /**
         * Opens an output on the destination path.
         * @param definition the definition of the data model to write
         * @param format the Hadoop file format that creates the output
         * @param destination the path to write into, on the local file system obtained from the
         *     configuration of the format
         * @throws IOException if the output cannot be opened
         * @throws InterruptedException propagated from the format
         */
        DirectOutputSink(DataModelDefinition<T> definition, HadoopFileFormat<? super T> format,
                org.apache.hadoop.fs.Path destination) throws IOException, InterruptedException {
            org.apache.hadoop.fs.FileSystem localFs = org.apache.hadoop.fs.FileSystem.getLocal(format.getConf());
            this.modelDefinition = definition;
            this.writer = format.createOutput(definition.getModelClass(), localFs, destination, new Counter());
        }

        /**
         * Converts the reflection into a data model object and writes it to the output.
         */
        @Override
        public void put(DataModelReflection model) throws IOException {
            this.writer.write(this.modelDefinition.toObject(model));
        }

        /**
         * Closes the underlying output.
         */
        @Override
        public void close() throws IOException {
            this.writer.close();
        }
    }
}