Java tutorial
/** * Copyright 2011-2017 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.testdriver; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InterruptedIOException; import java.net.URISyntaxException; import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.asakusafw.runtime.directio.BinaryStreamFormat; import com.asakusafw.runtime.directio.Counter; import com.asakusafw.runtime.directio.DataFormat; import com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil; import com.asakusafw.runtime.directio.hadoop.HadoopFileFormat; import com.asakusafw.runtime.io.ModelInput; import com.asakusafw.runtime.io.ModelOutput; import com.asakusafw.testdriver.core.DataModelDefinition; import com.asakusafw.testdriver.core.DataModelReflection; import com.asakusafw.testdriver.core.DataModelSink; import com.asakusafw.testdriver.core.DataModelSinkFactory; import com.asakusafw.testdriver.core.DataModelSource; import com.asakusafw.testdriver.core.DataModelSourceFactory; import com.asakusafw.testdriver.core.IteratorDataModelSource; import com.asakusafw.testdriver.core.TestContext; final class DirectIoUtil { static final Logger LOG = LoggerFactory.getLogger(DirectIoUtil.class); private DirectIoUtil() { return; } static <T> DataModelSourceFactory load(Configuration configuration, DataModelDefinition<T> definition, Class<? extends DataFormat<?>> formatClass, URL source) throws IOException, InterruptedException { DataFormat<? super T> format = newDataFormat(configuration, formatClass); return load(configuration, definition, format, source); } static <T> DataModelSourceFactory load(Configuration configuration, DataModelDefinition<T> definition, Class<? extends DataFormat<?>> formatClass, File source) throws IOException, InterruptedException { DataFormat<? super T> format = newDataFormat(configuration, formatClass); return load(configuration, definition, format, source); } private static <T> DataModelSourceFactory load(Configuration configuration, DataModelDefinition<T> definition, DataFormat<? super T> format, File source) throws IOException, InterruptedException { checkDataType(definition, format); if (format instanceof BinaryStreamFormat<?>) { return load0(definition, (BinaryStreamFormat<? super T>) format, source); } HadoopFileFormat<? super T> hFormat = HadoopDataSourceUtil.toHadoopFileFormat(configuration, format); return load0(definition, hFormat, source); } private static <T> DataModelSourceFactory load(Configuration configuration, DataModelDefinition<T> definition, DataFormat<? super T> format, URL source) throws IOException, InterruptedException { checkDataType(definition, format); if (source.getProtocol().equals("file")) { //$NON-NLS-1$ File file = null; try { file = new File(source.toURI()); } catch (URISyntaxException e) { LOG.debug("failed to convert URL into local file path: {}", source, e); //$NON-NLS-1$ } if (file != null) { return load(configuration, definition, format, file); } } if (format instanceof BinaryStreamFormat<?>) { return load0(definition, (BinaryStreamFormat<? super T>) format, source); } HadoopFileFormat<? super T> hFormat = HadoopDataSourceUtil.toHadoopFileFormat(configuration, format); return load0(definition, hFormat, source); } private static <T> DataFormat<T> newDataFormat(Configuration configuration, Class<? extends DataFormat<?>> formatClass) { try { @SuppressWarnings("unchecked") DataFormat<T> format = (DataFormat<T>) formatClass.newInstance(); if (format instanceof Configurable) { ((Configurable) format).setConf(configuration); } return format; } catch (ReflectiveOperationException e) { throw new IllegalStateException(e); } } private static void checkDataType(DataModelDefinition<?> definition, DataFormat<?> format) { if (format.getSupportedType().isAssignableFrom(definition.getModelClass()) == false) { throw new IllegalArgumentException(MessageFormat.format( "inconsistent data format: data-type={0}, format-type={1}, supported-type={2}", definition.getModelClass().getName(), format.getClass().getName(), format.getSupportedType().getName())); } } private static <T> DataModelSourceFactory load0(DataModelDefinition<T> definition, BinaryStreamFormat<? super T> format, File source) throws IOException, InterruptedException { String path = source.toURI().toString(); try (InputStream stream = new FileInputStream(source); ModelInput<? super T> input = format.createInput(definition.getModelClass(), path, stream)) { return collect(definition, input); } } private static <T> DataModelSourceFactory load0(DataModelDefinition<T> definition, HadoopFileFormat<? super T> format, File source) throws IOException, InterruptedException { try (ModelInput<? super T> input = format.createInput(definition.getModelClass(), org.apache.hadoop.fs.FileSystem.getLocal(format.getConf()), new org.apache.hadoop.fs.Path(source.toURI()), new Counter())) { return collect(definition, input); } } private static <T> DataModelSourceFactory load0(DataModelDefinition<T> definition, BinaryStreamFormat<? super T> format, URL source) throws IOException, InterruptedException { String path = source.toString(); try (InputStream stream = source.openStream(); ModelInput<? super T> input = format.createInput(definition.getModelClass(), path, stream)) { return collect(definition, input); } } private static <T> DataModelSourceFactory load0(DataModelDefinition<T> definition, HadoopFileFormat<? super T> format, URL source) throws IOException, InterruptedException { List<String> segments = Arrays.stream(source.getPath().split("/")) //$NON-NLS-1$ .map(String::trim).filter(s -> s.isEmpty() == false).collect(Collectors.toList()); String name; if (segments.isEmpty()) { name = "testing.file"; //$NON-NLS-1$ } else { name = segments.get(segments.size() - 1); } Path tmpdir = Files.createTempDirectory("asakusa-"); //$NON-NLS-1$ try (InputStream in = source.openStream()) { Path target = tmpdir.resolve(name); Files.copy(in, target); return load0(definition, format, target.toFile()); } finally { File dir = tmpdir.toFile(); if (FileUtils.deleteQuietly(dir) == false && dir.exists()) { LOG.warn(MessageFormat.format("failed to delete a temporary file: {0}", tmpdir)); } } } private static <T> DataModelSourceFactory collect(DataModelDefinition<T> definition, ModelInput<? super T> input) throws IOException { List<DataModelReflection> loaded = new ArrayList<>(); T object = newDataObject(definition); while (input.readTo(object)) { DataModelReflection ref = definition.toReflection(object); loaded.add(ref); } return new DataModelSourceFactory() { @Override public <U> DataModelSource createSource(DataModelDefinition<U> def, TestContext context) { return new IteratorDataModelSource(loaded.iterator()); } }; } static <T> DataModelSinkFactory dump(Configuration configuration, DataModelDefinition<T> definition, Class<? extends DataFormat<?>> formatClass, File destination) throws IOException { DataFormat<? super T> format = newDataFormat(configuration, formatClass); checkDataType(definition, format); org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(destination.toURI()); HadoopFileFormat<? super T> hFormat = HadoopDataSourceUtil.toHadoopFileFormat(configuration, format); return new DataModelSinkFactory() { @Override public <S> DataModelSink createSink(DataModelDefinition<S> def, TestContext context) throws IOException { try { return new DirectOutputSink<>(definition, hFormat, path); } catch (InterruptedException e) { throw (IOException) new InterruptedIOException().initCause(e); } } }; } private static <T> T newDataObject(DataModelDefinition<T> definition) { try { return definition.getModelClass().newInstance(); } catch (ReflectiveOperationException e) { throw new IllegalStateException(e); } } private static final class DirectOutputSink<T> implements DataModelSink { private final DataModelDefinition<T> definition; private final ModelOutput<? super T> output; DirectOutputSink(DataModelDefinition<T> definition, HadoopFileFormat<? super T> format, org.apache.hadoop.fs.Path destination) throws IOException, InterruptedException { this.definition = definition; this.output = format.createOutput(definition.getModelClass(), org.apache.hadoop.fs.FileSystem.getLocal(format.getConf()), destination, new Counter()); } @Override public void put(DataModelReflection model) throws IOException { T object = definition.toObject(model); output.write(object); } @Override public void close() throws IOException { output.close(); } } }