com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtilTest.java Source code

Introduction

Here is the source code for com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtilTest.java, the unit test for the HadoopDataSourceUtil class in the Asakusa Framework runtime. The tests cover profile loading, transaction info files, pattern-based file search, minimal path covering, and file moves.
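
As a minimal sketch (not part of the test file itself), the profile-loading tests below register a data source by setting configuration keys built from HadoopDataSourceUtil.PREFIX, with MockHadoopDataSource standing in for a real data source implementation:

    Configuration conf = new Configuration();
    // declare a data source with the ID "root", mapped to the logical root path
    conf.set(HadoopDataSourceUtil.PREFIX + "root", MockHadoopDataSource.class.getName());
    conf.set(HadoopDataSourceUtil.PREFIX + "root.path", "/");
    List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf);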

Source

/**
 * Copyright 2011-2016 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.directio.hadoop;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.hamcrest.BaseMatcher;
import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import com.asakusafw.runtime.compatibility.FileSystemCompatibility;
import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DirectDataSource;
import com.asakusafw.runtime.directio.DirectDataSourceProfile;
import com.asakusafw.runtime.directio.DirectDataSourceRepository;
import com.asakusafw.runtime.directio.FilePattern;

/**
 * Test for {@link HadoopDataSourceUtil}.
 */
public class HadoopDataSourceUtilTest {

    /**
     * Temporary folder for testing.
     */
    @Rule
    public final TemporaryFolder folder = new TemporaryFolder();

    /**
     * Loads a simple profile.
     */
    @Test
    public void loadProfiles_simple() {
        Configuration conf = new Configuration();
        conf.set(key("root"), MockHadoopDataSource.class.getName());
        conf.set(key("root", "path"), "/");

        List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf);
        assertThat(profiles.size(), is(1));

        DirectDataSourceProfile profile = find(profiles, "");
        assertThat(profile.getTargetClass(), equalTo((Object) MockHadoopDataSource.class));
        assertThat(profile.getAttributes(), is(map()));
    }

    /**
     * Loads a profile with path.
     */
    @Test
    public void loadProfiles_path() {
        Configuration conf = new Configuration();
        conf.set(key("root"), MockHadoopDataSource.class.getName());
        conf.set(key("root", "path"), "example/path");

        List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf);
        assertThat(profiles.size(), is(1));

        DirectDataSourceProfile profile = find(profiles, "example/path");
        assertThat(profile.getTargetClass(), equalTo((Object) MockHadoopDataSource.class));
        assertThat(profile.getAttributes(), is(map()));
    }

    /**
     * Loads a profile with attributes.
     */
    @Test
    public void loadProfiles_attribute() {
        Configuration conf = new Configuration();
        conf.set(key("root"), MockHadoopDataSource.class.getName());
        conf.set(key("root", "path"), "/");
        conf.set(key("root", "hello1"), "world1");
        conf.set(key("root", "hello2"), "world2");
        conf.set(key("root", "hello3"), "world3");

        List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf);
        assertThat(profiles.size(), is(1));

        DirectDataSourceProfile profile = find(profiles, "");
        assertThat(profile.getTargetClass(), equalTo((Object) MockHadoopDataSource.class));
        assertThat(profile.getAttributes(), is(map("hello1", "world1", "hello2", "world2", "hello3", "world3")));
    }

    /**
     * Loads multiple profiles.
     */
    @Test
    public void loadProfiles_multiple() {
        Configuration conf = new Configuration();

        conf.set(key("a"), MockHadoopDataSource.class.getName());
        conf.set(key("a", "path"), "aaa");

        conf.set(key("b"), MockHadoopDataSource.class.getName());
        conf.set(key("b", "path"), "bbb");

        conf.set(key("c"), MockHadoopDataSource.class.getName());
        conf.set(key("c", "path"), "ccc");

        List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf);
        assertThat(profiles.size(), is(3));

        DirectDataSourceProfile a = find(profiles, "aaa");
        assertThat(a.getTargetClass(), equalTo((Object) MockHadoopDataSource.class));
        assertThat(a.getAttributes(), is(map()));

        DirectDataSourceProfile b = find(profiles, "bbb");
        assertThat(b.getTargetClass(), equalTo((Object) MockHadoopDataSource.class));
        assertThat(b.getAttributes(), is(map()));

        DirectDataSourceProfile c = find(profiles, "ccc");
        assertThat(c.getTargetClass(), equalTo((Object) MockHadoopDataSource.class));
        assertThat(c.getAttributes(), is(map()));
    }

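    // Builds an expected attribute map from alternating key/value arguments.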
    private Map<String, String> map(String... kvs) {
        assertThat(kvs.length % 2, is(0));
        Map<String, String> results = new HashMap<>();
        for (int i = 0; i < kvs.length; i += 2) {
            results.put(kvs[i], kvs[i + 1]);
        }
        return results;
    }

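    // Returns the profile with the given path, or fails the test if absent.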
    private DirectDataSourceProfile find(List<DirectDataSourceProfile> profiles, String path) {
        for (DirectDataSourceProfile p : profiles) {
            if (p.getPath().equals(path)) {
                return p;
            }
        }
        throw new AssertionError(path);
    }

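    // Builds a configuration key: HadoopDataSourceUtil.PREFIX + first + "." + each of rest.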
    private String key(String first, String... rest) {
        StringBuilder buf = new StringBuilder();
        buf.append(HadoopDataSourceUtil.PREFIX);
        buf.append(first);
        for (String s : rest) {
            buf.append(".");
            buf.append(s);
        }
        return buf.toString();
    }

    /**
     * Creates a simple repository.
     * @throws Exception if failed
     */
    @Test
    public void loadRepository() throws Exception {
        Configuration conf = new Configuration();
        conf.set(key("testing"), MockHadoopDataSource.class.getName());
        conf.set(key("testing", "path"), "testing");
        conf.set(key("testing", "hello"), "world");
        DirectDataSourceRepository repo = HadoopDataSourceUtil.loadRepository(conf);
        DirectDataSource ds = repo.getRelatedDataSource("testing");
        assertThat(ds, instanceOf(MockHadoopDataSource.class));
        MockHadoopDataSource mock = (MockHadoopDataSource) ds;
        assertThat(mock.conf, is(notNullValue()));
        assertThat(mock.profile.getPath(), is("testing"));
    }

    /**
     * Test for transaction info.
     * @throws Exception if failed
     */
    @Test
    public void transactionInfo() throws Exception {
        Configuration conf = new Configuration();
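        // point the Direct I/O system directory at the temporary folder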
        conf.set(HadoopDataSourceUtil.KEY_SYSTEM_DIR, folder.getRoot().getAbsoluteFile().toURI().toString());

        assertThat("empty system dir", folder.getRoot().listFiles(), is(new File[0]));
        assertThat(HadoopDataSourceUtil.findAllTransactionInfoFiles(conf).size(), is(0));

        Path t1 = HadoopDataSourceUtil.getTransactionInfoPath(conf, "ex1");
        assertThat(HadoopDataSourceUtil.getTransactionInfoExecutionId(t1), is("ex1"));
        t1.getFileSystem(conf).create(t1).close();

        assertThat(folder.getRoot().listFiles().length, is(greaterThan(0)));

        Path t2 = HadoopDataSourceUtil.getTransactionInfoPath(conf, "ex2");
        assertThat(t2, is(not(t1)));
        assertThat(HadoopDataSourceUtil.getTransactionInfoExecutionId(t2), is("ex2"));
        t2.getFileSystem(conf).create(t2).close();

        Path c2 = HadoopDataSourceUtil.getCommitMarkPath(conf, "ex2");
        assertThat(c2, is(not(t2)));
        c2.getFileSystem(conf).create(c2).close();

        List<Path> paths = new ArrayList<>();
        for (FileStatus stat : HadoopDataSourceUtil.findAllTransactionInfoFiles(conf)) {
            paths.add(stat.getPath());
        }
        assertThat(paths.size(), is(2));
        assertThat(paths, hasItem(t1));
        assertThat(paths, hasItem(t2));
    }

    /**
     * Searches by token.
     * @throws Exception if failed
     */
    @Test
    public void search_direct() throws Exception {
        touch("a.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("a.csv"));
        assertThat(normalize(results), is(path("a.csv")));
    }

    /**
     * Searches by token in nested directories.
     * @throws Exception if failed
     */
    @Test
    public void search_direct_deep() throws Exception {
        touch("a.csv");
        touch("a/a.csv");
        touch("a/a/a.csv");
        touch("a/a/a/a.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("a/a/a.csv"));
        assertThat(normalize(results), is(path("a/a/a.csv")));
    }

    /**
     * Searches by wildcard.
     * @throws Exception if failed
     */
    @Test
    public void search_wildcard() throws Exception {
        touch("a.csv");
        touch("b.tsv");
        touch("c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("*.csv"));
        assertThat(normalize(results), is(path("a.csv", "c.csv")));
    }

    /**
     * Searches by wildcard for directories.
     * @throws Exception if failed
     */
    @Test
    public void search_wildcard_dir() throws Exception {
        touch("a/a.csv");
        touch("b/b/b.csv");
        touch("c/c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("*/*.csv"));
        assertThat(normalize(results), is(path("a/a.csv", "c/c.csv")));
    }

    /**
     * Searches using selection.
     * @throws Exception if failed
     */
    @Test
    public void search_selection() throws Exception {
        touch("a.csv");
        touch("b.csv");
        touch("c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("{a|b}.csv"));
        assertThat(normalize(results), is(path("a.csv", "b.csv")));
    }

    /**
     * Searches using multiple selections.
     * @throws Exception if failed
     */
    @Test
    public void search_selection_multiple() throws Exception {
        touch("a/a.csv");
        touch("a/b.csv");
        touch("a/c.csv");
        touch("b/a.csv");
        touch("b/b.csv");
        touch("b/c.csv");
        touch("c/a.csv");
        touch("c/b.csv");
        touch("c/c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(),
                FilePattern.compile("{a|b}/{b|c}.csv"));
        assertThat(normalize(results), is(path("a/b.csv", "a/c.csv", "b/b.csv", "b/c.csv")));
    }

    /**
     * Searches using complex selection.
     * @throws Exception if failed
     */
    @Test
    public void search_selection_complex() throws Exception {
        for (int year = 2001; year <= 2010; year++) {
            for (int month = 1; month <= 12; month++) {
                touch(String.format("data/%04d/%02d.csv", year, month));
            }
        }
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(),
                FilePattern.compile("data/{2005/12|2003/11}.csv"));
        assertThat(normalize(results), is(path("data/2005/12.csv", "data/2003/11.csv")));
    }

    /**
     * Searches by recursive traversal.
     * @throws Exception if failed
     */
    @Test
    public void search_traverse() throws Exception {
        touch("a/a.csv");
        touch("b/b.csv");
        touch("c/c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("**"));
        assertThat(normalize(results), is(path("", "a", "b", "c", "a/a.csv", "b/b.csv", "c/c.csv")));
    }

    /**
     * Searches by recursive traversal, matching only files.
     * @throws Exception if failed
     */
    @Test
    public void search_traverse_file() throws Exception {
        touch("a/a.csv");
        touch("b/b.csv");
        touch("c/c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("**/*.csv"));
        assertThat(normalize(results), is(path("a/a.csv", "b/b.csv", "c/c.csv")));
    }

    /**
     * A single file does not cover anything.
     * @throws Exception if failed
     */
    @Test
    public void minimalCovered_trivial() throws Exception {
        touch("a.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("**/*.csv"));
        assertThat(raw.size(), is(1));
        List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw);
        assertThat(normalize(results), is(path("a.csv")));
    }

    /**
     * Sibling files do not cover each other.
     * @throws Exception if failed
     */
    @Test
    public void minimalCovered_siblings() throws Exception {
        touch("dir/a.csv");
        touch("dir/b.csv");
        touch("dir/c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("**/*.csv"));
        assertThat(raw.size(), is(3));
        List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw);
        assertThat(normalize(results), is(path("dir/a.csv", "dir/b.csv", "dir/c.csv")));
    }

    /**
     * Checks that a parent directory covers its contents.
     * @throws Exception if failed
     */
    @Test
    public void minimalCovered_parent() throws Exception {
        touch("dir/a.csv");
        touch("dir/b.csv");
        touch("dir/c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("*/**"));
        assertThat(raw.size(), is(4));
        List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw);
        assertThat(normalize(results), is(path("dir")));
    }

    /**
     * Checks covering of deeply nested files.
     * @throws Exception if failed
     */
    @Test
    public void minimalCovered_deep() throws Exception {
        touch("dir/a.csv");
        touch("dir/a/b.csv");
        touch("dir/a/b/c.csv");
        FileSystem fs = getTempFileSystem();
        List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("dir/**"));
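        // drop the "dir" entry itself so that only its descendants remain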
        for (Iterator<FileStatus> iterator = raw.iterator(); iterator.hasNext();) {
            FileStatus fileStatus = iterator.next();
            if (fileStatus.getPath().getName().equals("dir")) {
                iterator.remove();
            }
        }
        assertThat(raw.size(), is(5));
        List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw);
        assertThat(normalize(results), is(path("dir/a.csv", "dir/a")));
    }

    /**
     * Moves a single file.
     * @throws Exception if failed
     */
    @Test
    public void move_simple() throws Exception {
        touch("src/a.csv");
        FileSystem fs = getTempFileSystem();
        HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"));
        assertThat(collect(), is(path("dst/a.csv")));
    }

    /**
     * Moves multiple files.
     * @throws Exception if failed
     */
    @Test
    public void move_multiple() throws Exception {
        touch("src/a.csv");
        touch("src/b.csv");
        touch("src/c.csv");
        FileSystem fs = getTempFileSystem();
        HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"));
        assertThat(collect(), is(path("dst/a.csv", "dst/b.csv", "dst/c.csv")));
    }

    /**
     * Moves deeply nested files.
     * @throws Exception if failed
     */
    @Test
    public void move_deep() throws Exception {
        touch("src/a.csv");
        touch("src/a/b.csv");
        touch("src/a/b/c.csv");
        FileSystem fs = getTempFileSystem();
        HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"));
        assertThat(collect(), is(path("dst/a.csv", "dst/a/b.csv", "dst/a/b/c.csv")));
    }

    /**
     * Moves files into an existing destination (merge).
     * @throws Exception if failed
     */
    @Test
    public void move_merge() throws Exception {
        touch("src/a.csv");
        touch("src/b.csv");
        touch("dst/c.csv");
        FileSystem fs = getTempFileSystem();
        HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"));
        assertThat(collect(), is(path("dst/a.csv", "dst/b.csv", "dst/c.csv")));
    }

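    // Collects every regular file under the temporary folder as a normalized relative path.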
    private List<String> collect() throws IOException {
        List<FileStatus> all = HadoopDataSourceUtil.search(getTempFileSystem(), getBase(),
                FilePattern.compile("**"));
        List<FileStatus> files = new ArrayList<>();
        for (FileStatus stat : all) {
            if (!FileSystemCompatibility.isDirectory(stat)) {
                files.add(stat);
            }
        }
        return normalize(files);
    }

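    // Converts file statuses into sorted, '/'-separated paths relative to the temporary folder.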
    private List<String> normalize(List<FileStatus> stats) throws IOException {
        File base = folder.getRoot().getCanonicalFile();
        List<String> normalized = new ArrayList<>();
        for (FileStatus stat : stats) {
            URI uri = stat.getPath().toUri();
            try {
                File file = new File(uri).getCanonicalFile();
                String f = file.getAbsolutePath();
                String b = base.getAbsolutePath();
                assertThat(f, startsWith(b));
                String r = f.substring(b.length());
                while (r.startsWith(File.separator)) {
                    r = r.substring(1);
                }
                if (File.separatorChar != '/') {
                    r = r.replace(File.separatorChar, '/');
                }
                normalized.add(r);
            } catch (IOException e) {
                throw new AssertionError(e);
            }
        }
        Collections.sort(normalized);
        return normalized;
    }

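    // Matches a list of path strings, ignoring element order.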
    private Matcher<List<String>> path(final String... paths) {
        return new BaseMatcher<List<String>>() {
            @Override
            public boolean matches(Object obj) {
                @SuppressWarnings("unchecked")
                List<String> actuals = (List<String>) obj;
                List<String> normalized = new ArrayList<>(actuals);
                List<String> expected = new ArrayList<>();
                Collections.addAll(expected, paths);
                Collections.sort(expected);
                Collections.sort(normalized);
                return expected.equals(normalized);
            }

            @Override
            public void describeTo(Description desc) {
                desc.appendText(Arrays.toString(paths));
            }
        };
    }

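    // Returns the local file system that backs the temporary folder.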
    private FileSystem getTempFileSystem() throws IOException {
        Configuration conf = new Configuration();
        LocalFileSystem local = FileSystem.getLocal(conf);
        return local;
    }

    private Path getBase() {
        return new Path(folder.getRoot().toURI());
    }

    private Path getPath(String path) {
        return new Path(getBase(), path);
    }

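    // Creates an empty file (and any missing parent directories) under the temporary folder.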
    private void touch(String path) throws IOException {
        File file = new File(folder.getRoot(), path);
        file.getParentFile().mkdirs();
        file.createNewFile();
        assertThat(file.isFile(), is(true));
    }
}