Java tutorial
/** * Copyright 2011-2016 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.runtime.directio.hadoop; import static org.hamcrest.Matchers.*; import static org.junit.Assert.*; import java.io.File; import java.io.IOException; import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.hamcrest.BaseMatcher; import org.hamcrest.Description; import org.hamcrest.Matcher; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import com.asakusafw.runtime.compatibility.FileSystemCompatibility; import com.asakusafw.runtime.directio.Counter; import com.asakusafw.runtime.directio.DirectDataSource; import com.asakusafw.runtime.directio.DirectDataSourceProfile; import com.asakusafw.runtime.directio.DirectDataSourceRepository; import com.asakusafw.runtime.directio.FilePattern; /** * Test for {@link HadoopDataSourceUtil}. */ public class HadoopDataSourceUtilTest { /** * Temporary folder for testing. */ @Rule public final TemporaryFolder folder = new TemporaryFolder(); /** * Loads a simple profile. */ @Test public void loadProfiles_simple() { Configuration conf = new Configuration(); conf.set(key("root"), MockHadoopDataSource.class.getName()); conf.set(key("root", "path"), "/"); List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf); assertThat(profiles.size(), is(1)); DirectDataSourceProfile profile = find(profiles, ""); assertThat(profile.getTargetClass(), equalTo((Object) MockHadoopDataSource.class)); assertThat(profile.getAttributes(), is(map())); } /** * Loads a profile with path. */ @Test public void loadProfiles_path() { Configuration conf = new Configuration(); conf.set(key("root"), MockHadoopDataSource.class.getName()); conf.set(key("root", "path"), "example/path"); List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf); assertThat(profiles.size(), is(1)); DirectDataSourceProfile profile = find(profiles, "example/path"); assertThat(profile.getTargetClass(), equalTo((Object) MockHadoopDataSource.class)); assertThat(profile.getAttributes(), is(map())); } /** * Loads a profile with attributes. */ @Test public void loadProfiles_attribute() { Configuration conf = new Configuration(); conf.set(key("root"), MockHadoopDataSource.class.getName()); conf.set(key("root", "path"), "/"); conf.set(key("root", "hello1"), "world1"); conf.set(key("root", "hello2"), "world2"); conf.set(key("root", "hello3"), "world3"); List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf); assertThat(profiles.size(), is(1)); DirectDataSourceProfile profile = find(profiles, ""); assertThat(profile.getTargetClass(), equalTo((Object) MockHadoopDataSource.class)); assertThat(profile.getAttributes(), is(map("hello1", "world1", "hello2", "world2", "hello3", "world3"))); } /** * Loads multiple profiles. */ @Test public void loadProfiles_multiple() { Configuration conf = new Configuration(); conf.set(key("a"), MockHadoopDataSource.class.getName()); conf.set(key("a", "path"), "aaa"); conf.set(key("b"), MockHadoopDataSource.class.getName()); conf.set(key("b", "path"), "bbb"); conf.set(key("c"), MockHadoopDataSource.class.getName()); conf.set(key("c", "path"), "ccc"); List<DirectDataSourceProfile> profiles = HadoopDataSourceUtil.loadProfiles(conf); assertThat(profiles.size(), is(3)); DirectDataSourceProfile a = find(profiles, "aaa"); assertThat(a.getTargetClass(), equalTo((Object) MockHadoopDataSource.class)); assertThat(a.getAttributes(), is(map())); DirectDataSourceProfile b = find(profiles, "bbb"); assertThat(b.getTargetClass(), equalTo((Object) MockHadoopDataSource.class)); assertThat(b.getAttributes(), is(map())); DirectDataSourceProfile c = find(profiles, "ccc"); assertThat(c.getTargetClass(), equalTo((Object) MockHadoopDataSource.class)); assertThat(c.getAttributes(), is(map())); } private Map<String, String> map(String... kvs) { assertThat(kvs.length % 2, is(0)); Map<String, String> results = new HashMap<>(); for (int i = 0; i < kvs.length; i += 2) { results.put(kvs[i], kvs[i + 1]); } return results; } private DirectDataSourceProfile find(List<DirectDataSourceProfile> profiles, String path) { for (DirectDataSourceProfile p : profiles) { if (p.getPath().equals(path)) { return p; } } throw new AssertionError(path); } private String key(String first, String... rest) { StringBuilder buf = new StringBuilder(); buf.append(HadoopDataSourceUtil.PREFIX); buf.append(first); for (String s : rest) { buf.append("."); buf.append(s); } return buf.toString(); } /** * create simple repository. * @throws Exception if failed */ @Test public void loadRepository() throws Exception { Configuration conf = new Configuration(); conf.set(key("testing"), MockHadoopDataSource.class.getName()); conf.set(key("testing", "path"), "testing"); conf.set(key("testing", "hello"), "world"); DirectDataSourceRepository repo = HadoopDataSourceUtil.loadRepository(conf); DirectDataSource ds = repo.getRelatedDataSource("testing"); assertThat(ds, instanceOf(MockHadoopDataSource.class)); MockHadoopDataSource mock = (MockHadoopDataSource) ds; assertThat(mock.conf, is(notNullValue())); assertThat(mock.profile.getPath(), is("testing")); } /** * Test for transaction info. * @throws Exception if failed */ @Test public void transactionInfo() throws Exception { Configuration conf = new Configuration(); conf.set(HadoopDataSourceUtil.KEY_SYSTEM_DIR, folder.getRoot().getAbsoluteFile().toURI().toString()); assertThat("empty system dir", folder.getRoot().listFiles(), is(new File[0])); assertThat(HadoopDataSourceUtil.findAllTransactionInfoFiles(conf).size(), is(0)); Path t1 = HadoopDataSourceUtil.getTransactionInfoPath(conf, "ex1"); assertThat(HadoopDataSourceUtil.getTransactionInfoExecutionId(t1), is("ex1")); t1.getFileSystem(conf).create(t1).close(); assertThat(folder.getRoot().listFiles().length, is(greaterThan(0))); Path t2 = HadoopDataSourceUtil.getTransactionInfoPath(conf, "ex2"); assertThat(t2, is(not(t1))); assertThat(HadoopDataSourceUtil.getTransactionInfoExecutionId(t2), is("ex2")); t2.getFileSystem(conf).create(t2).close(); Path c2 = HadoopDataSourceUtil.getCommitMarkPath(conf, "ex2"); assertThat(c2, is(not(t2))); c2.getFileSystem(conf).create(c2).close(); List<Path> paths = new ArrayList<>(); for (FileStatus stat : HadoopDataSourceUtil.findAllTransactionInfoFiles(conf)) { paths.add(stat.getPath()); } assertThat(paths.size(), is(2)); assertThat(paths, hasItem(t1)); assertThat(paths, hasItem(t2)); } /** * search by token. * @throws Exception if failed */ @Test public void search_direct() throws Exception { touch("a.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("a.csv")); assertThat(normalize(results), is(path("a.csv"))); } /** * search by token. * @throws Exception if failed */ @Test public void search_direct_deep() throws Exception { touch("a.csv"); touch("a/a.csv"); touch("a/a/a.csv"); touch("a/a/a/a.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("a/a/a.csv")); assertThat(normalize(results), is(path("a/a/a.csv"))); } /** * search by wildcard. * @throws Exception if failed */ @Test public void search_wildcard() throws Exception { touch("a.csv"); touch("b.tsv"); touch("c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("*.csv")); assertThat(normalize(results), is(path("a.csv", "c.csv"))); } /** * search by wildcard for directories. * @throws Exception if failed */ @Test public void search_wildcard_dir() throws Exception { touch("a/a.csv"); touch("b/b/b.csv"); touch("c/c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("*/*.csv")); assertThat(normalize(results), is(path("a/a.csv", "c/c.csv"))); } /** * search using selection. * @throws Exception if failed */ @Test public void search_selection() throws Exception { touch("a.csv"); touch("b.csv"); touch("c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("{a|b}.csv")); assertThat(normalize(results), is(path("a.csv", "b.csv"))); } /** * search using multiple selection. * @throws Exception if failed */ @Test public void search_selection_multiple() throws Exception { touch("a/a.csv"); touch("a/b.csv"); touch("a/c.csv"); touch("b/a.csv"); touch("b/b.csv"); touch("b/c.csv"); touch("c/a.csv"); touch("c/b.csv"); touch("c/c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("{a|b}/{b|c}.csv")); assertThat(normalize(results), is(path("a/b.csv", "a/c.csv", "b/b.csv", "b/c.csv"))); } /** * search using complex selection. * @throws Exception if failed */ @Test public void search_selection_complex() throws Exception { for (int year = 2001; year <= 2010; year++) { for (int month = 1; month <= 12; month++) { touch(String.format("data/%04d/%02d%s", year, month, ".csv")); } } FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("data/{2005/12|2003/11}.csv")); assertThat(normalize(results), is(path("data/2005/12.csv", "data/2003/11.csv"))); } /** * search by traverse. * @throws Exception if failed */ @Test public void search_traverse() throws Exception { touch("a/a.csv"); touch("b/b.csv"); touch("c/c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("**")); assertThat(normalize(results), is(path("", "a", "b", "c", "a/a.csv", "b/b.csv", "c/c.csv"))); } /** * search by traverse only file. * @throws Exception if failed */ @Test public void search_traverse_file() throws Exception { touch("a/a.csv"); touch("b/b.csv"); touch("c/c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> results = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("**/*.csv")); assertThat(normalize(results), is(path("a/a.csv", "b/b.csv", "c/c.csv"))); } /** * single file does not cover anything. * @throws Exception if failed */ @Test public void minimalCovered_trivial() throws Exception { touch("a.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("**/*.csv")); assertThat(raw.size(), is(1)); List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw); assertThat(normalize(results), is(path("a.csv"))); } /** * single file does not cover anything. * @throws Exception if failed */ @Test public void minimalCovered_siblings() throws Exception { touch("dir/a.csv"); touch("dir/b.csv"); touch("dir/c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("**/*.csv")); assertThat(raw.size(), is(3)); List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw); assertThat(normalize(results), is(path("dir/a.csv", "dir/b.csv", "dir/c.csv"))); } /** * check covered. * @throws Exception if failed */ @Test public void minimalCovered_parent() throws Exception { touch("dir/a.csv"); touch("dir/b.csv"); touch("dir/c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("*/**")); assertThat(raw.size(), is(4)); List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw); assertThat(normalize(results), is(path("dir"))); } /** * check covered. * @throws Exception if failed */ @Test public void minimalCovered_deep() throws Exception { touch("dir/a.csv"); touch("dir/a/b.csv"); touch("dir/a/b/c.csv"); FileSystem fs = getTempFileSystem(); List<FileStatus> raw = HadoopDataSourceUtil.search(fs, getBase(), FilePattern.compile("dir/**")); for (Iterator<FileStatus> iterator = raw.iterator(); iterator.hasNext();) { FileStatus fileStatus = iterator.next(); if (fileStatus.getPath().getName().equals("dir")) { iterator.remove(); } } assertThat(raw.size(), is(5)); List<FileStatus> results = HadoopDataSourceUtil.onlyMinimalCovered(raw); assertThat(normalize(results), is(path("dir/a.csv", "dir/a"))); } /** * move files simply. * @throws Exception if failed */ @Test public void move_simple() throws Exception { touch("src/a.csv"); FileSystem fs = getTempFileSystem(); HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst")); assertThat(collect(), is(path("dst/a.csv"))); } /** * move multiple files. * @throws Exception if failed */ @Test public void move_multiple() throws Exception { touch("src/a.csv"); touch("src/b.csv"); touch("src/c.csv"); FileSystem fs = getTempFileSystem(); HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst")); assertThat(collect(), is(path("dst/a.csv", "dst/b.csv", "dst/c.csv"))); } /** * move deep files. * @throws Exception if failed */ @Test public void move_deep() throws Exception { touch("src/a.csv"); touch("src/a/b.csv"); touch("src/a/b/c.csv"); FileSystem fs = getTempFileSystem(); HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst")); assertThat(collect(), is(path("dst/a.csv", "dst/a/b.csv", "dst/a/b/c.csv"))); } /** * move multiple files. * @throws Exception if failed */ @Test public void move_merge() throws Exception { touch("src/a.csv"); touch("src/b.csv"); touch("dst/c.csv"); FileSystem fs = getTempFileSystem(); HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst")); assertThat(collect(), is(path("dst/a.csv", "dst/b.csv", "dst/c.csv"))); } private List<String> collect() throws IOException { List<FileStatus> all = HadoopDataSourceUtil.search(getTempFileSystem(), getBase(), FilePattern.compile("**")); List<FileStatus> files = new ArrayList<>(); for (FileStatus stat : all) { if (FileSystemCompatibility.isDirectory(stat) == false) { files.add(stat); } } return normalize(files); } private List<String> normalize(List<FileStatus> stats) throws IOException { File base = folder.getRoot().getCanonicalFile(); List<String> normalized = new ArrayList<>(); for (FileStatus stat : stats) { URI uri = stat.getPath().toUri(); try { File file = new File(uri).getCanonicalFile(); String f = file.getAbsolutePath(); String b = base.getAbsolutePath(); assertThat(f, startsWith(b)); String r = f.substring(b.length()); while (r.startsWith(File.separator)) { r = r.substring(1); } if (File.separatorChar != '/') { r = r.replace(File.separatorChar, '/'); } normalized.add(r); } catch (IOException e) { throw new AssertionError(e); } } Collections.sort(normalized); return normalized; } private Matcher<List<String>> path(final String... paths) { return new BaseMatcher<List<String>>() { @Override public boolean matches(Object obj) { @SuppressWarnings("unchecked") List<String> actuals = (List<String>) obj; List<String> normalized = new ArrayList<>(actuals); List<String> expected = new ArrayList<>(); Collections.addAll(expected, paths); Collections.sort(expected); Collections.sort(normalized); return expected.equals(normalized); } @Override public void describeTo(Description desc) { desc.appendText(Arrays.toString(paths)); } }; } private FileSystem getTempFileSystem() throws IOException { Configuration conf = new Configuration(); LocalFileSystem local = FileSystem.getLocal(conf); return local; } private Path getBase() { return new Path(folder.getRoot().toURI()); } private Path getPath(String path) { return new Path(getBase(), path); } private void touch(String path) throws IOException { File file = new File(folder.getRoot(), path); file.getParentFile().mkdirs(); file.createNewFile(); assertThat(file.isFile(), is(true)); } }