org.apache.hadoop.tools.TestDistCp.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.tools.TestDistCp.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.tools;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.tools.mapred.CopyOutputFormat;
import org.junit.*;

import java.util.List;
import java.util.ArrayList;
import java.io.*;

/**
 * Integration tests for {@link DistCp} that run against an in-process
 * {@link MiniDFSCluster} and {@link MiniMRCluster}.
 *
 * <p>The class carries {@code @Ignore}, so JUnit currently skips every test
 * in it. Two of the test methods additionally have their {@code @Test}
 * annotation commented out.
 */
@Ignore
public class TestDistCp {
    private static final Log LOG = LogFactory.getLog(TestDistCp.class);

    /** Qualified paths of every source directory and file created by the current test. */
    private static final List<Path> pathList = new ArrayList<Path>();

    /** Number of (zero) bytes written into each file created by {@link #touchFile(String)}. */
    private static final int FILE_SIZE = 1024;

    /** Pause between polls while waiting on a non-blocking job, in milliseconds. */
    private static final long POLL_INTERVAL_MS = 100L;

    private static Configuration configuration;
    private static MiniDFSCluster cluster;
    private static MiniMRCluster mrCluster;

    private static final String SOURCE_PATH = "/tmp/source";
    private static final String TARGET_PATH = "/tmp/target";

    /**
     * Starts a single-datanode DFS cluster and an MR cluster on top of it, then
     * copies the job tracker addresses chosen by the MR cluster into the shared
     * {@link #configuration} used by the tests.
     *
     * @throws Exception if either mini cluster fails to start
     */
    @BeforeClass
    public static void setup() throws Exception {
        configuration = getConfigurationForCluster();
        cluster = new MiniDFSCluster.Builder(configuration).numDataNodes(1).format(true).build();
        System.setProperty("org.apache.hadoop.mapred.TaskTracker", "target/tmp");
        configuration.set("org.apache.hadoop.mapred.TaskTracker", "target/tmp");
        System.setProperty("hadoop.log.dir", "target/tmp");
        configuration.set("hadoop.log.dir", "target/tmp");
        mrCluster = new MiniMRCluster(1, cluster.getFileSystem().getUri().toString(), 1);
        Configuration mrConf = mrCluster.createJobConf();
        final String mrJobTracker = mrConf.get("mapred.job.tracker");
        configuration.set("mapred.job.tracker", mrJobTracker);
        final String mrJobTrackerAddress = mrConf.get("mapred.job.tracker.http.address");
        configuration.set("mapred.job.tracker.http.address", mrJobTrackerAddress);
    }

    /**
     * Shuts down both mini clusters. The DFS cluster is shut down even when
     * stopping the MR cluster throws, so a failure in one does not leak the other.
     */
    @AfterClass
    public static void cleanup() {
        try {
            if (mrCluster != null) {
                mrCluster.shutdown();
            }
        } finally {
            if (cluster != null) {
                cluster.shutdown();
            }
        }
    }

    /**
     * Builds the base configuration for the mini clusters and points the
     * {@code test.build.data} system property at a test-specific directory.
     *
     * @return a fresh configuration with {@code hadoop.log.dir} set
     * @throws IOException declared for callers; nothing in the current body throws it
     */
    private static Configuration getConfigurationForCluster() throws IOException {
        // Named "conf" so it does not shadow the static "configuration" field.
        Configuration conf = new Configuration();
        System.setProperty("test.build.data", "target/build/TEST_DISTCP/data");
        conf.set("hadoop.log.dir", "target/tmp");

        LOG.debug("fs.default.name  == " + conf.get("fs.default.name"));
        LOG.debug("dfs.http.address == " + conf.get("dfs.http.address"));
        return conf;
    }

    /**
     * Creates the directory tree and files under {@link #SOURCE_PATH} that the
     * copy tests use as input, recording each created path in {@link #pathList}.
     *
     * @throws Exception if a directory or file cannot be created
     */
    private static void createSourceData() throws Exception {
        mkdirs(SOURCE_PATH + "/1");
        mkdirs(SOURCE_PATH + "/2");
        mkdirs(SOURCE_PATH + "/2/3/4");
        mkdirs(SOURCE_PATH + "/2/3");
        mkdirs(SOURCE_PATH + "/5");
        touchFile(SOURCE_PATH + "/5/6");
        mkdirs(SOURCE_PATH + "/7");
        mkdirs(SOURCE_PATH + "/7/8");
        touchFile(SOURCE_PATH + "/7/8/9");
    }

    /**
     * Creates a directory on the DFS cluster and records its qualified path.
     *
     * @param path directory path, qualified against the cluster file system
     * @throws Exception if the file system call fails
     */
    private static void mkdirs(String path) throws Exception {
        FileSystem fileSystem = cluster.getFileSystem();
        final Path qualifiedPath = new Path(path).makeQualified(fileSystem.getUri(),
                fileSystem.getWorkingDirectory());
        pathList.add(qualifiedPath);
        fileSystem.mkdirs(qualifiedPath);
    }

    /**
     * Creates (overwriting if present) a file of {@link #FILE_SIZE} zero bytes on
     * the DFS cluster, using twice the default block size and twice the default
     * replication, and records its qualified path.
     *
     * @param path file path, qualified against the cluster file system
     * @throws Exception if the file cannot be created, written or closed
     */
    private static void touchFile(String path) throws Exception {
        DataOutputStream outputStream = null;
        try {
            final FileSystem fs = cluster.getFileSystem();
            final Path unqualifiedPath = new Path(path);
            final Path qualifiedPath = unqualifiedPath.makeQualified(fs.getUri(), fs.getWorkingDirectory());
            final long blockSize = fs.getDefaultBlockSize(unqualifiedPath) * 2;
            final short replication = (short) (fs.getDefaultReplication(unqualifiedPath) * 2);
            outputStream = fs.create(qualifiedPath, true, 0, replication, blockSize);
            outputStream.write(new byte[FILE_SIZE]);
            // Close explicitly on the success path: IOUtils.cleanup swallows any
            // IOException from close(), which would hide a failed write.
            outputStream.close();
            outputStream = null;
            pathList.add(qualifiedPath);
        } finally {
            // Only reached with a non-null stream when something above failed.
            IOUtils.cleanup(null, outputStream);
        }
    }

    /**
     * Resets per-test state: forgets previously recorded paths, removes the
     * target tree and (re)creates the source data.
     *
     * @throws Exception if the file system calls fail
     */
    private static void clearState() throws Exception {
        pathList.clear();
        cluster.getFileSystem().delete(new Path(TARGET_PATH), true);
        createSourceData();
    }

    /**
     * Polls a non-blocking job until it has completed or its work directory has
     * appeared, whichever happens first, sleeping briefly between polls instead
     * of spinning.
     *
     * @param job     the submitted job
     * @param fs      file system on which {@code workDir} lives
     * @param workDir the job's temporary work directory
     * @throws Exception if querying the job or file system fails, or the wait is interrupted
     */
    private static void awaitWorkDirOrCompletion(Job job, FileSystem fs, Path workDir) throws Exception {
        while (!job.isComplete() && !fs.exists(workDir)) {
            Thread.sleep(POLL_INTERVAL_MS);
        }
    }

    /**
     * Copies the source tree with atomic commit and the default copy strategy,
     * then checks that the work directory and meta folder are gone, the commit
     * directory exists and every source path has a counterpart in the target.
     *
     * <p>NOTE(review): {@code @Test} is commented out, so JUnit does not run this
     * method; the reason is not recorded in this file.
     */
    //  @Test
    public void testUniformSizeDistCp() throws Exception {
        try {
            clearState();
            final FileSystem fileSystem = cluster.getFileSystem();
            Path sourcePath = new Path(SOURCE_PATH).makeQualified(fileSystem.getUri(),
                    fileSystem.getWorkingDirectory());
            List<Path> sources = new ArrayList<Path>();
            sources.add(sourcePath);

            Path targetPath = new Path(TARGET_PATH).makeQualified(fileSystem.getUri(),
                    fileSystem.getWorkingDirectory());
            DistCpOptions options = new DistCpOptions(sources, targetPath);
            options.setAtomicCommit(true);
            options.setBlocking(false);
            Job job = new DistCp(configuration, options).execute();
            Path workDir = CopyOutputFormat.getWorkingDirectory(job);
            Path finalDir = CopyOutputFormat.getCommitDirectory(job);

            awaitWorkDirOrCompletion(job, fileSystem, workDir);
            job.waitForCompletion(true);
            Assert.assertFalse(fileSystem.exists(workDir));
            Assert.assertTrue(fileSystem.exists(finalDir));
            Assert.assertFalse(fileSystem
                    .exists(new Path(job.getConfiguration().get(DistCpConstants.CONF_LABEL_META_FOLDER))));
            verifyResults();
        } catch (Exception e) {
            LOG.error("Exception encountered", e);
            Assert.fail("Unexpected exception: " + e.getMessage());
        }
    }

    /**
     * Submits a copy whose source uses an unknown scheme and, when the submission
     * throws, checks that nothing is left behind in the job staging directory.
     *
     * <p>NOTE(review): {@code @Test} is commented out, so JUnit does not run this
     * method. Also, if {@code execute()} does not throw, nothing is asserted.
     */
    //  @Test
    public void testCleanup() {
        try {
            clearState();
            Path sourcePath = new Path("noscheme:///file");
            List<Path> sources = new ArrayList<Path>();
            sources.add(sourcePath);

            final FileSystem fs = cluster.getFileSystem();
            Path targetPath = new Path(TARGET_PATH).makeQualified(fs.getUri(), fs.getWorkingDirectory());
            DistCpOptions options = new DistCpOptions(sources, targetPath);

            Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(configuration), configuration);
            stagingDir.getFileSystem(configuration).mkdirs(stagingDir);

            try {
                new DistCp(configuration, options).execute();
            } catch (Throwable t) {
                // JUnit's assertEquals takes (message, expected, actual).
                Assert.assertEquals("Staging directory was not cleaned up", 0,
                        stagingDir.getFileSystem(configuration).listStatus(stagingDir).length);
            }
        } catch (Exception e) {
            LOG.error("Exception encountered ", e);
            Assert.fail("testCleanup failed " + e.getMessage());
        }
    }

    /**
     * Copies two top-level source directories ({@code /a} and {@code /b}) into
     * {@code /c} and checks that each file ends up under a sub-directory named
     * after its source directory.
     */
    @Test
    public void testRootPath() throws Exception {
        try {
            clearState();
            List<Path> sources = new ArrayList<Path>();
            final FileSystem fs = cluster.getFileSystem();
            sources.add(new Path("/a").makeQualified(fs.getUri(), fs.getWorkingDirectory()));
            sources.add(new Path("/b").makeQualified(fs.getUri(), fs.getWorkingDirectory()));
            touchFile("/a/a.txt");
            touchFile("/b/b.txt");

            Path targetPath = new Path("/c").makeQualified(fs.getUri(), fs.getWorkingDirectory());
            DistCpOptions options = new DistCpOptions(sources, targetPath);
            new DistCp(configuration, options).execute();
            Assert.assertTrue(fs.exists(new Path("/c/a/a.txt")));
            Assert.assertTrue(fs.exists(new Path("/c/b/b.txt")));
        } catch (Exception e) {
            LOG.error("Exception encountered", e);
            Assert.fail("Unexpected exception: " + e.getMessage());
        }
    }

    /**
     * Copies the source tree using the {@code "dynamic"} copy strategy with
     * atomic commit through the {@code /work} path, then checks that the work
     * directory is gone, the commit directory exists and every source path has a
     * counterpart in the target.
     */
    @Test
    public void testDynamicDistCp() throws Exception {
        try {
            clearState();
            final FileSystem fs = cluster.getFileSystem();
            Path sourcePath = new Path(SOURCE_PATH).makeQualified(fs.getUri(), fs.getWorkingDirectory());
            List<Path> sources = new ArrayList<Path>();
            sources.add(sourcePath);

            Path targetPath = new Path(TARGET_PATH).makeQualified(fs.getUri(), fs.getWorkingDirectory());
            DistCpOptions options = new DistCpOptions(sources, targetPath);
            options.setCopyStrategy("dynamic");

            options.setAtomicCommit(true);
            options.setAtomicWorkPath(new Path("/work"));
            options.setBlocking(false);
            Job job = new DistCp(configuration, options).execute();
            Path workDir = CopyOutputFormat.getWorkingDirectory(job);
            Path finalDir = CopyOutputFormat.getCommitDirectory(job);

            awaitWorkDirOrCompletion(job, fs, workDir);
            job.waitForCompletion(true);
            Assert.assertFalse(fs.exists(workDir));
            Assert.assertTrue(fs.exists(finalDir));

            verifyResults();
        } catch (Exception e) {
            LOG.error("Exception encountered", e);
            Assert.fail("Unexpected exception: " + e.getMessage());
        }
    }

    /**
     * Checks that every path recorded in {@link #pathList} has a counterpart
     * under {@link #TARGET_PATH} and that the counterpart is a file exactly when
     * the source is a file.
     *
     * @throws Exception if a file system call fails
     */
    private static void verifyResults() throws Exception {
        final FileSystem fs = cluster.getFileSystem();
        for (Path path : pathList) {
            Path sourcePath = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
            // Literal replacement: the source prefix is a plain path, not a regex.
            Path targetPath = new Path(sourcePath.toString().replace(SOURCE_PATH, TARGET_PATH));

            Assert.assertTrue(fs.exists(targetPath));
            Assert.assertEquals(fs.isFile(sourcePath), fs.isFile(targetPath));
        }
    }
}