cascading.ClusterTestCase.java Source code

Java tutorial

Introduction

Here is the source code for cascading.ClusterTestCase.java

Source

/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading.  If not, see <http://www.gnu.org/licenses/>.
 */

package cascading;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import cascading.flow.Flow;
import cascading.flow.MultiMapReducePlanner;
import cascading.scheme.SequenceFile;
import cascading.scheme.TextLine;
import cascading.tap.Hfs;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntryIterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;

/**
 * Base test case that prepares a shared Hadoop {@link JobConf} for Cascading tests, optionally
 * backed by an in-process {@link MiniDFSCluster} and {@link MiniMRCluster}.
 *
 * <p>The cluster handles, the job configuration and the planner properties are held in static
 * fields and are initialized by the first call to {@link #setUp()}; later calls return
 * immediately. As a consequence, the cluster mode and task counts of whichever instance runs
 * {@code setUp()} first are the ones every later instance observes.
 *
 * <p>Cluster mode is only used when the constructor flag is {@code true} and the system property
 * named by {@link #CLUSTER_TESTING_PROPERTY} is absent or parses as {@code true}.
 */
public class ClusterTestCase extends CascadingTestCase {
    /** System property that can veto cluster mode for tests that request it. */
    public static final String CLUSTER_TESTING_PROPERTY = "test.cluster.enabled";

    // Shared across all instances; populated once by setUp(). Static fields are never part of an
    // instance's serialized form, so no transient modifier is needed on them.
    private static MiniDFSCluster dfs;
    private static FileSystem fileSys;
    private static MiniMRCluster mr;
    private static JobConf jobConf;
    private static Map<Object, Object> properties = new HashMap<Object, Object>();

    /** Whether this instance wants to run against the mini cluster. */
    transient private boolean enableCluster;

    /** Number of map tasks applied to the shared job configuration during initialization. */
    int numMapTasks = 4;
    /** Number of reduce tasks applied to the shared job configuration during initialization. */
    int numReduceTasks = 1;

    /** Value of the {@code log4j.logger} system property captured at construction, may be null. */
    private String logger;

    /**
     * Creates a test case that may run against the mini cluster.
     *
     * @param string        the test name, passed to the superclass constructor
     * @param enableCluster {@code false} forces local mode; {@code true} enables cluster mode
     *                      unless the {@link #CLUSTER_TESTING_PROPERTY} system property is set to
     *                      something that does not parse as {@code true}
     */
    public ClusterTestCase(String string, boolean enableCluster) {
        super(string);

        // The system property is only consulted when the caller asked for a cluster; it can turn
        // cluster mode off but never on.
        this.enableCluster = enableCluster
                && Boolean.parseBoolean(System.getProperty(CLUSTER_TESTING_PROPERTY, "true"));

        this.logger = System.getProperty("log4j.logger");
    }

    /**
     * Creates a test case with explicit task counts.
     *
     * @param string         the test name, passed to the superclass constructor
     * @param enableCluster  see {@link #ClusterTestCase(String, boolean)}
     * @param numMapTasks    number of map tasks to set on the job configuration
     * @param numReduceTasks number of reduce tasks to set on the job configuration
     */
    public ClusterTestCase(String string, boolean enableCluster, int numMapTasks, int numReduceTasks) {
        this(string, enableCluster);
        this.numMapTasks = numMapTasks;
        this.numReduceTasks = numReduceTasks;
    }

    /**
     * Creates a local-mode test case; cluster mode stays disabled and no logger is captured.
     *
     * @param string the test name, passed to the superclass constructor
     */
    public ClusterTestCase(String string) {
        super(string);
    }

    /** Creates an unnamed local-mode test case. */
    public ClusterTestCase() {
    }

    /**
     * @return {@code true} if this instance was configured to use the mini cluster
     */
    public boolean isEnableCluster() {
        return enableCluster;
    }

    /**
     * Initializes the shared job configuration and, in cluster mode, the mini DFS and MapReduce
     * clusters. Does nothing if the shared configuration already exists.
     *
     * @throws IOException if the mini cluster cannot be started
     */
    public void setUp() throws IOException {
        // Shared state is built once; the first caller's mode and task counts win.
        if (jobConf != null)
            return;

        if (!enableCluster) {
            jobConf = new JobConf();
        } else {
            System.setProperty("test.build.data", "build");
            new File("build/test/log").mkdirs();
            System.setProperty("hadoop.log.dir", "build/test/log");
            Configuration conf = new Configuration();

            // Four data nodes on a freshly formatted DFS, then four task trackers pointed at it.
            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();
            mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1);

            jobConf = mr.createJobConf();

            jobConf.set("mapred.child.java.opts", "-Xmx512m");
            jobConf.setMapSpeculativeExecution(false);
            jobConf.setReduceSpeculativeExecution(false);
        }

        jobConf.setNumMapTasks(numMapTasks);
        jobConf.setNumReduceTasks(numReduceTasks);

        if (logger != null)
            properties.put("log4j.logger", logger);

        Flow.setJobPollingInterval(properties, 500); // should speed up tests
        MultiMapReducePlanner.setJobConf(properties, jobConf);
    }

    /**
     * @return a fresh copy of the shared planner properties; changes to it do not affect other tests
     */
    public Map<Object, Object> getProperties() {
        return new HashMap<Object, Object>(properties);
    }

    /**
     * @return a fresh {@link JobConf} copied from the shared configuration
     */
    public JobConf getJobConf() {
        return new JobConf(jobConf);
    }

    /**
     * Returns the file system tests should use: the mini DFS file system when one was started,
     * otherwise whatever {@link FileSystem#get(Configuration)} resolves for the shared job
     * configuration.
     *
     * @return the active file system
     * @throws IOException if the file system cannot be obtained
     */
    public FileSystem getFileSystem() throws IOException {
        if (fileSys != null)
            return fileSys;

        return FileSystem.get(jobConf);
    }

    /**
     * Qualifies the given path against the active file system.
     *
     * @param path the path to qualify
     * @return the qualified path as a string
     * @throws IOException if the file system cannot be obtained
     */
    public String makeQualifiedPath(String path) throws IOException {
        return new Path(path).makeQualified(getFileSystem()).toString();
    }

    /**
     * In cluster mode, copies a local file to the same path on the active file system unless a
     * file already exists there. Does nothing in local mode.
     *
     * @param inputFile local path of the file, also used as the destination path
     * @throws IOException if the existence check or the copy fails
     */
    protected void copyFromLocal(String inputFile) throws IOException {
        if (!enableCluster)
            return;

        // Go through getFileSystem() instead of the static field: the field is still null when the
        // shared configuration was first initialized by a non-cluster test, which previously
        // caused a NullPointerException here.
        FileSystem targetFs = getFileSystem();
        Path path = new Path(inputFile);

        if (!targetFs.exists(path))
            FileUtil.copy(new File(inputFile), targetFs, path, false, jobConf);
    }

    /** Intentionally empty; the shared cluster is left running for subsequent tests. */
    public void tearDown() throws IOException {
        // do nothing, let the jvm shut things down
    }

    /**
     * Asserts that the text file at {@code path} exists and validates its tuple count.
     *
     * @param path   location of the text data
     * @param length expected length passed to the inherited {@code validateLength}
     * @throws IOException if the data cannot be read
     */
    public void validateLengthText(String path, int length) throws IOException {
        validateLengthText(path, length, null);
    }

    /**
     * Asserts that the text file at {@code path} exists and validates its tuple count, optionally
     * checking against a regular expression.
     *
     * @param path   location of the text data, read as a single {@code "line"} field
     * @param length expected length passed to the inherited {@code validateLength}
     * @param regex  regular expression to compile and pass along, or {@code null} for none
     * @throws IOException if the data cannot be read
     */
    public void validateLengthText(String path, int length, String regex) throws IOException {
        validateTapLength(new Hfs(new TextLine(new Fields("line")), path), path, length, regex);
    }

    /**
     * Asserts that the sequence file at {@code path} exists and validates its tuple count.
     *
     * @param path   location of the sequence file data
     * @param fields fields declared by the sequence file scheme
     * @param length expected length passed to the inherited {@code validateLength}
     * @throws IOException if the data cannot be read
     */
    public void validateLengthSequence(String path, Fields fields, int length) throws IOException {
        validateLengthSequence(path, fields, length, null);
    }

    /**
     * Asserts that the sequence file at {@code path} exists and validates its tuple count,
     * optionally checking against a regular expression.
     *
     * @param path   location of the sequence file data
     * @param fields fields declared by the sequence file scheme
     * @param length expected length passed to the inherited {@code validateLength}
     * @param regex  regular expression to compile and pass along, or {@code null} for none
     * @throws IOException if the data cannot be read
     */
    public void validateLengthSequence(String path, Fields fields, int length, String regex) throws IOException {
        validateTapLength(new Hfs(new SequenceFile(fields), path), path, length, regex);
    }

    /** Asserts the tap's path exists, then runs the inherited length validation over its tuples. */
    private void validateTapLength(Hfs tap, String path, int length, String regex) throws IOException {
        assertTrue("path does not exist: " + path, tap.pathExists(getJobConf()));

        TupleEntryIterator iterator = tap.openForRead(getJobConf());

        try {
            validateLength(iterator, length, -1, regex == null ? null : Pattern.compile(regex));
        } finally {
            // Always release the underlying reader, even when validation fails.
            if (iterator != null)
                iterator.close();
        }
    }
}