org.apache.pig.piggybank.test.storage.TestMultiStorage.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.pig.piggybank.test.storage.TestMultiStorage.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
 * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License is
 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 */
package org.apache.pig.piggybank.test.storage;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.test.MiniCluster;
import org.apache.pig.test.Util;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import junit.framework.Assert;
import junit.framework.TestCase;

public class TestMultiStorage extends TestCase {
    private static final String INPUT_FILE = "MultiStorageInput.txt";

    private PigServer pigServer;
    private PigServer pigServerLocal;

    private MiniCluster cluster = MiniCluster.buildCluster();

    public TestMultiStorage() throws ExecException, IOException {
        pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
        pigServerLocal = new PigServer(ExecType.LOCAL);
    }

    public static final PathFilter hiddenPathFilter = new PathFilter() {
        public boolean accept(Path p) {
            String name = p.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };

    private void createFile() throws IOException {
        PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE));
        w.println("100\tapple\taaa1");
        w.println("200\torange\tbbb1");
        w.println("300\tstrawberry\tccc1");

        w.println("101\tapple\taaa2");
        w.println("201\torange\tbbb2");
        w.println("301\tstrawberry\tccc2");

        w.println("102\tapple\taaa3");
        w.println("202\torange\tbbb3");
        w.println("302\tstrawberry\tccc3");

        w.close();
        Util.deleteFile(cluster, INPUT_FILE);
        Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE);
    }

    @Override
    @Before
    public void setUp() throws Exception {
        createFile();
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path localOut = new Path("local-out");
        Path dummy = new Path("dummy");
        if (fs.exists(localOut)) {
            fs.delete(localOut, true);
        }
        if (fs.exists(dummy)) {
            fs.delete(dummy, true);
        }
    }

    @Override
    @After
    public void tearDown() throws Exception {
        new File(INPUT_FILE).delete();
        Util.deleteFile(cluster, INPUT_FILE);
        cluster.shutDown();
    }

    enum Mode {
        local, cluster
    };

    @Test
    public void testMultiStorage() throws IOException {
        final String LOAD = "A = LOAD '" + INPUT_FILE + "' as (id, name, n);";
        final String MULTI_STORE_CLUSTER = "STORE A INTO 'mr-out' USING "
                + "org.apache.pig.piggybank.storage.MultiStorage('mr-out', '1');";
        final String MULTI_STORE_LOCAL = "STORE A INTO 'dummy' USING "
                + "org.apache.pig.piggybank.storage.MultiStorage('local-out', '1');";

        System.out.print("Testing in LOCAL mode: ...");
        //testMultiStorage(Mode.local, "local-out", LOAD, MULTI_STORE_LOCAL);
        System.out.println("Succeeded!");

        System.out.print("Testing in CLUSTER mode: ...");
        testMultiStorage(Mode.cluster, "mr-out", LOAD, MULTI_STORE_CLUSTER);
        System.out.println("Succeeded!");

    }

    /**
     * The actual method that run the test in local or cluster mode. 
     * 
     * @param pigServer
     * @param mode
     * @param queries
     * @throws IOException
     */
    private void testMultiStorage(Mode mode, String outPath, String... queries) throws IOException {
        PigServer pigServer = (Mode.local == mode) ? this.pigServerLocal : this.pigServer;
        pigServer.setBatchOn();
        for (String query : queries) {
            pigServer.registerQuery(query);
        }
        pigServer.executeBatch();
        verifyResults(mode, outPath);
    }

    /**
     * Test if records are split into directories corresponding to split field
     * values
     * 
     * @param mode
     * @throws IOException
     */
    private void verifyResults(Mode mode, String outPath) throws IOException {
        FileSystem fs = (Mode.local == mode ? FileSystem.getLocal(new Configuration()) : cluster.getFileSystem());
        Path output = new Path(outPath);
        Assert.assertTrue("Output dir does not exists!", fs.exists(output) && fs.getFileStatus(output).isDir());

        Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
        Assert.assertTrue("Split field dirs not found!", paths != null);

        for (Path path : paths) {
            String splitField = path.getName();
            Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
            Assert.assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
            for (Path filePath : files) {
                Assert.assertTrue("This shouldn't be a directory", fs.isFile(filePath));

                BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(filePath)));
                String line = "";
                int count = 0;
                while ((line = reader.readLine()) != null) {
                    String[] fields = line.split("\\t");
                    Assert.assertEquals(fields.length, 3);
                    Assert.assertEquals("Unexpected field value in the output record", splitField, fields[1]);
                    count++;
                    System.out.println("field: " + fields[1]);
                }
                reader.close();
                Assert.assertEquals(count, 3);
            }
        }
    }
}