com.cloudera.sqoop.TestMerge.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.sqoop.TestMerge.java

Source

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.Timestamp;
import java.sql.PreparedStatement;
import java.sql.SQLException;

import java.util.List;

import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.HsqldbTestServer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.cloudera.sqoop.SqoopOptions.IncrementalMode;
import com.cloudera.sqoop.manager.ConnManager;
import com.cloudera.sqoop.testutil.BaseSqoopTestCase;
import com.cloudera.sqoop.tool.CodeGenTool;
import com.cloudera.sqoop.tool.ImportTool;
import com.cloudera.sqoop.tool.MergeTool;
import com.cloudera.sqoop.util.ClassLoaderStack;

/**
 * Test that the merge tool works.
 *
 * <p>The single test case creates a small table, imports it twice with a
 * modification in between (into an "old" and a "new" directory), runs the
 * merge tool over the two directories and checks that the merged output
 * contains the expected records.
 */
public class TestMerge extends BaseSqoopTestCase {

    private static final Log LOG = LogFactory.getLog(TestMerge.class.getName());

    /** HSQLDB JDBC URL constant; not referenced elsewhere in this class. */
    public static final String SOURCE_DB_URL = "jdbc:hsqldb:mem:merge";

    /** Name of the table created, imported and merged by this test. */
    public static final String TABLE_NAME = "MergeTable";

    /**
     * Class name used for the generated merge jar. It must differ from the
     * class name used for the imports because of ClassLoaderStack issues
     * when everything runs in the same JVM.
     */
    private static final String MERGE_CLASS_NAME = "ClassForMerging";

    /** Connection manager obtained from the base test case in {@link #setUp()}. */
    protected ConnManager manager;

    /** Connection obtained from {@link #manager} in {@link #setUp()}. */
    protected Connection conn;

    /**
     * Runs the base-class setup, then fetches the connection manager and a
     * connection from it.
     *
     * @throws RuntimeException wrapping the SQLException if the connection
     *     cannot be obtained
     */
    @Override
    public void setUp() {
        super.setUp();
        manager = getManager();
        try {
            conn = manager.getConnection();
        } catch (SQLException e) {
            // Report through the class logger rather than printStackTrace().
            LOG.error("Could not get a connection from the manager", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Build a fresh Configuration for a tool run.
     *
     * @return a configuration that uses the local filesystem unless the test
     *     is running on a physical cluster, and always sets
     *     "mapred.job.tracker" to "local"
     */
    public Configuration newConf() {
        Configuration conf = new Configuration();
        if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
            conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
        }
        conf.set("mapred.job.tracker", "local");
        return conf;
    }

    /**
     * Create a SqoopOptions to connect to the manager.
     *
     * @param conf the configuration the options are built on
     * @return options whose connect string is the HSQLDB test server URL
     */
    public SqoopOptions getSqoopOptions(Configuration conf) {
        SqoopOptions options = new SqoopOptions(conf);
        options.setConnectString(HsqldbTestServer.getDbUrl());

        return options;
    }

    /**
     * Prepare, execute and close a single update statement on {@link #conn}.
     * The caller is responsible for committing.
     *
     * @param sql the complete SQL statement to execute
     * @throws SQLException if preparing or executing the statement fails
     */
    private void runMergeTableUpdate(String sql) throws SQLException {
        PreparedStatement stmt = conn.prepareStatement(sql);
        try {
            stmt.executeUpdate();
        } finally {
            stmt.close();
        }
    }

    /**
     * (Re)create the test table and populate it with the two initial rows
     * (0, 0) and (1, 42), each stamped with the current time; commits once
     * at the end.
     *
     * @throws SQLException if any statement or the commit fails
     */
    protected void createTable() throws SQLException {
        runMergeTableUpdate("DROP TABLE " + TABLE_NAME + " IF EXISTS");
        runMergeTableUpdate(
                "CREATE TABLE " + TABLE_NAME + " (id INT NOT NULL PRIMARY KEY, val INT, lastmod TIMESTAMP)");
        runMergeTableUpdate("INSERT INTO " + TABLE_NAME + " VALUES (" + "0, 0, NOW())");
        runMergeTableUpdate("INSERT INTO " + TABLE_NAME + " VALUES (" + "1, 42, NOW())");

        conn.commit();
    }

    /**
     * Build options for a single-mapper, last-modified incremental import of
     * the test table into the given directory.
     *
     * @param targetDir directory the import writes to
     * @return the configured options (no incremental last value set)
     */
    private SqoopOptions newLastModifiedImportOptions(Path targetDir) {
        SqoopOptions options = getSqoopOptions(newConf());
        options.setTableName(TABLE_NAME);
        options.setNumMappers(1);
        options.setTargetDir(targetDir.toString());
        options.setIncrementalMode(IncrementalMode.DateLastModified);
        options.setIncrementalTestColumn("lastmod");
        return options;
    }

    /**
     * End-to-end merge test: generate the merge class jar, import the
     * initial data, modify the table, import the changes, merge both
     * imports and verify the merged records.
     *
     * @throws Exception if any database, filesystem or tool step fails
     */
    public void testMerge() throws Exception {
        createTable();

        // Create a jar to use for the merging process; we'll load it
        // into the current thread CL for when this runs. This needs
        // to contain a different class name than used for the imports
        // due to classloaderstack issues in the same JVM.
        SqoopOptions options = getSqoopOptions(newConf());
        options.setTableName(TABLE_NAME);
        options.setClassName(MERGE_CLASS_NAME);

        CodeGenTool codeGen = new CodeGenTool();
        Sqoop codeGenerator = new Sqoop(codeGen, options.getConf(), options);
        int ret = Sqoop.runSqoop(codeGenerator, new String[0]);
        if (0 != ret) {
            fail("Nonzero exit from codegen: " + ret);
        }

        List<String> jars = codeGen.getGeneratedJarFiles();
        if (null == jars || jars.isEmpty()) {
            // Fail with a readable message instead of an IndexOutOfBoundsException.
            fail("Codegen did not report any generated jar file");
        }
        String jarFileName = jars.get(0);

        // Now do the imports.
        Path warehouse = new Path(BaseSqoopTestCase.LOCAL_WAREHOUSE_DIR);
        Path oldPath = new Path(warehouse, "merge-old");
        Path newPath = new Path(warehouse, "merge-new");
        Path finalPath = new Path(warehouse, "merge-final");

        // Do an import of this data into the "old" dataset.
        options = newLastModifiedImportOptions(oldPath);
        Sqoop importer = new Sqoop(new ImportTool(), options.getConf(), options);
        ret = Sqoop.runSqoop(importer, new String[0]);
        if (0 != ret) {
            fail("Initial import failed with exit code " + ret);
        }

        // Check that we got records that meet our expected values.
        assertRecordStartsWith("0,0,", "merge-old");
        assertRecordStartsWith("1,42,", "merge-old");

        // Remember when the first import finished; the short sleeps keep the
        // modifications below strictly after this instant.
        long prevImportEnd = System.currentTimeMillis();

        Thread.sleep(25);

        // Modify the data in the warehouse.
        runMergeTableUpdate("UPDATE " + TABLE_NAME + " SET val=43, lastmod=NOW() WHERE id=1");
        conn.commit();

        runMergeTableUpdate("INSERT INTO " + TABLE_NAME + " VALUES (" + "3,313,NOW())");
        conn.commit();

        Thread.sleep(25);

        // Do another import, into the "new" dir, picking up only rows
        // modified after the first import ended.
        options = newLastModifiedImportOptions(newPath);
        options.setIncrementalLastValue(new Timestamp(prevImportEnd).toString());

        importer = new Sqoop(new ImportTool(), options.getConf(), options);
        ret = Sqoop.runSqoop(importer, new String[0]);
        if (0 != ret) {
            fail("Second import failed with exit code " + ret);
        }

        assertRecordStartsWith("1,43,", "merge-new");
        assertRecordStartsWith("3,313,", "merge-new");

        // Now merge the results! The generated jar is loaded into the current
        // thread's class loader for the merge; remember the previous loader
        // so it can be put back and does not leak into later tests.
        ClassLoader prevClassLoader = Thread.currentThread().getContextClassLoader();
        try {
            ClassLoaderStack.addJarFile(jarFileName, MERGE_CLASS_NAME);

            options = getSqoopOptions(newConf());
            options.setMergeOldPath(oldPath.toString());
            options.setMergeNewPath(newPath.toString());
            options.setMergeKeyCol("ID");
            options.setTargetDir(finalPath.toString());
            options.setClassName(MERGE_CLASS_NAME);

            Sqoop merger = new Sqoop(new MergeTool(), options.getConf(), options);
            ret = Sqoop.runSqoop(merger, new String[0]);
            if (0 != ret) {
                fail("Merge failed with exit code " + ret);
            }

            assertRecordStartsWith("0,0,", "merge-final");
            assertRecordStartsWith("1,43,", "merge-final");
            assertRecordStartsWith("3,313,", "merge-final");
        } finally {
            Thread.currentThread().setContextClassLoader(prevClassLoader);
        }
    }

    /**
     * Scan a file line by line for a line beginning with the given prefix.
     *
     * @param fs filesystem used to open the file
     * @param p path of the file to scan
     * @param prefix text the line must start with
     * @return true if the file specified by path 'p' contains a line
     *     that starts with 'prefix'
     * @throws IOException if the file cannot be opened or read
     */
    protected boolean checkFileForLine(FileSystem fs, Path p, String prefix) throws IOException {
        // Decode explicitly as UTF-8 instead of the platform default charset
        // so the result does not depend on the machine running the test.
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(p), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(prefix)) {
                    return true;
                }
            }
            return false; // reached end of file without a match.
        } finally {
            reader.close();
        }
    }

    /**
     * Look through the "part-" files of a warehouse subdirectory for a
     * record beginning with the given prefix. Fails the test if the
     * directory has no files at all.
     *
     * @param prefix text the record must start with
     * @param dirName directory name, relative to the local warehouse dir
     * @return true if there's a file in 'dirName' with a line that starts
     *     with 'prefix'
     * @throws Exception if the local filesystem cannot be listed or read
     */
    protected boolean recordStartsWith(String prefix, String dirName) throws Exception {
        Path targetPath = new Path(new Path(LOCAL_WAREHOUSE_DIR), dirName);

        FileSystem fs = FileSystem.getLocal(new Configuration());
        FileStatus[] files = fs.listStatus(targetPath);

        if (null == files || files.length == 0) {
            fail("Got no import files!");
        }

        for (FileStatus stat : files) {
            Path candidate = stat.getPath();
            // Only the "part-" files are scanned; other entries are skipped.
            if (candidate.getName().startsWith("part-") && checkFileForLine(fs, candidate, prefix)) {
                // We found the line. Nothing further to do.
                return true;
            }
        }

        return false;
    }

    /**
     * Fail the test unless some record in 'dirName' starts with 'prefix'.
     *
     * @param prefix text the record must start with
     * @param dirName directory name, relative to the local warehouse dir
     * @throws Exception if the directory cannot be listed or read
     */
    protected void assertRecordStartsWith(String prefix, String dirName) throws Exception {
        if (!recordStartsWith(prefix, dirName)) {
            fail("No record found that starts with " + prefix + " in " + dirName);
        }
    }
}