qa.qcri.nadeef.test.core.SQLTableTest.java Source code

Java tutorial

Introduction

Here is the source code for qa.qcri.nadeef.test.core.SQLTableTest.java

Source

/*
 * QCRI, NADEEF LICENSE
 * NADEEF is an extensible, generalized and easy-to-deploy data cleaning platform built at QCRI.
 * NADEEF means "Clean" in Arabic
 *
 * Copyright (c) 2011-2013, Qatar Foundation for Education, Science and Community Development (on
 * behalf of Qatar Computing Research Institute) having its principle place of business in Doha,
 * Qatar with the registered address P.O box 5825 Doha, Qatar (hereinafter referred to as "QCRI")
 *
 * NADEEF has patent pending nevertheless the following is granted.
 * NADEEF is released under the terms of the MIT License, (http://opensource.org/licenses/MIT).
 */

package qa.qcri.nadeef.test.core;

import com.google.common.base.Stopwatch;
import org.apache.commons.lang.time.StopWatch;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import qa.qcri.nadeef.core.datamodel.*;
import qa.qcri.nadeef.core.util.Bootstrap;
import qa.qcri.nadeef.core.util.CSVTools;
import qa.qcri.nadeef.core.util.sql.DBConnectionPool;
import qa.qcri.nadeef.core.util.sql.SQLDialectBase;
import qa.qcri.nadeef.core.util.sql.SQLDialectFactory;
import qa.qcri.nadeef.test.NadeefTestBase;
import qa.qcri.nadeef.test.TestDataRepository;
import qa.qcri.nadeef.tools.DBConfig;

import java.io.File;
import java.sql.Connection;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * SourceImport Test.
 */
@RunWith(Parameterized.class)
public class SQLTableTest extends NadeefTestBase {
    private String tableName;
    private String tableName10k;
    private DBConnectionPool connectionFactory;

    public SQLTableTest(String config_) {
        super(config_);
    }

    @Before
    public void setup() {
        try {
            Bootstrap.start(testConfig);
            DBConfig dbConfig = NadeefConfiguration.getDbConfig();
            SQLDialectBase dialectManager = SQLDialectFactory.getDialectManagerInstance(dbConfig.getDialect());
            tableName = CSVTools.dump(dbConfig, dialectManager, TestDataRepository.getDumpTestCSVFile());
            tableName10k = CSVTools.dump(dbConfig, dialectManager,
                    new File("test/src/qa/qcri/nadeef/test/input/hospital_10k.csv"));
            connectionFactory = DBConnectionPool.createDBConnectionPool(dbConfig, dbConfig);
        } catch (Exception e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
    }

    @After
    public void teardown() {
        try (Connection conn = connectionFactory.getSourceConnection(); Statement stat = conn.createStatement()) {
            SQLDialectBase dialectManager = SQLDialectFactory.getNadeefDialectManagerInstance();
            stat.execute(dialectManager.dropTable(tableName));
            stat.execute(dialectManager.dropTable(tableName10k));
            conn.commit();
            Bootstrap.shutdown();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Test
    public void testProjection() {
        SQLTable collection = new SQLTable(tableName, connectionFactory);
        collection.project(new Column(tableName + ".c"));
        Assert.assertEquals(12, collection.size());
        Tuple tuple = collection.get(0);
        Set cellSets = tuple.getCells();
        Assert.assertEquals(1, cellSets.size());
        Cell cell = (Cell) cellSets.iterator().next();
        Assert.assertTrue((tableName + ".c").equalsIgnoreCase(cell.getColumn().getFullColumnName()));
    }

    @Test
    public void testFilter() {
        SQLTable collection = new SQLTable(tableName, connectionFactory);
        collection.filter(Predicate.createEq(new Column(tableName, "c"), "c1")).project(new Column(tableName, "c"));
        Assert.assertEquals(7, collection.size());
        Tuple tuple = collection.get(0);
        Set cellSets = tuple.getCells();
        Assert.assertEquals(1, cellSets.size());
        Cell cell = (Cell) cellSets.iterator().next();
        Assert.assertTrue((tableName + ".c").equalsIgnoreCase(cell.getColumn().getFullColumnName()));
    }

    @Test
    public void testGroup() {
        SQLTable collection = new SQLTable(tableName, connectionFactory);
        Column targetColumn = new Column(tableName, "c");
        Collection<Table> result = collection.groupOn(targetColumn);
        Assert.assertEquals(3, result.size());
        for (Table t : result) {
            Tuple tuple = t.get(0);
            String value = (String) tuple.get("c");
            switch (value) {
            case "c1":
                Assert.assertEquals(7, t.size());
                break;
            case "c3":
                Assert.assertEquals(1, t.size());
                break;
            case "c2":
                Assert.assertEquals(4, t.size());
                break;
            }

        }
    }

    @Test
    public void testGroup2() {
        SQLTable table = new SQLTable(tableName10k, connectionFactory);
        Column column0 = new Column(tableName10k, "hospitalowner");
        Column column1 = new Column(tableName10k, "condition");
        List<Column> columns = new ArrayList<>();
        columns.add(column0);
        columns.add(column1);
        Collection<Table> blocks = table.groupOn(columns);
        System.out.println("Blocked " + blocks.size() + " groups.");
        for (Table x : blocks) {
            Tuple tuple = x.get(0);
            for (int i = 1; i < x.size(); i++) {
                Tuple tuple1 = x.get(i);
                Assert.assertEquals(tuple.get(column0), tuple1.get(column0));
                Assert.assertEquals(tuple.get(column0), tuple1.get(column0));
            }
        }
    }

    @Test
    public void testGroup3() {
        SQLTable table = new SQLTable(tableName10k, connectionFactory);
        Column column0 = new Column(tableName10k, "hospitalowner");
        Column column1 = new Column(tableName10k, "condition");
        List<Column> columns = new ArrayList<>();
        columns.add(column0);
        columns.add(column1);
        Collection<Table> blocks = table.groupOnConstrained(columns);
        System.out.println("Blocked " + blocks.size() + " groups.");
        for (Table x : blocks) {
            Tuple tuple = x.get(0);
            for (int i = 1; i < x.size(); i++) {
                Tuple tuple1 = x.get(i);
                Assert.assertEquals(tuple.get(column0), tuple1.get(column0));
                Assert.assertEquals(tuple.get(column0), tuple1.get(column0));
            }
        }
    }

    // @Test
    public void testSize() throws InterruptedException {
        Stopwatch stopwatch = Stopwatch.createStarted();
        SQLTable table = new SQLTable("TB_test60m", connectionFactory);
        table.get(0);

        long elapsedTime = stopwatch.elapsed(TimeUnit.MILLISECONDS);
        int mb = 1024 * 1024;

        //Getting the runtime reference from system
        Runtime runtime = Runtime.getRuntime();

        System.out.println("##### Heap utilization statistics [MB] #####");

        long usedMemory = (runtime.totalMemory() - runtime.freeMemory()) / mb;
        System.out.println("Used Memory:" + usedMemory);
        System.out.println("Free Memory:" + runtime.freeMemory() / mb);
        System.out.println("Total Memory:" + runtime.totalMemory() / mb);
        System.out.println("Max Memory:" + runtime.maxMemory() / mb);
        System.out.println("Load time: " + elapsedTime + " ms.");

        // memory usage should be less than 700 mb.
        Assert.assertTrue(usedMemory < 700);

        // load time should be less than 18000 ms
        Assert.assertTrue(elapsedTime < 18000);
    }
}