com.linkedin.pinot.integration.tests.HybridClusterIntegrationTestCommandLineRunner.java Source code

Introduction

Here is the source code for com.linkedin.pinot.integration.tests.HybridClusterIntegrationTestCommandLineRunner.java, a command-line runner that configures and runs Pinot's customized hybrid cluster integration test through TestNG.

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.linkedin.pinot.integration.tests;

import com.google.common.base.Preconditions;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.utils.KafkaStarterUtils;
import com.linkedin.pinot.controller.ControllerConf;
import com.linkedin.pinot.tools.query.comparison.QueryComparison;
import com.linkedin.pinot.util.TestUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.Nonnull;
import kafka.server.KafkaServerStartable;
import org.apache.commons.io.FileUtils;
import org.json.JSONObject;
import org.testng.Assert;
import org.testng.ITestResult;
import org.testng.TestListenerAdapter;
import org.testng.TestNG;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

/**
 * A command-line runner that invokes the customized hybrid cluster integration test.
 *
 * <p>The arguments expected are as follows:
 * <ul>
 *   <li>
 *     --llc: Whether to use LLC or HLC for the realtime table
 *   </li>
 *   <li>
 *     tableName: Table name
 *   </li>
 *   <li>
 *     schemaFile: Path of the Pinot schema file
 *   </li>
 *   <li>
 *     dataDir: Path of the data directory, which should include a directory called "avro-files" under which all the
 *     Avro files reside, a file called "queries.txt" with all the queries, and a file called "scan-responses.txt"
 *     with all the expected responses
 *   </li>
 *   <li>
 *     invertedIndexColumns: Comma-separated list of columns on which to generate inverted indexes
 *   </li>
 *   <li>
 *     sortedColumn: The sorted column to be used for building realtime segments
 *   </li>
 * </ul>
 *
 * The command can be invoked as follows:
 * <p>CLASSPATH_PREFIX=pinot-integration-tests/target/pinot-integration-tests-*-tests.jar
 * <p>pinot-integration-tests/target/pinot-integration-tests-pkg/bin/pinot-hybrid-cluster-test.sh args...
 */
public class HybridClusterIntegrationTestCommandLineRunner {
    private HybridClusterIntegrationTestCommandLineRunner() {
    }

    public static void printUsage() {
        System.err.println(
                "Usage: pinot-hybrid-cluster.sh [--llc] tableName schemaFile dataDir invertedIndexColumns sortedColumn");
        System.exit(1);
    }

    public static void main(String[] args) throws Exception {
        int numArgs = args.length;
        if (!((numArgs == 5) || (numArgs == 6 && args[0].equals("--llc")))) {
            printUsage();
        }

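        // CustomHybridClusterIntegrationTest is a no-op unless this flag is set, so enable it before configuring
        // its static fields and handing it to TestNG.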
        CustomHybridClusterIntegrationTest._enabled = true;

        int argIdx = 0;
        if (args[0].equals("--llc")) {
            CustomHybridClusterIntegrationTest._useLlc = true;
            argIdx++;
        }

        CustomHybridClusterIntegrationTest._tableName = args[argIdx++];
        File schemaFile = new File(args[argIdx++]);
        Preconditions.checkState(schemaFile.isFile());
        CustomHybridClusterIntegrationTest._schemaFile = schemaFile;
        File dataDir = new File(args[argIdx++]);
        Preconditions.checkState(dataDir.isDirectory());
        CustomHybridClusterIntegrationTest._dataDir = dataDir;
        CustomHybridClusterIntegrationTest._invertedIndexColumns = Arrays.asList(args[argIdx++].split(","));
        CustomHybridClusterIntegrationTest._sortedColumn = args[argIdx];

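        // Run the test class programmatically through TestNG, collecting per-test results with a listener adapter.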
        TestListenerAdapter testListenerAdapter = new TestListenerAdapter();
        TestNG testNG = new TestNG();
        testNG.setTestClasses(new Class[] { CustomHybridClusterIntegrationTest.class });
        testNG.addListener(testListenerAdapter);
        testNG.run();

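        // Report skipped and failed tests, and exit with a non-zero status if any test did not pass.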
        System.out.println(testListenerAdapter.toString());
        boolean success = true;
        List<ITestResult> skippedTests = testListenerAdapter.getSkippedTests();
        if (!skippedTests.isEmpty()) {
            System.out.println("Skipped tests: " + skippedTests);
            for (ITestResult skippedTest : skippedTests) {
                System.out.println(skippedTest.getName() + ": " + skippedTest.getThrowable());
            }
            success = false;
        }
        List<ITestResult> failedTests = testListenerAdapter.getFailedTests();
        if (!failedTests.isEmpty()) {
            System.err.println("Failed tests: " + failedTests);
            for (ITestResult failedTest : failedTests) {
                System.out.println(failedTest.getName() + ": " + failedTest.getThrowable());
            }
            success = false;
        }
        if (success) {
            System.exit(0);
        } else {
            System.exit(1);
        }
    }

    /**
     * Custom hybrid cluster integration test that runs queries from a query file and compares with the pre-generated
     * responses. It takes at least three Avro files and creates offline segments for the first two, while it pushes all
     * Avro files except the first one into Kafka.
     * <p>
     * Each Avro file is pushed into Kafka in order, and after each push we run all queries from the given query file
     * once the record count has stabilized. This covers as many test conditions as possible (e.g. data partially in
     * memory and partially on disk, with overlap between offline and realtime) and should give good confidence that
     * the cluster behaves as it should.
     */
    public static final class CustomHybridClusterIntegrationTest extends BaseClusterIntegrationTest {
        private static final String TENANT_NAME = "TestTenant";
        private static final int ZK_PORT = 3191;
        private static final String ZK_STR = "localhost:" + ZK_PORT;
        private static final String KAFKA_ZK_STR = ZK_STR + "/kafka";
        private static final int KAFKA_PORT = 20092;
        private static final String KAFKA_BROKER = "localhost:" + KAFKA_PORT;
        private static final int CONTROLLER_PORT = 9998;
        private static final int BROKER_BASE_PORT = 19099;
        private static final int SERVER_BASE_ADMIN_API_PORT = 9097;
        private static final int SERVER_BASE_NETTY_PORT = 9098;

        private static final String AVRO_DIR = "avro-files";
        private static final String QUERY_FILE_NAME = "queries.txt";
        private static final String RESPONSE_FILE_NAME = "scan-responses.txt";
        private static final int NUM_OFFLINE_SEGMENTS = 2;

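        // Populated by main() from the command-line arguments before TestNG instantiates this class.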
        private static boolean _enabled = false;
        private static boolean _useLlc = false;
        private static String _tableName;
        private static File _schemaFile;
        private static File _dataDir;
        private static List<String> _invertedIndexColumns;
        private static String _sortedColumn;

        private List<File> _offlineAvroFiles;
        private List<File> _realtimeAvroFiles;
        private File _queryFile;
        private File _responseFile;
        private KafkaServerStartable _kafkaStarter;
        private long _countStarResult;

        public CustomHybridClusterIntegrationTest() {
            if (!_enabled) {
                return;
            }

            File[] avroFiles = new File(_dataDir, AVRO_DIR).listFiles();
            Assert.assertNotNull(avroFiles);
            int numAvroFiles = avroFiles.length;
            Assert.assertTrue(numAvroFiles > 2, "Need at least 3 Avro files to run the test");
            Arrays.sort(avroFiles);

            _offlineAvroFiles = new ArrayList<>(NUM_OFFLINE_SEGMENTS);
            _realtimeAvroFiles = new ArrayList<>(numAvroFiles - 1);
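            // The first NUM_OFFLINE_SEGMENTS files become offline segments, and every file except the first is
            // pushed into Kafka, so the second file overlaps between the offline and realtime tables.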
            for (int i = 0; i < numAvroFiles; i++) {
                if (i < NUM_OFFLINE_SEGMENTS) {
                    _offlineAvroFiles.add(avroFiles[i]);
                }
                if (i > 0) {
                    _realtimeAvroFiles.add(avroFiles[i]);
                }
            }

            _queryFile = new File(_dataDir, QUERY_FILE_NAME);
            Assert.assertTrue(_queryFile.isFile());

            _responseFile = new File(_dataDir, RESPONSE_FILE_NAME);
            Assert.assertTrue(_responseFile.isFile());
        }

        @Nonnull
        @Override
        protected String getTableName() {
            return _tableName;
        }

        @Override
        protected long getCountStarResult() {
            return _countStarResult;
        }

        @Override
        protected boolean useLlc() {
            return _useLlc;
        }

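        // Use a flush threshold 100x the default so that each realtime segment holds many more rows before flushing.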
        @Override
        protected int getRealtimeSegmentFlushSize() {
            return super.getRealtimeSegmentFlushSize() * 100;
        }

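        // Current COUNT(*) as reported by the broker, used to detect when all pushed records are queryable.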
        @Override
        protected long getCurrentCountStarResult() throws Exception {
            return postQuery("SELECT COUNT(*) FROM " + getTableName()).getJSONArray("aggregationResults")
                    .getJSONObject(0).getLong("value");
        }

        @BeforeClass
        public void setUp() throws Exception {
            if (!_enabled) {
                return;
            }

            TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir);

            // Start Zk and Kafka
            startZk(ZK_PORT);
            _kafkaStarter = KafkaStarterUtils.startServer(KAFKA_PORT, KafkaStarterUtils.DEFAULT_BROKER_ID,
                    KAFKA_ZK_STR, KafkaStarterUtils.getDefaultKafkaConfiguration());

            // Create Kafka topic
            KafkaStarterUtils.createTopic(getKafkaTopic(), KAFKA_ZK_STR, getNumKafkaPartitions());

            // Start the Pinot cluster
            ControllerConf config = getDefaultControllerConfiguration();
            config.setControllerPort(Integer.toString(CONTROLLER_PORT));
            config.setZkStr(ZK_STR);
            config.setTenantIsolationEnabled(false);
            startController(config);
            startBroker(BROKER_BASE_PORT, ZK_STR);
            startServers(2, SERVER_BASE_ADMIN_API_PORT, SERVER_BASE_NETTY_PORT, ZK_STR);

            // Create tenants
            createBrokerTenant(TENANT_NAME, 1);
            createServerTenant(TENANT_NAME, 1, 1);

            // Create segments from Avro data
            ExecutorService executor = Executors.newCachedThreadPool();
            Schema schema = Schema.fromFile(_schemaFile);
            ClusterIntegrationTestUtils.buildSegmentsFromAvro(_offlineAvroFiles, 0, _segmentDir, _tarDir,
                    _tableName, false, getRawIndexColumns(), schema, executor);
            executor.shutdown();
            executor.awaitTermination(10, TimeUnit.MINUTES);

            // Create Pinot table
            String schemaName = schema.getSchemaName();
            addSchema(_schemaFile, schemaName);
            String timeColumnName = schema.getTimeColumnName();
            Assert.assertNotNull(timeColumnName);
            TimeUnit outgoingTimeUnit = schema.getOutgoingTimeUnit();
            Assert.assertNotNull(outgoingTimeUnit);
            String timeType = outgoingTimeUnit.toString();
            addHybridTable(_tableName, _useLlc, KAFKA_BROKER, KAFKA_ZK_STR, getKafkaTopic(),
                    getRealtimeSegmentFlushSize(), _realtimeAvroFiles.get(0), timeColumnName, timeType, schemaName,
                    TENANT_NAME, TENANT_NAME, "MMAP", _sortedColumn, _invertedIndexColumns, null, null);

            // Upload all segments
            uploadSegments(_tarDir);
        }

        @Test
        public void testQueriesFromQueryFile() throws Exception {
            if (!_enabled) {
                return;
            }

            // Do not compare numDocsScanned
            QueryComparison.setCompareNumDocs(false);

            final AtomicInteger numFailedQueries = new AtomicInteger();
            try (BufferedReader responseFileReader = new BufferedReader(new FileReader(_responseFile))) {
                for (File realtimeAvroFile : _realtimeAvroFiles) {
                    // Push one avro file into the Kafka topic
                    ClusterIntegrationTestUtils.pushAvroIntoKafka(Collections.singletonList(realtimeAvroFile),
                            KAFKA_BROKER, getKafkaTopic(), getMaxNumKafkaMessagesPerBatch(),
                            getKafkaMessageHeader(), getPartitionColumn());

                    try (BufferedReader queryFileReader = new BufferedReader(new FileReader(_queryFile))) {
                        // Set the expected COUNT(*) result and wait for all documents loaded
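                        // (peek at the first response line for this batch to read "totalDocs", then rewind so the
                        // same line is still consumed as the expected response of the first query below)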
                        responseFileReader.mark(4096);
                        _countStarResult = new JSONObject(responseFileReader.readLine()).getLong("totalDocs");
                        responseFileReader.reset();
                        waitForAllDocsLoaded(600_000L);

                        // Run queries in multiple threads
                        ExecutorService executorService = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS,
                                new LinkedBlockingQueue<Runnable>(10), new ThreadPoolExecutor.CallerRunsPolicy());
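                        // CallerRunsPolicy applies back-pressure: when the bounded queue is full, the submitting
                        // thread runs the query itself instead of rejecting it.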
                        String query;
                        while ((query = queryFileReader.readLine()) != null) {
                            final String currentQuery = query;
                            final JSONObject expectedResponse = new JSONObject(responseFileReader.readLine());
                            executorService.execute(new Runnable() {
                                @Override
                                public void run() {
                                    try {
                                        JSONObject actualResponse = postQuery(currentQuery,
                                                "http://localhost:" + BROKER_BASE_PORT);
                                        if (QueryComparison.compareWithEmpty(actualResponse,
                                                expectedResponse) == QueryComparison.ComparisonStatus.FAILED) {
                                            numFailedQueries.getAndIncrement();
                                            System.out.println("Query comparison failed for query: " + currentQuery
                                                    + "\nActual: " + actualResponse.toString(2) + "\nExpected: "
                                                    + expectedResponse.toString(2));
                                        }
                                    } catch (Exception e) {
                                        numFailedQueries.getAndIncrement();
                                        System.out
                                                .println("Caught exception while comparing query: " + currentQuery);
                                        e.printStackTrace();
                                    }
                                }
                            });
                        }
                        executorService.shutdown();
                        executorService.awaitTermination(10, TimeUnit.MINUTES);
                    }
                }
            }

            Assert.assertEquals(numFailedQueries.get(), 0, "Caught " + numFailedQueries + " failed queries");
        }

        @AfterClass
        public void tearDown() throws Exception {
            if (!_enabled) {
                return;
            }

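            // Drop both sides of the hybrid table, then shut down the cluster components in reverse start-up order.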
            String tableName = getTableName();
            dropOfflineTable(tableName);
            dropRealtimeTable(tableName);

            stopServer();
            stopBroker();
            stopController();
            KafkaStarterUtils.stopServer(_kafkaStarter);
            stopZk();

            FileUtils.deleteDirectory(_tempDir);
        }
    }
}
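
Example

For reference, the runner can also be invoked directly from Java by calling main() with the same positional arguments that the shell script forwards. This is a minimal sketch; the table name, file paths and column names below are hypothetical placeholders, not values from the original test setup.

import com.linkedin.pinot.integration.tests.HybridClusterIntegrationTestCommandLineRunner;

public class RunnerExample {
    public static void main(String[] args) throws Exception {
        // Arguments: optional --llc flag, table name, schema file, data directory,
        // comma-separated inverted index columns, and the sorted column (placeholder values).
        HybridClusterIntegrationTestCommandLineRunner.main(new String[] {
                "--llc",
                "myTable",
                "/path/to/myTable-schema.json",
                "/path/to/data-dir",
                "invertedColumn1,invertedColumn2",
                "sortedColumn"
        });
    }
}

Note that main() calls System.exit(), so the JVM terminates with exit code 0 when all tests pass and 1 otherwise.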