com.inmobi.conduit.local.LocalStreamServiceTest.java Source code

Here is the source code for com.inmobi.conduit.local.LocalStreamServiceTest.java
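
The class is a TestNG suite for LocalStreamService in the InMobi Conduit project. It covers listing creation over a mocked HDFS layout, trash-path population, local MapReduce runs over single and multiple streams (including repeated runs), file throttling, copy-mapper selection for s3n destinations, cluster-name handling in the job configuration, and gzip compression with audit counters.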

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.inmobi.conduit.local;

import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.net.URI;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Properties;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;

import com.inmobi.conduit.ConduitConfig;
import com.inmobi.conduit.ConduitConfigParser;
import com.inmobi.conduit.ConduitConstants;
import com.inmobi.conduit.metrics.AbsoluteGauge;
import com.inmobi.conduit.utils.FileUtil;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.log4j.Logger;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.AfterSuite;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import com.inmobi.conduit.metrics.ConduitMetrics;
import com.inmobi.conduit.metrics.SlidingTimeWindowGauge;
import com.inmobi.conduit.AbstractService;
import com.inmobi.conduit.CheckpointProvider;
import com.inmobi.conduit.Cluster;
import com.inmobi.conduit.ClusterTest;
import com.inmobi.conduit.DestinationStream;
import com.inmobi.conduit.FSCheckpointProvider;
import com.inmobi.conduit.SourceStream;
import com.inmobi.conduit.TestMiniClusterUtil;
import com.inmobi.messaging.Message;
import com.inmobi.messaging.util.AuditUtil;

public class LocalStreamServiceTest extends TestMiniClusterUtil {
    private static Logger LOG = Logger.getLogger(LocalStreamServiceTest.class);
    private final static int NUMBER_OF_FILES = 9;
    public static final String FS_DEFAULT_NAME_KEY = "fs.default.name";
    public static final String SRC_FS_DEFAULT_NAME_KEY = "src.fs.default.name";

    Set<String> expectedResults = new LinkedHashSet<String>();
    Set<String> expectedTrashPaths = new LinkedHashSet<String>();
    Map<String, String> expectedCheckPointPaths = new HashMap<String, String>();

    @BeforeMethod
    public void beforeTest() throws Exception {
        Properties prop = new Properties();
        prop.setProperty("com.inmobi.conduit.metrics.enabled", "true");
        prop.setProperty("com.inmobi.conduit.metrics.slidingwindowtime", "100000000");
        ConduitMetrics.init(prop);
        ConduitMetrics.startAll();
    }

    @AfterMethod
    public void afterTest() throws Exception {
        ConduitMetrics.stopAll();
    }

    @BeforeSuite
    public void setup() throws Exception {
        // clean up the test data if anything is left over from previous runs
        cleanup();
        super.setup(2, 6, 1);
        System.setProperty(ConduitConstants.AUDIT_ENABLED_KEY, "true");
        createExpectedOutput();
    }

    @AfterSuite
    public void cleanup() throws Exception {
        super.cleanup();
    }

    private void createExpectedOutput() {
        createExpectedResults();
        createExpectedTrash();
        createExpectedCheckPointPaths();
    }

    private void createExpectedCheckPointPaths() {
        expectedCheckPointPaths.put("stream1collector1", "file8");
        expectedCheckPointPaths.put("stream1collector2", "file8");
        expectedCheckPointPaths.put("stream2collector1", "file8");
        expectedCheckPointPaths.put("stream2collector2", "file8");
    }

    private void createExpectedResults() {
        expectedResults.add("/conduit/data/stream1/collector2/file1");
        expectedResults.add("/conduit/data/stream1/collector2/file2");
        expectedResults.add("/conduit/data/stream1/collector2/file3");
        expectedResults.add("/conduit/data/stream1/collector2/file4");
        expectedResults.add("/conduit/data/stream1/collector2/file5");
        expectedResults.add("/conduit/data/stream1/collector2/file6");
        expectedResults.add("/conduit/data/stream1/collector2/file7");
        expectedResults.add("/conduit/data/stream1/collector2/file8");
        expectedResults.add("/conduit/data/stream2/collector1/file1");
        expectedResults.add("/conduit/data/stream2/collector1/file2");
        expectedResults.add("/conduit/data/stream2/collector1/file3");
        expectedResults.add("/conduit/data/stream2/collector1/file4");
        expectedResults.add("/conduit/data/stream2/collector1/file5");
        expectedResults.add("/conduit/data/stream2/collector1/file6");
        expectedResults.add("/conduit/data/stream2/collector1/file7");
        expectedResults.add("/conduit/data/stream2/collector1/file8");
        expectedResults.add("/conduit/data/stream2/collector2/file1");
        expectedResults.add("/conduit/data/stream2/collector2/file2");
        expectedResults.add("/conduit/data/stream2/collector2/file3");
        expectedResults.add("/conduit/data/stream2/collector2/file4");
        expectedResults.add("/conduit/data/stream2/collector2/file5");
        expectedResults.add("/conduit/data/stream2/collector2/file6");
        expectedResults.add("/conduit/data/stream2/collector2/file7");
        expectedResults.add("/conduit/data/stream2/collector2/file8");
        expectedResults.add("/conduit/data/stream1/collector1/file1");
        expectedResults.add("/conduit/data/stream1/collector1/file2");
        expectedResults.add("/conduit/data/stream1/collector1/file3");
        expectedResults.add("/conduit/data/stream1/collector1/file4");
        expectedResults.add("/conduit/data/stream1/collector1/file5");
        expectedResults.add("/conduit/data/stream1/collector1/file6");
        expectedResults.add("/conduit/data/stream1/collector1/file7");
        expectedResults.add("/conduit/data/stream1/collector1/file8");
    }

    private void createExpectedTrash() {
        expectedTrashPaths.add("/conduit/data/stream2/collector2/file2");
        expectedTrashPaths.add("/conduit/data/stream2/collector2/file1");
        expectedTrashPaths.add("/conduit/data/stream1/collector1/file1");
        expectedTrashPaths.add("/conduit/data/stream2/collector1/file1");
        expectedTrashPaths.add("/conduit/data/stream2/collector1/file2");
        expectedTrashPaths.add("/conduit/data/stream1/collector1/file2");
        expectedTrashPaths.add("/conduit/data/stream1/collector2/file1");
        expectedTrashPaths.add("/conduit/data/stream1/collector2/file2");
    }

    private void validateExpectedOutput(Set<String> results, Set<String> trashPaths,
            Map<String, String> checkPointPaths) {
        // use TestNG assertions so the checks run even without the -ea JVM flag
        Assert.assertEquals(results, expectedResults);
        Assert.assertEquals(trashPaths, expectedTrashPaths);
        Assert.assertEquals(checkPointPaths, expectedCheckPointPaths);
    }

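    /**
     * Stubs the mocked FileSystem so that listStatus() returns a synthetic
     * layout of two streams (stream1, stream2) with two collectors each, every
     * collector holding NUMBER_OF_FILES files named file1..file9. The expected
     * sets above reflect this layout: file1..file8 per collector appear in the
     * listing results, and file8 is the expected checkpoint.
     */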
    private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
        FileStatus[] files = createTestData(2, "/conduit/data/stream", true);

        FileStatus[] stream1 = createTestData(2, "/conduit/data/stream1/collector", true);

        FileStatus[] stream3 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream1/collector1/file", true);

        FileStatus[] stream4 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream1/collector2/file", true);

        FileStatus[] stream2 = createTestData(2, "/conduit/data/stream2/collector", true);

        FileStatus[] stream5 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream2/collector1/file", true);

        FileStatus[] stream6 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream2/collector2/file", true);

        when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
        when(fs.getUri()).thenReturn(new URI("localhost"));
        when(fs.listStatus(cluster.getDataDir())).thenReturn(files);
        when(fs.listStatus(new Path("/conduit/data/stream1"))).thenReturn(stream1);

        when(fs.listStatus(new Path("/conduit/data/stream1/collector1"),
                any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream3);
        when(fs.listStatus(new Path("/conduit/data/stream2"))).thenReturn(stream2);
        when(fs.listStatus(new Path("/conduit/data/stream1/collector2"),
                any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream4);
        when(fs.listStatus(new Path("/conduit/data/stream2/collector1"),
                any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream5);
        when(fs.listStatus(new Path("/conduit/data/stream2/collector2"),
                any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream6);

        Path file = mock(Path.class);
        when(file.makeQualified(any(FileSystem.class))).thenReturn(new Path("/conduit/data/stream1/collector1/"));
    }

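    /**
     * Exercises LocalStreamService.createListing() against the mocked
     * FileSystem and validates the resulting listing, trash set, and
     * checkpoint paths against the expected sets. Note that this method is
     * private and carries no @Test annotation, so TestNG does not run it as
     * written.
     */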
    private void testCreateListing() {
        try {
            Cluster cluster = ClusterTest.buildLocalCluster();
            FileSystem fs = mock(FileSystem.class);
            createMockForFileSystem(fs, cluster);

            Map<FileStatus, String> results = new TreeMap<FileStatus, String>();
            Set<FileStatus> trashSet = new HashSet<FileStatus>();
            Table<String, String, String> checkpointPaths = HashBasedTable.create();
            fs.delete(cluster.getDataDir(), true);
            FileStatus dataDir = new FileStatus(20, false, 3, 23823, 2438232, cluster.getDataDir());
            fs.delete(new Path(cluster.getRootDir() + "/conduit-checkpoint"), true);

            Set<String> streamsToProcess = new HashSet<String>();
            streamsToProcess.add("stream1");
            streamsToProcess.add("stream2");
            TestLocalStreamService service = new TestLocalStreamService(null, cluster, null,
                    new FSCheckpointProvider(cluster.getRootDir() + "/conduit-checkpoint"), streamsToProcess);
            service.createListing(fs, dataDir, results, trashSet, checkpointPaths);

            Set<String> tmpResults = new LinkedHashSet<String>();
            // print the results
            for (FileStatus status : results.keySet()) {
                tmpResults.add(status.getPath().toString());
                LOG.debug("Results [" + status.getPath().toString() + "]");
            }

            // print the trash
            Iterator<FileStatus> it = trashSet.iterator();
            Set<String> tmpTrashPaths = new LinkedHashSet<String>();
            while (it.hasNext()) {
                FileStatus trashfile = it.next();
                tmpTrashPaths.add(trashfile.getPath().toString());
                LOG.debug("trash file [" + trashfile.getPath() + "]");
            }

            Map<String, String> tmpCheckPointPaths = new TreeMap<String, String>();
            Set<String> streams = checkpointPaths.rowKeySet();
            for (String streamName : streams) {
                Map<String, String> collectorCheckpointValueMap = checkpointPaths.row(streamName);
                for (String collector : collectorCheckpointValueMap.keySet()) {
                    String checkpointKey = AbstractService.getCheckPointKey(service.getName(), streamName,
                            collector);
                    LOG.debug("Check Pointing Key [" + checkpointKey + "] with value ["
                            + collectorCheckpointValueMap.get(collector) + "]");
                    tmpCheckPointPaths.put(checkpointKey, collectorCheckpointValueMap.get(collector));
                }
            }
            validateExpectedOutput(tmpResults, tmpTrashPaths, tmpCheckPointPaths);
            fs.delete(new Path(cluster.getRootDir() + "/conduit-checkpoint"), true);
            fs.delete(cluster.getDataDir(), true);
            fs.close();
        } catch (Exception e) {
            LOG.error("Error in running testCreateListing", e);
            Assert.fail("testCreateListing failed with exception: " + e.getMessage());
        }
    }

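    /**
     * Builds an array of synthetic FileStatus entries under the given path,
     * optionally appending a 1-based index to each file name.
     */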
    private FileStatus[] createTestData(int count, String path, boolean useSuffix) {
        FileStatus[] files = new FileStatus[count];
        for (int i = 1; i <= count; i++) {
            files[i - 1] = new FileStatus(20, false, 3, 23232, 232323,
                    new Path(path + (useSuffix ? Integer.toString(i) : "")));
        }
        return files;
    }

    private FileStatus[] createTestData(int count, String path) {
        return createTestData(count, path, false);
    }

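    /**
     * Builds a minimal ConduitConfig: one source stream ("stream1") on
     * "cluster1", the mini-cluster's job tracker, a file:///tmp filesystem,
     * and 48/24 hour data/trash retention.
     */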
    private ConduitConfig buildTestConduitConfig() throws Exception {
        JobConf conf = super.CreateJobConf();
        return buildTestConduitConfig(conf.get("mapred.job.tracker"), "file:///tmp", "conduit", "48", "24");
    }

    public static ConduitConfig buildTestConduitConfig(String jturl, String hdfsurl, String rootdir,
            String retentioninhours, String trashretentioninhours) throws Exception {

        Map<String, Integer> sourcestreams = new HashMap<String, Integer>();

        sourcestreams.put("cluster1", Integer.valueOf(retentioninhours));

        Map<String, SourceStream> streamMap = new HashMap<String, SourceStream>();
        streamMap.put("stream1", new SourceStream("stream1", sourcestreams, false));

        sourcestreams.clear();

        Map<String, DestinationStream> deststreamMap = new HashMap<String, DestinationStream>();
        deststreamMap.put("stream1",
                new DestinationStream("stream1", Integer.parseInt(retentioninhours), Boolean.TRUE, false));

        /*
         * sourcestreams.put("cluster2", new Integer(2)); streamMap.put("stream2",
         * new SourceStream("stream2", sourcestreams));
         */

        Set<String> sourcestreamnames = new HashSet<String>();

        for (Map.Entry<String, SourceStream> stream : streamMap.entrySet()) {
            sourcestreamnames.add(stream.getValue().getName());
        }
        Map<String, Cluster> clusterMap = new HashMap<String, Cluster>();

        clusterMap.put("cluster1", ClusterTest.buildLocalCluster(rootdir, "cluster1", hdfsurl, jturl,
                sourcestreamnames, deststreamMap));

        Map<String, String> defaults = new HashMap<String, String>();

        defaults.put(ConduitConfigParser.ROOTDIR, rootdir);
        defaults.put(ConduitConfigParser.RETENTION_IN_HOURS, retentioninhours);
        defaults.put(ConduitConfigParser.TRASH_RETENTION_IN_HOURS, trashretentioninhours);

        /*
         * clusterMap.put( "cluster2", ClusterTest.buildLocalCluster("cluster2",
         * "file:///tmp", conf.get("mapred.job.tracker")));
         */

        return new ConduitConfig(streamMap, clusterMap, defaults);
    }

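    /**
     * Feeds ten out-of-order FileStatus entries to populateTrashCommitPaths()
     * and expects the source paths back in sorted order (the expectedstatus
     * array lists the same ten paths sorted); also checks that no retry or
     * copy metrics were incremented.
     */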
    @Test
    public void testPopulateTrashPaths() throws Exception {
        FileStatus[] status = new FileStatus[10];
        String[] expectedstatus = new String[10];

        status[0] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test1/testcluster1/test1-2012-08-29-07-09_00000"));
        status[1] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test1/testcluster1/test1-2012-08-29-07-04_00000"));
        status[2] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test2/testcluster1/test2-2012-08-29-07-09_00003"));
        status[3] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test1/testcluster2/test1-2012-08-13-07-09_00000"));
        status[4] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test1/testcluster1/test1-2012-08-29-07-09_00009"));
        status[5] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test1/testcluster1/test1-2012-08-29-07-12_00000"));
        status[6] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test1/testcluster1/test1-2012-08-29-07-10_00000"));
        status[7] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test2/testcluster1/test2-2012-08-29-07-45_00000"));
        status[8] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test1/testcluster2/test1-2012-08-29-07-09_00078"));
        status[9] = new FileStatus(20, false, 3, 23823, 2438232,
                new Path("/conduit/data/test1/testcluster2/test1-2012-08-29-07-04_00034"));

        expectedstatus[0] = "/conduit/data/test1/testcluster1/test1-2012-08-29-07-04_00000";
        expectedstatus[1] = "/conduit/data/test1/testcluster1/test1-2012-08-29-07-09_00000";
        expectedstatus[2] = "/conduit/data/test1/testcluster1/test1-2012-08-29-07-09_00009";
        expectedstatus[3] = "/conduit/data/test1/testcluster1/test1-2012-08-29-07-10_00000";
        expectedstatus[4] = "/conduit/data/test1/testcluster1/test1-2012-08-29-07-12_00000";

        expectedstatus[5] = "/conduit/data/test1/testcluster2/test1-2012-08-13-07-09_00000";
        expectedstatus[6] = "/conduit/data/test1/testcluster2/test1-2012-08-29-07-04_00034";
        expectedstatus[7] = "/conduit/data/test1/testcluster2/test1-2012-08-29-07-09_00078";

        expectedstatus[8] = "/conduit/data/test2/testcluster1/test2-2012-08-29-07-09_00003";
        expectedstatus[9] = "/conduit/data/test2/testcluster1/test2-2012-08-29-07-45_00000";

        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        for (int i = 0; i < 10; ++i) {
            trashSet.add(status[i]);
        }

        ConduitConfig conduitConfig = buildTestConduitConfig();
        Cluster cluster = ClusterTest.buildLocalCluster();
        Set<String> streamsToProcess = new HashSet<String>();
        streamsToProcess.addAll(conduitConfig.getSourceStreams().keySet());
        TestLocalStreamService service = new TestLocalStreamService(conduitConfig, cluster, null,
                new FSCheckpointProvider(cluster.getCheckpointDir()), streamsToProcess);

        Map<Path, Path> trashCommitPaths = service.populateTrashCommitPaths(trashSet);

        Set<Path> srcPaths = trashCommitPaths.keySet();

        Iterator<Path> it = srcPaths.iterator();
        int i = 0;

        while (it.hasNext()) {
            String actualPath = it.next().toString();
            String expectedPath = expectedstatus[i];

            LOG.debug("Comparing Trash Paths Actual [" + actualPath + "] Expected [" + expectedPath + "]");
            Assert.assertEquals(actualPath, expectedPath);

            i++;
        }

        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "stream1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.RETRY_CHECKPOINT, "stream1").getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "stream1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "stream1").getValue().longValue(), 0);
    }

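    /**
     * Single run over test-lss-conduit.xml: no failures or retries, commit
     * time under a minute, a positive last-file-processed timestamp, and nine
     * (NUMBER_OF_FILES) files copied for stream test1.
     */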
    @Test
    public void testMapReduce() throws Exception {
        LOG.info("Running LocalStreamIntegration for filename test-lss-conduit.xml");
        testMapReduce("test-lss-conduit.xml", 1, false);

        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.FAILURES, "test1")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.COMMIT_TIME, "test1")
                .getValue().longValue() < 60000);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RUNTIME, "test1")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<AbsoluteGauge>getMetric("LocalStreamService", AbstractService.LAST_FILE_PROCESSED, "test1")
                .getValue().longValue() > 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test1").getValue().longValue(), 9);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test1")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.JOB_EXECUTION_TIME, "test1").getValue().longValue() >= 0);
    }

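    /**
     * Same flow over a three-stream config (test1, test2, test3); each stream
     * should show nine files copied and clean failure/retry gauges.
     */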
    @Test(groups = { "integration" })
    public void testMultipleStreamMapReduce() throws Exception {
        LOG.info("Running LocalStreamIntegration for filename test-lss-multiple-conduit.xml");
        testMapReduce("test-lss-multiple-conduit.xml", 1, false);

        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.FAILURES, "test1")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.COMMIT_TIME, "test1")
                .getValue().longValue() < 60000);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RUNTIME, "test1")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<AbsoluteGauge>getMetric("LocalStreamService", AbstractService.LAST_FILE_PROCESSED, "test1")
                .getValue().longValue() > 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test1").getValue().longValue(), 9);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.FAILURES, "test2")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.COMMIT_TIME, "test2")
                .getValue().longValue() < 60000);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RUNTIME, "test2")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<AbsoluteGauge>getMetric("LocalStreamService", AbstractService.LAST_FILE_PROCESSED, "test2")
                .getValue().longValue() > 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test2")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test2")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test2")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test2").getValue().longValue(), 9);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.FAILURES, "test3")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.COMMIT_TIME, "test3")
                .getValue().longValue() < 60000);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RUNTIME, "test3")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<AbsoluteGauge>getMetric("LocalStreamService", AbstractService.LAST_FILE_PROCESSED, "test3")
                .getValue().longValue() > 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test3")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test3")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test3")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test3").getValue().longValue(), 9);
    }

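    /**
     * Same three-stream flow as above, but each service runs twice
     * (test-lss-multiple-conduit1.xml); after both runs the copied-file gauge
     * for each stream is expected to read 19.
     */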
    @Test(groups = { "integration" })
    public void testMultipleStreamMapReduceWithMultipleRuns() throws Exception {
        LOG.info("Running LocalStreamIntegration for filename test-lss-multiple-conduit1.xml, running twice");
        testMapReduce("test-lss-multiple-conduit1.xml", 2, false);

        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.FAILURES, "test1")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.COMMIT_TIME, "test1")
                .getValue().longValue() < 60000);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RUNTIME, "test1")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<AbsoluteGauge>getMetric("LocalStreamService", AbstractService.LAST_FILE_PROCESSED, "test1")
                .getValue().longValue() > 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test1").getValue().longValue(), 19);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.FAILURES, "test2")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.COMMIT_TIME, "test2")
                .getValue().longValue() < 60000);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RUNTIME, "test2")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<AbsoluteGauge>getMetric("LocalStreamService", AbstractService.LAST_FILE_PROCESSED, "test2")
                .getValue().longValue() > 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test2")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test2")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test2")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test2").getValue().longValue(), 19);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.FAILURES, "test3")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.COMMIT_TIME, "test3")
                .getValue().longValue() < 60000);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RUNTIME, "test3")
                .getValue().longValue(), 0);
        Assert.assertTrue(ConduitMetrics
                .<AbsoluteGauge>getMetric("LocalStreamService", AbstractService.LAST_FILE_PROCESSED, "test3")
                .getValue().longValue() > 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test3")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test3")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test3")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test3").getValue().longValue(), 19);
    }

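    /**
     * Caps files processed per collector per run at (NUMBER_OF_FILES + 1) / 2
     * via FILES_PER_COLLECETOR_PER_LOCAL_STREAM, so one run cannot drain a
     * collector; testMapReduce() runs execute() twice when throttling is on.
     */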
    @Test
    public void testThrottle() throws Exception {
        System.setProperty(ConduitConstants.FILES_PER_COLLECETOR_PER_LOCAL_STREAM,
                String.valueOf(((NUMBER_OF_FILES + 1) / 2)));
        LOG.info("Running LocalStreamIntegration for filename test-lss-conduit.xml");
        testMapReduce("test-lss-conduit.xml", 1, true);
        System.clearProperty(ConduitConstants.FILES_PER_COLLECETOR_PER_LOCAL_STREAM);
    }

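    /** No-op CheckpointProvider for tests that do not exercise checkpointing. */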
    private static class NullCheckPointProvider implements CheckpointProvider {

        @Override
        public byte[] read(String key) {
            return new byte[0];
        }

        @Override
        public void checkpoint(String key, byte[] checkpoint) {
        }

        @Override
        public void close() {
        }
    }

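    /**
     * With the s3n destination config (test-lss-conduit-s3n.xml), every
     * service instance should select S3NCopyMapper as its mapper class.
     */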
    @Test
    public void testCopyMapperImplMethod() throws Exception {
        ConduitConfigParser parser = new ConduitConfigParser("test-lss-conduit-s3n.xml");
        Set<String> streamsToProcess = new HashSet<String>();
        ConduitConfig config = parser.getConfig();
        streamsToProcess.addAll(config.getSourceStreams().keySet());

        Set<String> clustersToProcess = new HashSet<String>();
        Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
        for (SourceStream sStream : config.getSourceStreams().values()) {
            for (String cluster : sStream.getSourceClusters()) {
                clustersToProcess.add(cluster);
            }
        }

        for (String clusterName : clustersToProcess) {
            Cluster cluster = config.getClusters().get(clusterName);
            cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
            TestLocalStreamService service = new TestLocalStreamService(config, cluster, null,
                    new NullCheckPointProvider(), streamsToProcess);
            services.add(service);
        }

        for (TestLocalStreamService service : services) {
            Assert.assertEquals(service.getMapperClass(), S3NCopyMapper.class);
        }

        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.EMPTYDIR_CREATE, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test1").getValue().longValue(), 0);
    }

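    /**
     * The next two tests drive testClusterName() without and with an explicit
     * current cluster; with no current cluster, fs.default.name and
     * src.fs.default.name in the job configuration must match.
     */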
    @Test
    public void testWithOutClusterName() throws Exception {
        testClusterName("test-lss-conduit.xml", null);

        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test1").getValue().longValue(), 0);
    }

    @Test
    public void testWithClusterName() throws Exception {
        testClusterName("test-lss-conduit.xml", "testcluster2");

        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_MKDIR, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_CHECKPOINT, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics
                .<SlidingTimeWindowGauge>getMetric("LocalStreamService", AbstractService.RETRY_RENAME, "test1")
                .getValue().longValue(), 0);
        Assert.assertEquals(ConduitMetrics.<SlidingTimeWindowGauge>getMetric("LocalStreamService",
                AbstractService.FILES_COPIED_COUNT, "test1").getValue().longValue(), 0);
    }

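    /**
     * Shared body for the cluster-name tests: builds one service per source
     * cluster (optionally with a current cluster), stages MR input, runs the
     * job, validates the emitted audit counter files (two counters per input
     * file, their values summing to the total message count), and checks the
     * fs.default.name / src.fs.default.name wiring in the job configuration.
     */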
    private void testClusterName(String configName, String currentClusterName) throws Exception {
        ConduitConfigParser parser = new ConduitConfigParser(configName);
        ConduitConfig config = parser.getConfig();
        Set<String> streamsToProcess = new HashSet<String>();
        streamsToProcess.addAll(config.getSourceStreams().keySet());
        Set<String> clustersToProcess = new HashSet<String>();
        Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
        Cluster currentCluster = null;
        for (SourceStream sStream : config.getSourceStreams().values()) {
            for (String cluster : sStream.getSourceClusters()) {
                clustersToProcess.add(cluster);
            }
        }
        if (currentClusterName != null) {
            currentCluster = config.getClusters().get(currentClusterName);
        }
        for (String clusterName : clustersToProcess) {
            Cluster cluster = config.getClusters().get(clusterName);
            cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
            TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
                    new NullCheckPointProvider(), streamsToProcess);
            services.add(service);
        }

        for (TestLocalStreamService service : services) {
            FileSystem fs = service.getFileSystem();
            service.preExecute();
            if (currentClusterName != null)
                Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);
            // stage the MR input under a temporary path, then create and run the job
            Path tmpJobInputPath = new Path("/tmp/job/input/path");
            Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
            Set<FileStatus> trashSet = new HashSet<FileStatus>();
            // checkpointKey, CheckPointPath
            Table<String, String, String> checkpointPaths = HashBasedTable.create();
            service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
            Job testJobConf = service.createJob(tmpJobInputPath, 1000);
            testJobConf.waitForCompletion(true);

            int numberOfCountersPerFile = 0;
            long sumOfCounterValues = 0;
            Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
                    "counters");
            FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
                public boolean accept(Path path) {
                    return path.toString().contains("part");
                }
            });
            for (FileStatus fileSt : statuses) {
                Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
                while (scanner.hasNext()) {
                    String counterNameValue = null;
                    try {
                        counterNameValue = scanner.next();
                        String[] tmp = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                        Assert.assertEquals(4, tmp.length);
                        Long numOfMsgs = Long.parseLong(tmp[3]);
                        numberOfCountersPerFile++;
                        sumOfCounterValues += numOfMsgs;
                    } catch (Exception e) {
                        LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                                + "..skipping the line", e);
                    }
                }
            }
            // Should have 2 counters for each file
            Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
            // sum of all counter values should be equal to total number of messages
            Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);

            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                    service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
            Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
                    service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
            if (currentCluster == null)
                Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                        testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));
            service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
        }

    }

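    /**
     * Common MapReduce driver: parses the named config, builds one
     * TestLocalStreamService per source cluster, runs each service timesToRun
     * times (executing twice per iteration when throttling is enabled), and
     * sleeps until the next scheduled run between iterations.
     */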
    private void testMapReduce(String fileName, int timesToRun, boolean throttle) throws Exception {

        ConduitConfigParser parser = new ConduitConfigParser(fileName);
        ConduitConfig config = parser.getConfig();
        Set<String> streamsToProcess = new HashSet<String>();
        streamsToProcess.addAll(config.getSourceStreams().keySet());
        Set<String> clustersToProcess = new HashSet<String>();
        Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();

        for (SourceStream sStream : config.getSourceStreams().values()) {
            for (String cluster : sStream.getSourceClusters()) {
                clustersToProcess.add(cluster);
            }
        }

        for (String clusterName : clustersToProcess) {
            Cluster cluster = config.getClusters().get(clusterName);
            cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
            TestLocalStreamService service = new TestLocalStreamService(config, cluster, null,
                    new FSCheckpointProvider(cluster.getCheckpointDir()), streamsToProcess);
            services.add(service);
            service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
        }

        for (TestLocalStreamService service : services) {
            for (int i = 0; i < timesToRun; ++i) {
                service.preExecute();
                // set BYTES_PER_MAPPER to a lower value for test
                service.setBytesPerMapper(100);
                service.execute();
                /*
                 * When throttling is enabled, each run processes at most
                 * FILES_PER_COLLECETOR_PER_LOCAL_STREAM files per collector, so the
                 * service must run enough extra times to drain each collector; with
                 * the cap set to (NUMBER_OF_FILES + 1) / 2 in testThrottle(), one
                 * extra execute() is enough.
                 */
                if (throttle) {
                    service.execute();
                }
                long finishTime = System.currentTimeMillis();
                service.postExecute();
                Thread.sleep(1000);
                /*
                 * check for number of times local stream service should run and no need
                 * of waiting if it is the last run of service
                 */
                if (timesToRun > 1 && (i < (timesToRun - 1))) {
                    long sleepTime = service.getMSecondsTillNextRun(finishTime);
                    Thread.sleep(sleepTime);
                }
            }
            service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
        }
    }

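    /**
     * Writes three Base64-encoded audit messages to a file (two stamped in
     * one one-minute window, one in the next), gzips it with FileUtil.gzip(),
     * and asserts the per-window message counts come back as 2 and 1.
     */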
    @Test
    public void testFileUtil() throws Exception {
        String streamName = "test1";
        Path rootDir = new Path("/tmp/localServiceTest/testcluster2/mergeservice");
        Path dataDir = new Path(rootDir, "data/test1/testcluster2");
        FileSystem fs = dataDir.getFileSystem(new Configuration());
        fs.mkdirs(dataDir);
        String filenameStr = streamName + "-"
                + TestLocalStreamService.getDateAsYYYYMMDDHHmm(new Date()) + "_00001";
        Path src = new Path(dataDir, filenameStr);

        LOG.debug("Creating Test Data with filename [" + filenameStr + "]");
        FSDataOutputStream streamout = fs.create(src);
        String content = "Creating Test data for teststream";
        Message msg = new Message(content.getBytes());
        long currentTimestamp = new Date().getTime();
        AuditUtil.attachHeaders(msg, currentTimestamp);
        byte[] encodeMsg = Base64.encodeBase64(msg.getData().array());
        streamout.write(encodeMsg);
        streamout.write("\n".getBytes());
        streamout.write(encodeMsg);
        streamout.write("\n".getBytes());
        long nextMinuteTimeStamp = currentTimestamp + 60000;
        // Generate a message with a different timestamp; the default audit window is 60 seconds
        AuditUtil.attachHeaders(msg, nextMinuteTimeStamp);
        encodeMsg = Base64.encodeBase64(msg.getData().array());
        streamout.write(encodeMsg);
        streamout.close();
        Map<Long, Long> received = new HashMap<Long, Long>();
        Path target = new Path(new Path(rootDir, "system/tmp/LocalStreamService_testcluster2_test1@/"
                + "job_local_0001/attempt_local_0001_m_000000_0/"), filenameStr + ".gz");
        FileUtil.gzip(src, target, new Configuration(), received);
        Assert.assertEquals(2, received.size());
        // current timestamp window = currentTimestamp - (currentTimestamp % 60000)
        Assert.assertTrue(2 == received.get(currentTimestamp - (currentTimestamp % 60000)));
        // next timestamp window = nextMinuteTimeStamp - (nextMinuteTimeStamp %60000)
        Assert.assertTrue(1 == received.get(nextMinuteTimeStamp - (nextMinuteTimeStamp % 60000)));
        fs.delete(rootDir, true);
    }
}