com.datatorrent.lib.dedup.DedupUsingCategoricalExpiryTest.java Source code

Introduction

Here is the source code for com.datatorrent.lib.dedup.DedupUsingCategoricalExpiryTest.java. It is a JUnit test from the DataTorrent Malhar library that exercises a Bloom-filter-based deduplication operator whose buckets expire categorically (an LRU policy over a fixed number of expiry buckets) and are persisted to an HDFS-backed bucket store.

Source

/**
 * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.dedup;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.Exchanger;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import javax.validation.constraints.NotNull;

import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import com.datatorrent.api.Context;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DAG;
import com.datatorrent.lib.bucket.AbstractBucket;
import com.datatorrent.lib.bucket.AbstractCategoricalBucketManager.ExpiryPolicy;
import com.datatorrent.lib.bucket.BucketManager;
import com.datatorrent.lib.bucket.CategoricalBucketManagerPOJOImpl;
import com.datatorrent.lib.bucket.CategoricalBucketManagerTest.TestEvent;
import com.datatorrent.lib.bucket.ExpirableHdfsBucketStore;
import com.datatorrent.lib.helper.OperatorContextTestHelper;
import com.datatorrent.lib.testbench.CollectorTestSink;
import com.datatorrent.lib.util.PojoUtils;
import com.datatorrent.lib.util.PojoUtils.Getter;
import com.datatorrent.lib.util.TestUtils;
import com.datatorrent.stram.engine.PortContext;

/**
 * Tests for {@link AbstractBloomFilterDeduper} configured with categorical expiry.
 */
public class DedupUsingCategoricalExpiryTest {
    private static final Logger logger = LoggerFactory.getLogger(DedupUsingCategoricalExpiryTest.class);

    private static final String APPLICATION_PATH_PREFIX = "target/DedupUsingCategoricalExpiryTest";
    private static final String APP_ID = "DedupUsingCategoricalExpiryTest";
    private static final int OPERATOR_ID = 0;

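    /** Hands bucket keys from the operator's bucket-loading callback to the test thread. */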
    private static final Exchanger<Long> eventBucketExchanger = new Exchanger<Long>();

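    /**
     * Test deduper that plugs a categorical bucket manager into
     * {@link AbstractBloomFilterDeduper} and exposes hooks used by these tests.
     */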
    private static class DummyDeduperCategorical extends AbstractBloomFilterDeduper<Object, Object> {

        private transient Getter<Object, Object> getter;

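        /**
         * Points the HDFS bucket store at the operator id, application path and
         * partition settings before running the regular deduper setup.
         */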
        @Override
        public void setup(OperatorContext context) {
            ((ExpirableHdfsBucketStore<Object>) bucketManager.getBucketStore()).setConfiguration(context.getId(),
                    context.getValue(DAG.APPLICATION_PATH), partitionKeys, partitionMask);
            super.setup(context);
        }

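        /**
         * Builds the dedup-key getter from the bucket manager's key expression.
         * Requires the POJO class to be known, which the tests provide through the
         * input port's TUPLE_CLASS attribute.
         */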
        @Override
        public void activate(Context context) {
            super.activate(context);
            Preconditions.checkArgument(getPojoClass() != null);
            getter = PojoUtils.createGetter(getPojoClass(),
                    ((CategoricalBucketManagerPOJOImpl) bucketManager).getKeyExpression(), Object.class);
        }

        @Override
        protected Object convert(Object event) {
            return event;
        }

        public void setBucketManager(@NotNull BucketManager<Object> bucketManager) {
            this.bucketManager = Preconditions.checkNotNull(bucketManager, "storage manager");
            super.setBucketManager(bucketManager);
        }

        @Override
        public BucketManager<Object> getBucketManager() {
            return (BucketManager<Object>) bucketManager;
        }

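        /** Extracts the deduplication key from the event via the generated getter. */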
        @Override
        protected Object getEventKey(Object event) {
            return getter.get(event);
        }

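        /**
         * Publishes the loaded bucket's key on the exchanger so a test thread can
         * synchronize with bucket loading. The 1 ms timeout keeps testDedup, which
         * never reads from the exchanger, from blocking; testDeduperRedeploy waits
         * on the other side.
         */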
        @Override
        public void bucketLoaded(AbstractBucket<Object> bucket) {
            try {
                super.bucketLoaded(bucket);
                logger.debug("Bucket {} loaded", bucket.bucketKey);
                eventBucketExchanger.exchange(bucket.bucketKey, 1, TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                logger.debug("Timeout happened {}", e);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }

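        /**
         * Seeds the waitingEvents map directly, simulating an event that was still
         * waiting on a bucket load when the operator went down.
         */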
        public void addEventManuallyToWaiting(Object event) {
            waitingEvents.put(bucketManager.getBucketKeyFor(event), Lists.newArrayList(event));
        }

    }

    private static DummyDeduperCategorical deduper;
    private static String applicationPath;

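    /**
     * Feeds five events with distinct keys through a single window and verifies
     * that all five are emitted as unique, with no duplicates and no expired tuples.
     */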
    @Test
    public void testDedup() {
        List<TestEvent> events = Lists.newArrayList();
        int k = 1;
        int e = 10;
        for (int i = 0; i < 5; i++) {
            events.add(new TestEvent(k, e, "f" + k));
            k++;
            e += 10;
        }

        com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap attributes =
                new com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap();
        attributes.put(DAG.APPLICATION_ID, APP_ID);
        attributes.put(DAG.APPLICATION_PATH, applicationPath);
        attributes.put(DAG.InputPortMeta.TUPLE_CLASS, TestEvent.class);

        OperatorContext context = new OperatorContextTestHelper.TestIdOperatorContext(OPERATOR_ID, attributes);
        deduper.setup(context);
        deduper.input.setup(new PortContext(attributes, context));
        deduper.activate(context);
        CollectorTestSink<TestEvent> collectorTestSink = new CollectorTestSink<TestEvent>();
        TestUtils.setSink(deduper.output, collectorTestSink);
        CollectorTestSink<TestEvent> collectorTestSinkDup = new CollectorTestSink<TestEvent>();
        TestUtils.setSink(deduper.duplicates, collectorTestSinkDup);
        CollectorTestSink<TestEvent> collectorTestSinkExp = new CollectorTestSink<TestEvent>();
        TestUtils.setSink(deduper.expired, collectorTestSinkExp);

        logger.debug("start round 0");
        deduper.beginWindow(0);
        testRound(events);
        deduper.handleIdleTime();
        deduper.endWindow();

        Assert.assertEquals("Output tuples", 5, collectorTestSink.collectedTuples.size());
        Assert.assertEquals("Duplicate tuples", 0, collectorTestSinkDup.collectedTuples.size());
        Assert.assertEquals("Expired tuples", 0, collectorTestSinkExp.collectedTuples.size());
        collectorTestSink.clear();
        collectorTestSinkDup.clear();
        collectorTestSinkExp.clear();
        logger.debug("end round 0");
        deduper.teardown();
    }

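    /** Pushes each event into the deduper's input port. */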
    private void testRound(List<TestEvent> events) {
        for (TestEvent event : events) {
            deduper.input.process(event);
        }
    }

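    /**
     * Simulates a redeploy: an event is placed in the waiting queue, setup is run
     * again, and the test waits up to 500 ms for the corresponding bucket to load.
     */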
    @Test
    public void testDeduperRedeploy() throws Exception {
        com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap attributes =
                new com.datatorrent.api.Attribute.AttributeMap.DefaultAttributeMap();
        attributes.put(DAG.APPLICATION_ID, APP_ID);
        attributes.put(DAG.APPLICATION_PATH, applicationPath);

        deduper.addEventManuallyToWaiting(new TestEvent(6, 60, "f5"));
        deduper.setup(new OperatorContextTestHelper.TestIdOperatorContext(OPERATOR_ID, attributes));
        eventBucketExchanger.exchange(null, 500, TimeUnit.MILLISECONDS);
        deduper.endWindow();
        deduper.teardown();
    }

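    /**
     * Builds the deduper once for all tests: a categorical bucket manager with five
     * expiry buckets, an LRU expiry policy, a 60 s guard against bucket eviction,
     * an HDFS-backed bucket store, and key/expiry expressions evaluated against
     * the incoming POJO.
     */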
    @BeforeClass
    public static void setup() {
        applicationPath = OperatorContextTestHelper.getUniqueApplicationPath(APPLICATION_PATH_PREFIX);
        ExpirableHdfsBucketStore<Object> bucketStore = new ExpirableHdfsBucketStore<Object>();
        deduper = new DummyDeduperCategorical();
        CategoricalBucketManagerPOJOImpl storageManager = new CategoricalBucketManagerPOJOImpl();
        storageManager.setExpiryBuckets(5);
        storageManager.setPolicy(ExpiryPolicy.LRU);
        storageManager.setMillisPreventingBucketEviction(60000);
        storageManager.setBucketStore(bucketStore);
        storageManager.setKeyExpression("{$}.getEventKey()");
        storageManager.setExpiryExpression("{$}.getExpiryKey()");
        deduper.setBucketManager(storageManager);
    }

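    /** Removes the temporary application directory created for the test run. */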
    @AfterClass
    public static void teardown() {
        Path root = new Path(applicationPath);
        try {
            FileSystem fs = FileSystem.newInstance(root.toUri(), new Configuration());
            fs.delete(root, true);
            logger.debug("Deleted path: " + applicationPath);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
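
The key and expiry expressions configured in setup() ({$}.getEventKey() and {$}.getExpiryKey()) are evaluated against the incoming POJO, so the event class must expose matching getters. TestEvent itself is imported from CategoricalBucketManagerTest and is not shown above; the following is a minimal sketch of a compatible event class, with field names and getters inferred from the constructor calls new TestEvent(k, e, "f" + k) rather than taken from the library source.

// A minimal sketch, not part of the library: a plausible shape for the event
// POJO this test depends on. Field names below are assumptions inferred from
// the constructor calls new TestEvent(k, e, "f" + k) and the expressions
// {$}.getEventKey() and {$}.getExpiryKey().
public class TestEvent {
    private final int eventKey;     // deduplication key, read by {$}.getEventKey()
    private final int time;         // numeric payload; unused by categorical expiry
    private final String expiryKey; // category ("f1", "f2", ...), read by {$}.getExpiryKey()

    public TestEvent(int eventKey, int time, String expiryKey) {
        this.eventKey = eventKey;
        this.time = time;
        this.expiryKey = expiryKey;
    }

    public int getEventKey() {
        return eventKey;
    }

    public int getTime() {
        return time;
    }

    public String getExpiryKey() {
        return expiryKey;
    }
}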