apex.benchmark.ApplicationDimensionComputation.java Source code

Introduction

Here is the source code for apex.benchmark.ApplicationDimensionComputation.java, a benchmark application that feeds generated tuples into dimensions computation and an HDHT dimension store, with optional pub-sub websocket query support.

Source

/**
 * Licensed under the terms of the Apache License 2.0. Please see LICENSE file in the project root for terms.
 */
package apex.benchmark;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.net.URI;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.apex.malhar.lib.dimensions.DimensionsEvent.Aggregate;
import org.apache.apex.malhar.lib.dimensions.DimensionsEvent.InputEvent;
import org.apache.commons.lang.mutable.MutableLong;
import org.apache.hadoop.conf.Configuration;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

import com.datatorrent.api.Context;
import com.datatorrent.api.Context.DAGContext;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.Context.PortContext;
import com.datatorrent.api.DAG;
import com.datatorrent.api.DAG.Locality;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.StreamCodec;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.api.annotation.ApplicationAnnotation;
import com.datatorrent.common.partitioner.StatelessPartitioner;
import com.datatorrent.contrib.dimensions.AppDataSingleSchemaDimensionStoreHDHT;
import com.datatorrent.contrib.dimensions.DimensionStoreHDHTNonEmptyQueryResultUnifier;
import com.datatorrent.contrib.hdht.tfile.TFileImpl;
import com.datatorrent.lib.appdata.schemas.SchemaUtils;
import com.datatorrent.lib.counters.BasicCounters;
import com.datatorrent.lib.dimensions.DimensionsComputationFlexibleSingleSchemaPOJO;
import com.datatorrent.lib.io.PubSubWebSocketAppDataQuery;
import com.datatorrent.lib.io.PubSubWebSocketAppDataResult;
import com.datatorrent.lib.statistics.DimensionsComputationUnifierImpl;
import com.datatorrent.lib.stream.DevNull;
import com.datatorrent.netlet.util.Slice;

/**
 * This application includes just the tuple generator and dimensions computation,
 * plus the dimension store and optional query support.
 *
 * @author bright
 */
@ApplicationAnnotation(name = ApplicationDimensionComputation.APP_NAME)
public class ApplicationDimensionComputation implements StreamingApplication {
    public static final String APP_NAME = "DimensionComputation";
    public static final String DIMENSION_SCHEMA = "eventSchema.json";
    private static final Logger logger = LoggerFactory.getLogger(ApplicationDimensionComputation.class);

    protected static final int PARTITION_NUM = 20;

    protected String eventSchemaLocation = DIMENSION_SCHEMA;
    protected String PROP_STORE_PATH;

    protected int storePartitionCount = 4;

    protected boolean includeQuery = true;

    protected static final int STREAMING_WINDOW_SIZE_MILLIS = 200;

    public ApplicationDimensionComputation() {
        this(APP_NAME);
    }

    public ApplicationDimensionComputation(String appName) {
        PROP_STORE_PATH = "dt.application." + appName + ".operator.Store.fileStore.basePathPrefix";
    }

    @Override
    public void populateDAG(DAG dag, Configuration configuration) {
        DimensionTupleGenerateOperator generateOperator = new DimensionTupleGenerateOperator();
        dag.addOperator("Generator", generateOperator);
        dag.setAttribute(generateOperator, Context.OperatorContext.PARTITIONER,
                new StatelessPartitioner<DimensionTupleGenerateOperator>(PARTITION_NUM));

        populateDimensionsDAG(dag, configuration, generateOperator.outputPort);
    }

    public void populateDimensionsDAG(DAG dag, Configuration conf, DefaultOutputPort<DimensionTuple> upstreamPort) {
        final String eventSchema = SchemaUtils.jarResourceFileToString(eventSchemaLocation);

        // dimension
        DimensionsComputationFlexibleSingleSchemaPOJO dimensions = dag.addOperator("DimensionsComputation",
                DimensionsComputationFlexibleSingleSchemaPOJO.class);

        // Set operator properties
        // key expression
        {
            Map<String, String> keyToExpression = Maps.newHashMap();
            keyToExpression.put("campaignId", DimensionTuple.CAMPAIGNID);
            keyToExpression.put("time", DimensionTuple.EVENTTIME);
            dimensions.setKeyToExpression(keyToExpression);
        }

        // aggregate expression
        {
            Map<String, String> valueToExpression = Maps.newHashMap();
            valueToExpression.put("clicks", DimensionTuple.CLICKS);
            valueToExpression.put("latency", DimensionTuple.LATENCY);

            dimensions.setAggregateToExpression(valueToExpression);
        }

        // event schema
        dimensions.setConfigurationSchemaJSON(eventSchema);
        dimensions.setUnifier(new DimensionsComputationUnifierImpl<InputEvent, Aggregate>());

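        // The unifier merges partial aggregates from all computation partitions,
        // so give it a generous memory allocation.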
        dag.setUnifierAttribute(dimensions.output, OperatorContext.MEMORY_MB, 10240);

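        // Deploy the dimensions computation partitions in parallel with the
        // upstream generator partitions (one computation instance per generator).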
        dag.setInputPortAttribute(dimensions.input, Context.PortContext.PARTITION_PARALLEL, true);

        // store
        AppDataSingleSchemaDimensionStoreHDHT store = createStore(dag, conf, eventSchema);
        store.setCacheWindowDuration(10000 * 5 / STREAMING_WINDOW_SIZE_MILLIS); // cache ~50 seconds of data (250 windows of 200 ms)
        dag.addStream("GenerateStream", upstreamPort, dimensions.input).setLocality(Locality.CONTAINER_LOCAL);

        StoreStreamCodec codec = new StoreStreamCodec();
        dag.setInputPortAttribute(store.input, PortContext.STREAM_CODEC, codec);
        dag.addStream("DimensionalStream", dimensions.output, store.input);

        if (includeQuery) {
            createQuery(dag, conf, store);

            // wsOut
            PubSubWebSocketAppDataResult wsOut = createQueryResult(dag, conf, store);

            dag.addStream("QueryResult", store.queryResult, wsOut.input);
        } else {
            DevNull<Object> devNull = new DevNull<>();
            dag.addOperator("devNull", devNull);
            dag.addStream("QueryResult", store.queryResult, devNull.data);
        }

        dag.setAttribute(DAGContext.STREAMING_WINDOW_SIZE_MILLIS, STREAMING_WINDOW_SIZE_MILLIS);
    }

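    /**
     * StreamCodec for the store's input port: partitions each Aggregate by the hash
     * of its first String key field, so all aggregates for the same key are routed
     * to the same store partition. The Kryo instance is transient and is rebuilt in
     * readObject() because Kryo itself is not serializable.
     */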
    protected static class StoreStreamCodec implements StreamCodec<Aggregate>, Serializable {
        private static final long serialVersionUID = -482870472621208905L;

        protected transient Kryo kryo;

        public StoreStreamCodec() {
            this.kryo = new Kryo();
            this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
        }

        private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
            in.defaultReadObject();
            this.kryo = new Kryo();
            this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
        }

        @Override
        public int getPartition(Aggregate aggregate) {
            return aggregate.getEventKey().getKey().getFieldsString()[0].hashCode();
        }

        @Override
        public Object fromByteArray(Slice fragment) {
            final Input input = new Input(fragment.buffer, fragment.offset, fragment.length);
            try {
                return kryo.readClassAndObject(input);
            } finally {
                input.close();
            }
        }

        @Override
        public Slice toByteArray(Aggregate o) {
            final Output output = new Output(32, -1);
            try {
                kryo.writeClassAndObject(output, o);
            } finally {
                output.close();
            }
            return new Slice(output.getBuffer(), 0, output.position());
        }
    }

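    /**
     * Creates the HDHT dimension store backed by DTFile under the configured base
     * path, enables counters aggregation, and, when the store is partitioned,
     * installs the DimensionStoreHDHTNonEmptyQueryResultUnifier to merge
     * partitioned query results.
     */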
    protected AppDataSingleSchemaDimensionStoreHDHT createStore(DAG dag, Configuration conf, String eventSchema) {
        AppDataSingleSchemaDimensionStoreHDHT store = dag.addOperator("Store", ProcessTimeAwareStore.class);
        store.setUpdateEnumValues(true);
        String basePath = Preconditions.checkNotNull(conf.get(PROP_STORE_PATH),
                "base path should be specified in the properties.xml");
        TFileImpl hdsFile = new TFileImpl.DTFileImpl();
        // Append the launch timestamp so each run writes to a fresh store directory.
        basePath += System.currentTimeMillis();
        hdsFile.setBasePath(basePath);

        store.setFileStore(hdsFile);
        dag.setAttribute(store, Context.OperatorContext.COUNTERS_AGGREGATOR,
                new BasicCounters.LongAggregator<MutableLong>());
        store.setConfigurationSchemaJSON(eventSchema);
        store.setPartitionCount(storePartitionCount);
        if (storePartitionCount > 1) {
            store.setQueryResultUnifier(new DimensionStoreHDHTNonEmptyQueryResultUnifier());
        }
        return store;
    }

    protected String getQueryUriString(DAG dag, Configuration conf) {
        return ConfigUtil.getAppDataQueryPubSubUriString(dag, conf);
    }

    protected URI getQueryUri(DAG dag, Configuration conf) {
        return URI.create(getQueryUriString(dag, conf));
    }

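    /**
     * Creates the pub-sub websocket query operator and embeds it into the store
     * as its query info provider.
     */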
    protected PubSubWebSocketAppDataQuery createQuery(DAG dag, Configuration conf,
            AppDataSingleSchemaDimensionStoreHDHT store) {
        PubSubWebSocketAppDataQuery query = new PubSubWebSocketAppDataQuery();
        URI queryUri = getQueryUri(dag, conf);
        logger.info("QueryUri: {}", queryUri);
        query.setUri(queryUri);
        store.setEmbeddableQueryInfoProvider(query);

        return query;
    }

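    /**
     * Creates the pub-sub websocket result operator that publishes the store's
     * query results back to the gateway.
     */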
    protected PubSubWebSocketAppDataResult createQueryResult(DAG dag, Configuration conf,
            AppDataSingleSchemaDimensionStoreHDHT store) {
        PubSubWebSocketAppDataResult wsOut = new PubSubWebSocketAppDataResult();
        URI queryUri = getQueryUri(dag, conf);
        wsOut.setUri(queryUri);
        dag.addOperator("QueryResult", wsOut);
        // Set remaining dag options

        dag.setAttribute(store, Context.OperatorContext.COUNTERS_AGGREGATOR,
                new BasicCounters.LongAggregator<MutableLong>());

        return wsOut;
    }
}
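
Usage

The class above can be launched in Apex local mode for a quick smoke test. The runner below is a minimal sketch, not part of the original listing: the class name, store path, and run duration are arbitrary example values, while the property key mirrors the one built in the ApplicationDimensionComputation constructor. Note that includeQuery defaults to true, so the pub-sub websocket gateway address resolved by ConfigUtil may also need to be available in your configuration.

package apex.benchmark;

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.LocalMode;

public class ApplicationDimensionComputationLocalRunner {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(false);
        // createStore() fails fast if this property is missing; the key is
        // "dt.application." + appName + ".operator.Store.fileStore.basePathPrefix".
        conf.set("dt.application.DimensionComputation.operator.Store.fileStore.basePathPrefix",
                "/tmp/dimension-store"); // example path, adjust as needed

        LocalMode lma = LocalMode.newInstance();
        lma.prepareDAG(new ApplicationDimensionComputation(), conf);

        // Run the DAG in-process for 30 seconds, then shut down.
        lma.getController().run(30 * 1000);
    }
}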