com.datatorrent.demos.mroperator.ReduceOperator.java Source code

Introduction

Here is the source code for com.datatorrent.demos.mroperator.ReduceOperator.java. The operator adapts a classic Hadoop mapred Reducer to the DataTorrent streaming model: it buffers incoming key/value pairs grouped by key, watches a control port for the number of mappers still running, and runs the wrapped reducer and emits its output once that count reaches zero.

Source

/*
 * Copyright (c) 2013 DataTorrent, Inc. ALL Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datatorrent.demos.mroperator;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.Operator;
import com.datatorrent.demos.mroperator.ReporterImpl.ReporterType;
import com.datatorrent.lib.util.KeyHashValPair;

/**
 * <p>ReduceOperator class.</p>
 *
 * @since 0.9.0
 */
@SuppressWarnings({ "deprecation", "unused" })
public class ReduceOperator<K1, V1, K2, V2> implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(ReduceOperator.class);

    private Class<? extends Reducer<K1, V1, K2, V2>> reduceClass;
    private transient Reducer<K1, V1, K2, V2> reduceObj;
    private transient Reporter reporter;
    private OutputCollector<K2, V2> outputCollector;
    private String configFile;

    public Class<? extends Reducer<K1, V1, K2, V2>> getReduceClass() {
        return reduceClass;
    }

    public void setReduceClass(Class<? extends Reducer<K1, V1, K2, V2>> reduceClass) {
        this.reduceClass = reduceClass;
    }

    public String getConfigFile() {
        return configFile;
    }

    public void setConfigFile(String configFile) {
        this.configFile = configFile;
    }

    private int numberOfMappersRunning = -1;
    private int operatorId;

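    // Control port: tracks how many upstream mappers are still running. The buffered
    // reduce phase fires in endWindow() once this count drops to zero.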
    public final transient DefaultInputPort<KeyHashValPair<Integer, Integer>> inputCount = new DefaultInputPort<KeyHashValPair<Integer, Integer>>() {
        @Override
        public void process(KeyHashValPair<Integer, Integer> tuple) {
            logger.info("processing {}", tuple);
            if (numberOfMappersRunning == -1) {
                numberOfMappersRunning = tuple.getValue();
            } else {
                numberOfMappersRunning += tuple.getValue();
            }
        }
    };

    public final transient DefaultOutputPort<KeyHashValPair<K2, V2>> output = new DefaultOutputPort<KeyHashValPair<K2, V2>>();
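    // Accumulates the incoming values grouped by key (the "shuffle" state) until the reduce phase runs.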
    private Map<K1, List<V1>> cacheObject;
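    // Data port: receives the shuffled key/value pairs emitted by the mappers.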
    public final transient DefaultInputPort<KeyHashValPair<K1, V1>> input = new DefaultInputPort<KeyHashValPair<K1, V1>>() {

        @Override
        public void process(KeyHashValPair<K1, V1> tuple) {
            // Group values by key; the reducer later consumes one Iterator<V1> per key.
            List<V1> list = cacheObject.get(tuple.getKey());
            if (list == null) {
                list = new ArrayList<V1>();
                cacheObject.put(tuple.getKey(), list);
            }
            list.add(tuple.getValue());
        }
    };

    @Override
    public void setup(OperatorContext context) {
        reporter = new ReporterImpl(ReporterType.Reducer, new Counters());
        if (context != null) {
            operatorId = context.getId();
        }
        cacheObject = new HashMap<K1, List<V1>>();
        outputCollector = new OutputCollectorImpl<K2, V2>();
        if (reduceClass != null) {
            try {
                reduceObj = reduceClass.newInstance();
            } catch (Exception e) {
                logger.info("can't instantiate object {}", e.getMessage());
                throw new RuntimeException(e);
            }
            Configuration conf = new Configuration();
            InputStream stream = null;
            // Look for the optional Hadoop configuration file on the classpath,
            // trying the name both with and without a leading slash.
            if (configFile != null && configFile.length() > 0) {
                logger.info("looking for configuration resource /{}", configFile);
                stream = ClassLoader.getSystemResourceAsStream("/" + configFile);
                if (stream == null) {
                    logger.info("looking for configuration resource {}", configFile);
                    stream = ClassLoader.getSystemResourceAsStream(configFile);
                }
            }
            if (stream != null) {
                logger.info("found configuration resource {}", configFile);
                conf.addResource(stream);
            }
            reduceObj.configure(new JobConf(conf));
        }
    }

    @Override
    public void teardown() {

    }

    @Override
    public void beginWindow(long windowId) {

    }

    @Override
    public void endWindow() {
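        // A count of zero means every mapper has reported completion: run the wrapped
        // reducer over each buffered key group and emit the collected results downstream.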
        if (numberOfMappersRunning == 0) {
            for (Map.Entry<K1, List<V1>> e : cacheObject.entrySet()) {
                try {
                    reduceObj.reduce(e.getKey(), e.getValue().iterator(), outputCollector, reporter);
                } catch (IOException e1) {
                    logger.error("reduce failed for key {}", e.getKey(), e1);
                    throw new RuntimeException(e1);
                }
            }
            List<KeyHashValPair<K2, V2>> list = ((OutputCollectorImpl<K2, V2>) outputCollector).getList();
            for (KeyHashValPair<K2, V2> e : list) {
                output.emit(e);
            }
            list.clear();
            cacheObject.clear();
            numberOfMappersRunning = -1;
        }
    }

}
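
Usage

For context, here is a minimal sketch of how this operator might be wired into a DataTorrent application. It assumes the companion MapOperator from the same demo package exposes output and outputCount ports matching the reducer's input and inputCount ports; WordCountReduce is a hypothetical org.apache.hadoop.mapred.Reducer implementation, and the operator and stream names are illustrative rather than part of the demo.

package com.datatorrent.demos.mroperator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import com.datatorrent.api.DAG;
import com.datatorrent.api.StreamingApplication;

public class WordCountSketch implements StreamingApplication {
    @Override
    public void populateDAG(DAG dag, Configuration conf) {
        // Port and type parameters on MapOperator are assumed to mirror ReduceOperator's.
        MapOperator<LongWritable, Text, Text, IntWritable> mapper =
                dag.addOperator("Map", new MapOperator<LongWritable, Text, Text, IntWritable>());

        ReduceOperator<Text, IntWritable, Text, IntWritable> reducer =
                dag.addOperator("Reduce", new ReduceOperator<Text, IntWritable, Text, IntWritable>());
        reducer.setReduceClass(WordCountReduce.class); // hypothetical Reducer implementation
        reducer.setConfigFile("mr-conf.xml");          // optional Hadoop config on the classpath

        // Key/value pairs flow on one stream; running-mapper counts on the other.
        dag.addStream("mapped-data", mapper.output, reducer.input);
        dag.addStream("mapper-counts", mapper.outputCount, reducer.inputCount);
    }
}

Note that the reduce phase in endWindow() only fires when the values arriving on inputCount sum to zero, so the upstream operator is responsible for incrementing the count as mappers start and decrementing it as they finish.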