co.cask.cdap.examples.purchase.PurchaseHistoryBuilder.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.examples.purchase.PurchaseHistoryBuilder.java

Source

/*
 * Copyright © 2014-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.examples.purchase;

import co.cask.cdap.api.ProgramLifecycle;
import co.cask.cdap.api.Resources;
import co.cask.cdap.api.annotation.UseDataSet;
import co.cask.cdap.api.data.batch.Input;
import co.cask.cdap.api.data.batch.Output;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import co.cask.cdap.api.metrics.Metrics;
import co.cask.common.http.HttpRequest;
import co.cask.common.http.HttpRequests;
import co.cask.common.http.HttpResponse;
import com.google.gson.Gson;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Map;

/**
 * MapReduce program that reads purchases from the {@code purchases} dataset and writes one
 * {@link PurchaseHistory} per customer to the {@code history} dataset.
 *
 * <p>The mapper and reducer memory configured in {@link #configure()} can be overridden per run
 * through the runtime arguments {@link #MAPPER_MEMORY_MB} and {@link #REDUCER_MEMORY_MB}.
 */
public class PurchaseHistoryBuilder extends AbstractMapReduce {
    /** Name of the runtime argument that overrides the mapper memory. */
    public static final String MAPPER_MEMORY_MB = "mapper.memory.mb";
    /** Name of the runtime argument that overrides the reducer memory. */
    public static final String REDUCER_MEMORY_MB = "reducer.memory.mb";

    /**
     * Sets the program description and the default resources for the driver, the mappers and the reducers.
     */
    @Override
    public void configure() {
        setDescription("Purchase History Builder");
        // Defaults; presumably megabytes, matching the *_MEMORY_MB runtime arguments.
        setDriverResources(new Resources(1024));
        setMapperResources(new Resources(1024));
        setReducerResources(new Resources(1024));
    }

    /**
     * Wires the mapper, reducer, input and output of the job and applies any memory overrides
     * supplied as runtime arguments.
     *
     * @param context the context of the MapReduce run being submitted
     * @throws IllegalArgumentException if a memory runtime argument is present but is not an integer
     * @throws Exception if the job cannot be configured
     */
    @Override
    public void beforeSubmit(MapReduceContext context) throws Exception {
        Job job = context.getHadoopJob();
        job.setReducerClass(PerUserReducer.class);

        context.addInput(Input.ofDataset("purchases"), PurchaseMapper.class);
        context.addOutput(Output.ofDataset("history"));

        // override default memory usage if the corresponding runtime arguments are set.
        Map<String, String> runtimeArgs = context.getRuntimeArguments();
        Integer mapperMemoryMB = parseMemoryMB(runtimeArgs, MAPPER_MEMORY_MB);
        if (mapperMemoryMB != null) {
            context.setMapperResources(new Resources(mapperMemoryMB));
        }
        Integer reducerMemoryMB = parseMemoryMB(runtimeArgs, REDUCER_MEMORY_MB);
        if (reducerMemoryMB != null) {
            context.setReducerResources(new Resources(reducerMemoryMB));
        }
    }

    /**
     * Reads an optional integer memory setting from the runtime arguments.
     *
     * @param runtimeArgs the runtime arguments of the current run
     * @param key the name of the runtime argument to read
     * @return the parsed value, or {@code null} if the argument is not set
     * @throws IllegalArgumentException if the argument is set but is not a valid integer
     */
    private static Integer parseMemoryMB(Map<String, String> runtimeArgs, String key) {
        String value = runtimeArgs.get(key);
        if (value == null) {
            return null;
        }
        try {
            return Integer.parseInt(value.trim());
        } catch (NumberFormatException e) {
            // Name the offending argument; a bare NumberFormatException does not say which one was wrong.
            throw new IllegalArgumentException(
                    String.format("Runtime argument '%s' must be an integer but was '%s'", key, value), e);
        }
    }

    /**
     * Mapper class to emit user and corresponding purchase information
     */
    public static class PurchaseMapper extends Mapper<byte[], Purchase, Text, Purchase> {

        /** Purchases priced above this value are counted in the {@code purchases.large} metric. */
        private static final int LARGE_PURCHASE_PRICE = 100000;

        // Never assigned in this class; presumably injected by the CDAP runtime - confirm.
        private Metrics mapMetrics;

        /**
         * Emits the purchase keyed by its customer and counts large purchases.
         *
         * @param key the key of the purchase in the input dataset (not used)
         * @param purchase the purchase to emit
         * @param context the Hadoop mapper context to write to
         */
        @Override
        public void map(byte[] key, Purchase purchase, Context context) throws IOException, InterruptedException {
            String user = purchase.getCustomer();
            if (purchase.getPrice() > LARGE_PURCHASE_PRICE) {
                mapMetrics.count("purchases.large", 1);
            }
            context.write(new Text(user), purchase);
        }
    }

    /**
     * Reducer class to aggregate all purchases per user
     */
    public static class PerUserReducer extends Reducer<Text, Purchase, String, PurchaseHistory>
            implements ProgramLifecycle<MapReduceContext> {

        private static final Logger LOG = LoggerFactory.getLogger(PerUserReducer.class);

        /** Shared JSON parser, so that one is not allocated per reduce call. */
        private static final Gson GSON = new Gson();

        /** A customer with exactly this many purchases is counted in {@code customers.rare}. */
        private static final int RARE_PURCHASE_COUNT = 1;
        /** A customer with more than this many purchases is counted in {@code customers.frequent}. */
        private static final int FREQUENT_PURCHASE_COUNT = 10;

        @UseDataSet("frequentCustomers")
        private KeyValueTable frequentCustomers;

        // Never assigned in this class; presumably injected by the CDAP runtime - confirm.
        private Metrics reduceMetrics;

        /** Base URL of the user profile service, resolved once in {@link #initialize}. */
        private URL userProfileServiceURL;

        /**
         * Resolves the base URL of the user profile service.
         *
         * @param context the context of the MapReduce run
         */
        @Override
        public void initialize(MapReduceContext context) throws Exception {
            userProfileServiceURL = context.getServiceURL(UserProfileServiceHandler.SERVICE_NAME);
        }

        /**
         * Builds the purchase history of one customer, attaches the user profile if it can be fetched,
         * updates the rare/frequent customer metrics and writes the history to the output.
         *
         * @param customer the customer whose purchases are aggregated
         * @param values all purchases of that customer
         * @param context the Hadoop reducer context to write to
         */
        @Override
        public void reduce(Text customer, Iterable<Purchase> values, Context context)
                throws IOException, InterruptedException {
            String customerName = customer.toString();
            UserProfile userProfile = fetchUserProfile(customerName);

            PurchaseHistory purchases = new PurchaseHistory(customerName, userProfile);
            int numPurchases = 0;
            for (Purchase val : values) {
                // Store a copy; presumably because Hadoop may reuse the value object between iterations.
                purchases.add(new Purchase(val));
                numPurchases++;
            }
            if (numPurchases == RARE_PURCHASE_COUNT) {
                reduceMetrics.count("customers.rare", 1);
            } else if (numPurchases > FREQUENT_PURCHASE_COUNT) {
                reduceMetrics.count("customers.frequent", 1);
                frequentCustomers.write(customerName, String.valueOf(numPurchases));
            }

            context.write(customerName, purchases);
        }

        /**
         * Looks up the profile of a customer in the user profile service. The lookup is best effort:
         * any failure is logged and the history is built without a profile.
         *
         * @param customerName the customer to look up
         * @return the user profile, or {@code null} if the service returned none or the call failed
         */
        private UserProfile fetchUserProfile(String customerName) {
            try {
                URL url = new URL(userProfileServiceURL,
                        UserProfileServiceHandler.USER_ENDPOINT + "/" + customerName);

                HttpRequest request = HttpRequest.get(url).build();
                HttpResponse response = HttpRequests.execute(request);
                // Only a successful response carries a profile; do not parse error bodies as one.
                if (response.getResponseCode() == HttpURLConnection.HTTP_OK) {
                    return GSON.fromJson(response.getResponseBodyAsString(), UserProfile.class);
                }
            } catch (Exception e) {
                // Deliberately broad: a missing profile must not fail the reduce task.
                // Pass the exception itself as the last argument so its message and stack trace are logged.
                LOG.warn("Error accessing user profile for customer {}", customerName, e);
            }
            return null;
        }

        @Override
        public void destroy() {
            // no-op
        }
    }
}