com.cloudera.oryx.als.computation.LoadRunner.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.oryx.als.computation.LoadRunner.java

Source

/*
 * Copyright (c) 2013, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */

package com.cloudera.oryx.als.computation;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.oryx.als.common.OryxRecommender;
import com.cloudera.oryx.common.io.IOUtils;
import com.cloudera.oryx.common.iterator.FileLineIterable;
import com.cloudera.oryx.common.parallel.ExecutorUtils;
import com.cloudera.oryx.common.random.RandomManager;
import com.cloudera.oryx.common.io.DelimitedDataUtils;

/**
 * Runs a mixed, concurrent load against a given recommender instance.
 *
 * <p>User and item IDs are collected from data files when the instance is constructed. Each load step
 * picks one recommender operation at random, invokes it with randomly chosen IDs, and records the elapsed
 * wall-clock time in milliseconds. The mean time per operation type is logged when the run finishes.</p>
 *
 * @author Sean Owen
 */
public final class LoadRunner implements Callable<Object> {

    private static final Logger log = LoggerFactory.getLogger(LoadRunner.class);

    private final OryxRecommender client;
    private final String[] uniqueUserIDs;
    private final String[] uniqueItemIDs;
    private final int steps;

    /**
     * @param client recommender to load
     * @param dataDirectory a directory containing data files from which user and item IDs should be read
     * @param steps number of load steps to run; must be positive
     * @throws IOException if {@code dataDirectory} cannot be listed (for example, it is not a directory),
     *  or if an I/O error is raised while the data files are read
     */
    public LoadRunner(OryxRecommender client, File dataDirectory, int steps) throws IOException {
        Preconditions.checkNotNull(client);
        Preconditions.checkNotNull(dataDirectory);
        Preconditions.checkArgument(steps > 0);

        // File.listFiles() returns null, rather than throwing, when the path is not a directory
        // or cannot be read; turn that into a meaningful error instead of a NullPointerException.
        File[] dataFiles = dataDirectory.listFiles(IOUtils.CSV_COMPRESSED_FILTER);
        if (dataFiles == null) {
            throw new IOException("Can't list files in " + dataDirectory);
        }

        log.info("Reading IDs...");
        Set<String> userIDsSet = Sets.newHashSet();
        Set<String> itemIDsSet = Sets.newHashSet();
        for (File f : dataFiles) {
            // Files whose name contains "oryx-append" are not used as a source of IDs
            if (f.getName().contains("oryx-append")) {
                continue;
            }
            for (CharSequence line : new FileLineIterable(f)) {
                String[] columns = DelimitedDataUtils.decode(line);
                if (columns.length < 2) {
                    // Not a user,item[,...] line; nothing to extract
                    continue;
                }
                userIDsSet.add(columns[0]);
                itemIDsSet.add(columns[1]);
            }
        }

        this.client = client;
        this.uniqueUserIDs = userIDsSet.toArray(new String[userIDsSet.size()]);
        this.uniqueItemIDs = itemIDsSet.toArray(new String[itemIDsSet.size()]);
        this.steps = steps;
    }

    /**
     * @return total number of load steps this runner was configured with
     */
    public int getSteps() {
        return steps;
    }

    /**
     * Runs the load test; see {@link #runLoad()}.
     *
     * @return always {@code null}
     */
    @Override
    public Void call() throws Exception {
        runLoad();
        return null;
    }

    /**
     * Executes all configured steps, split across one worker per available processor (never more workers
     * than steps), then logs the total time and the mean time in milliseconds of each operation type.
     *
     * @throws InterruptedException if interrupted while waiting for the workers to finish
     * @throws IllegalStateException if no user IDs or no item IDs were found in the data directory
     */
    public void runLoad() throws InterruptedException {

        // Workers index into these arrays with random.nextInt(length), which cannot work on empty arrays
        Preconditions.checkState(uniqueUserIDs.length > 0 && uniqueItemIDs.length > 0,
                                 "No user or item IDs were read from the data directory");

        final StorelessUnivariateStatistic recommendedBecause = new Mean();
        final StorelessUnivariateStatistic setPreference = new Mean();
        final StorelessUnivariateStatistic removePreference = new Mean();
        final StorelessUnivariateStatistic ingest = new Mean();
        final StorelessUnivariateStatistic refresh = new Mean();
        final StorelessUnivariateStatistic estimatePreference = new Mean();
        final StorelessUnivariateStatistic mostSimilarItems = new Mean();
        final StorelessUnivariateStatistic similarityToItem = new Mean();
        final StorelessUnivariateStatistic mostPopularItems = new Mean();
        final StorelessUnivariateStatistic recommendToMany = new Mean();
        final StorelessUnivariateStatistic recommend = new Mean();
        final RandomGenerator random = RandomManager.getRandom();

        int numCores = Runtime.getRuntime().availableProcessors();
        // steps > 0 is guaranteed by the constructor, so there is always at least one worker
        int numWorkers = Math.min(numCores, steps);
        int baseStepsPerWorker = steps / numWorkers;
        // Spread the remainder over the first workers so that exactly 'steps' steps run in total
        int extraSteps = steps % numWorkers;
        Collection<Callable<Object>> workers = Lists.newArrayListWithCapacity(numWorkers);
        for (int worker = 0; worker < numWorkers; worker++) {
            final int stepsForWorker = baseStepsPerWorker + (worker < extraSteps ? 1 : 0);
            workers.add(new Callable<Object>() {
                @Override
                public Void call() throws Exception {
                    for (int step = 0; step < stepsForWorker; step++) {
                        double r;
                        String userID;
                        String itemID;
                        String itemID2;
                        float value;
                        // The generator is shared by all workers; draw everything for this step under one lock
                        synchronized (random) {
                            r = random.nextDouble();
                            userID = uniqueUserIDs[random.nextInt(uniqueUserIDs.length)];
                            itemID = uniqueItemIDs[random.nextInt(uniqueItemIDs.length)];
                            itemID2 = uniqueItemIDs[random.nextInt(uniqueItemIDs.length)];
                            value = random.nextInt(10);
                        }
                        long stepStart = System.currentTimeMillis();
                        // r selects the operation; each threshold bounds that operation's share of the mix
                        if (r < 0.05) {
                            client.recommendedBecause(userID, itemID, 10);
                            recordElapsed(recommendedBecause, stepStart);
                        } else if (r < 0.07) {
                            client.setPreference(userID, itemID);
                            recordElapsed(setPreference, stepStart);
                        } else if (r < 0.08) {
                            client.setPreference(userID, itemID, value);
                            recordElapsed(setPreference, stepStart);
                        } else if (r < 0.11) {
                            client.removePreference(userID, itemID);
                            recordElapsed(removePreference, stepStart);
                        } else if (r < 0.12) {
                            Reader reader = new StringReader(
                                    DelimitedDataUtils.encode(userID, itemID, Float.toString(value)) + '\n');
                            client.ingest(reader);
                            recordElapsed(ingest, stepStart);
                        } else if (r < 0.13) {
                            client.refresh();
                            recordElapsed(refresh, stepStart);
                        } else if (r < 0.14) {
                            client.similarityToItem(itemID, itemID2);
                            recordElapsed(similarityToItem, stepStart);
                        } else if (r < 0.15) {
                            client.mostPopularItems(10);
                            recordElapsed(mostPopularItems, stepStart);
                        } else if (r < 0.19) {
                            client.estimatePreference(userID, itemID);
                            recordElapsed(estimatePreference, stepStart);
                        } else if (r < 0.20) {
                            client.estimateForAnonymous(itemID, new String[] { itemID2 });
                            recordElapsed(estimatePreference, stepStart);
                        } else if (r < 0.25) {
                            client.mostSimilarItems(new String[] { itemID }, 10);
                            recordElapsed(mostSimilarItems, stepStart);
                        } else if (r < 0.30) {
                            client.recommendToMany(new String[] { userID, userID }, 10, true, null);
                            recordElapsed(recommendToMany, stepStart);
                        } else {
                            client.recommend(userID, 10);
                            recordElapsed(recommend, stepStart);
                        }
                    }
                    return null;
                }
            });
        }

        log.info("Starting load test...");

        long start = System.currentTimeMillis();
        ExecutorService executor = Executors.newFixedThreadPool(numWorkers);
        Iterable<Future<Object>> futures;
        try {
            // invokeAll() returns only once every worker has completed
            futures = executor.invokeAll(workers);
        } finally {
            ExecutorUtils.shutdownNowAndAwait(executor);
        }
        long end = System.currentTimeMillis();

        ExecutorUtils.checkExceptions(futures);

        log.info("Finished {} steps in {}ms", steps, end - start);

        log.info("recommendedBecause: {}", recommendedBecause.getResult());
        log.info("setPreference: {}", setPreference.getResult());
        log.info("removePreference: {}", removePreference.getResult());
        log.info("ingest: {}", ingest.getResult());
        log.info("refresh: {}", refresh.getResult());
        log.info("estimatePreference: {}", estimatePreference.getResult());
        log.info("mostSimilarItems: {}", mostSimilarItems.getResult());
        log.info("similarityToItem: {}", similarityToItem.getResult());
        log.info("mostPopularItems: {}", mostPopularItems.getResult());
        log.info("recommendToMany: {}", recommendToMany.getResult());
        log.info("recommend: {}", recommend.getResult());
    }

    /**
     * Adds the time elapsed since {@code stepStart}, in milliseconds, to {@code stat}. The statistic is
     * shared by all worker threads and is not synchronized itself, so the update is made under its lock.
     */
    private static void recordElapsed(StorelessUnivariateStatistic stat, long stepStart) {
        // Measure before acquiring the lock so that lock contention is not counted as operation time
        long elapsedMS = System.currentTimeMillis() - stepStart;
        synchronized (stat) {
            stat.increment(elapsedMS);
        }
    }

}