tv.icntv.grade.film.recommend.CorrelateJob.java Source code

Java tutorial

Introduction

Here is the source code for tv.icntv.grade.film.recommend.CorrelateJob.java

Source

/* Copyright 2013 Future TV, Inc.
 *
 *      Licensed under the Apache License, Version 2.0 (the "License");
 *      you may not use this file except in compliance with the License.
 *      You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 *      Unless required by applicable law or agreed to in writing, software
 *      distributed under the License is distributed on an "AS IS" BASIS,
 *      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *      See the License for the specific language governing permissions and
 *      limitations under the License.
 */

package tv.icntv.grade.film.recommend;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.sun.istack.Nullable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.Parameters;
import org.apache.mahout.fpm.pfpgrowth.PFPGrowth;
import tv.icntv.grade.film.core.AbstractJob;
import tv.icntv.grade.film.correlate.UserHistoryMapper;
import tv.icntv.grade.film.correlate.UserHistoryReducer;
import tv.icntv.grade.film.utils.HadoopUtils;
import tv.icntv.grade.film.utils.MapReduceUtils;

import java.util.Date;
import java.util.List;

/**
 * Driver that chains three stages to produce film correlation output:
 * <ol>
 *   <li>a MapReduce job ("icntv correlate job") using {@link UserHistoryMapper} and
 *       {@link UserHistoryReducer} over the comma-separated input paths;</li>
 *   <li>Mahout's {@link PFPGrowth#runPFPGrowth(Parameters, Configuration)} with the
 *       parameters parsed from the third argument;</li>
 *   <li>a MapReduce job ("correlate result calculate") using {@code CorrelateInputMapper}
 *       and {@code CorrelateOutPutReducer} over the "frequentpatterns" sub-directory of the
 *       FP-growth output.</li>
 * </ol>
 *
 * <p>Created 13-12-2.
 *
 * @author xiaowu lei
 */
public class CorrelateJob extends AbstractJob {

    /** Number of positional arguments {@link #run(String[])} requires. */
    private static final int EXPECTED_ARGS = 4;

    /**
     * Runs the three pipeline stages in order, stopping at the first failure.
     *
     * @param strings positional arguments:
     *                {@code [0]} comma-separated input paths for the first job,
     *                {@code [1]} output directory of the first job,
     *                {@code [2]} FP-growth parameters encoded as {@code key=value} pairs joined
     *                by {@code "--"} (must contain an {@code output} entry),
     *                {@code [3]} output directory of the final job
     * @return 0 when every stage succeeds; 1 when the first job fails, when the count over the
     *         FP-growth output is zero, or when the final job fails
     * @throws IllegalArgumentException if fewer than four arguments are supplied or the
     *                                  FP-growth parameters carry no {@code output} entry
     * @throws Exception                propagated from the Hadoop / Mahout calls
     */
    @Override
    public int run(String[] strings) throws Exception {
        if (strings == null || strings.length < EXPECTED_ARGS) {
            throw new IllegalArgumentException(
                    "Expected " + EXPECTED_ARGS + " arguments: <inputPaths> <historyOutput> "
                            + "<fpGrowthParameters> <resultOutput>");
        }
        Configuration configuration = getConf();

        // Stage 1: user-history job. Existing output is removed first.
        HadoopUtils.deleteIfExist(strings[1]);
        Job correlate = new Job(configuration, "icntv correlate job");
        // getPaths is not defined in this class; presumably inherited from AbstractJob.
        MapReduceUtils.initMapperJob(UserHistoryMapper.class, Text.class, Text.class, this.getClass(), correlate,
                getPaths(strings[0].split(",")));
        MapReduceUtils.initReducerJob(new Path(strings[1]), UserHistoryReducer.class, correlate);
        if (!correlate.waitForCompletion(true)) {
            return 1;
        }

        // Stage 2: parallel FP-growth driven by the serialized parameter string.
        Parameters parameter = getParameter(strings[2]);
        String fpGrowthOutput = parameter.get("output");
        if (Strings.isNullOrEmpty(fpGrowthOutput)) {
            // Without this check the code below would operate on the path "null/frequentpatterns".
            throw new IllegalArgumentException(
                    "FP-growth parameters must contain an 'output' entry: " + strings[2]);
        }
        HadoopUtils.deleteIfExist(fpGrowthOutput);
        PFPGrowth.runPFPGrowth(parameter, configuration);

        String output = fpGrowthOutput + "/frequentpatterns";
        // Only reducer part files are considered by the count.
        long count = HadoopUtils.count(new Path(output), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().matches("part-r-\\d*");
            }
        });
        if (count == 0) {
            return 1;
        }

        // Stage 3: must set the total BEFORE constructing the Job so the job is created
        // from a configuration that already carries the value.
        configuration.setLong("icntv.correlate.total.size", count);
        HadoopUtils.deleteIfExist(strings[3]);
        Job result = new Job(configuration, "correlate result calculate");
        MapReduceUtils.initMapperJob(CorrelateInputMapper.class, Text.class, Text.class, this.getClass(), result,
                new Path(output));
        result.setInputFormatClass(SequenceFileInputFormat.class);
        MapReduceUtils.initReducerJob(new Path(strings[3]), CorrelateOutPutReducer.class, result);

        // Report failure of the final job instead of unconditionally returning success.
        return result.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Parses a parameter string of the form {@code key1=value1--key2=value2--...}.
     *
     * <p>Each token is split on its first {@code '='} only, so values may themselves contain
     * {@code '='}. Tokens with no {@code '='}, an empty key or an empty value are ignored.
     *
     * <p>NOTE(review): {@code "--"} is the pair delimiter, so a value containing {@code "--"}
     * (for example inside a path) would be split incorrectly - confirm this cannot happen.
     *
     * @param strings the encoded parameters
     * @return the parsed parameters; empty when no valid pair is present
     */
    private Parameters getParameter(String strings) {
        Parameters parameters = new Parameters();
        for (String token : strings.split("--")) {
            int separator = token.indexOf('=');
            if (separator <= 0 || separator == token.length() - 1) {
                continue;
            }
            parameters.set(token.substring(0, separator), token.substring(separator + 1));
        }
        return parameters;
    }

    /**
     * Reads a required, non-empty property from the configuration.
     *
     * @param configuration configuration to read from
     * @param key           property name
     * @return the property value
     * @throws IllegalStateException if the property is absent or empty
     */
    private static String requireSetting(Configuration configuration, String key) {
        String value = configuration.get(key);
        if (Strings.isNullOrEmpty(value)) {
            throw new IllegalStateException("Missing required configuration property: " + key);
        }
        return value;
    }

    /**
     * Command-line entry point. Builds the four arguments of {@link #run(String[])} from the
     * HBase configuration plus the {@code grade.xml} resource and exits with the job's status.
     *
     * <p>Does nothing when {@code hbase.cdn.tables} is unset or empty.
     *
     * @param args ignored
     * @throws Exception propagated from {@link ToolRunner#run}
     */
    public static void main(String[] args) throws Exception {
        final Configuration configuration = HBaseConfiguration.create();
        configuration.addResource("grade.xml");
        String tables = configuration.get("hbase.cdn.tables");
        if (Strings.isNullOrEmpty(tables)) {
            System.err.println("Property 'hbase.cdn.tables' is not set; nothing to do.");
            return;
        }

        // One timestamp for every formatted path, so that all directories of a single run
        // agree even if the run starts right before a date boundary.
        final Date now = new Date();
        final String baseDirectoryFormat = requireSetting(configuration, "hdfs.directory.base.db");

        List<String> list = Lists.newArrayList(Splitter.on(",").split(tables));
        // Lists.transform returns a lazy view; the function runs when the view is joined below.
        List<String> results = Lists.transform(list, new Function<String, String>() {
            @Override
            public String apply(@Nullable java.lang.String input) {
                return String.format(baseDirectoryFormat, now, input);
            }
        });

        String middleDirectory = String.format(requireSetting(configuration, "icntv.correlate.input"), now);
        String fpGrowthOutput =
                String.format(requireSetting(configuration, "icntv.correlate.fp.growth.output"), now);
        String resultDirectory = String.format(requireSetting(configuration, "icntv.correlate.output"), now);

        // NOTE(review): the single quotes in splitterPattern are passed through as part of the
        // value - confirm that the consumer of this parameter expects them.
        StringBuilder sb = new StringBuilder();
        sb.append("minSupport=").append(configuration.get("correlate.minSupport", "3")).append("--")
                .append("maxHeapSize=100").append("--").append("splitterPattern='[\t ]'").append("--")
                .append("input=").append(middleDirectory).append("--").append("output=")
                .append(fpGrowthOutput);

        int exitCode = ToolRunner.run(configuration, new CorrelateJob(),
                new String[] { Joiner.on(",").join(results), middleDirectory, sb.toString(), resultDirectory });
        // Surface job failure to the calling shell / scheduler.
        System.exit(exitCode);
    }
}