com.mozilla.socorro.hadoop.HardwareAccel.java Source code


Introduction

Here is the source code for com.mozilla.socorro.hadoop.HardwareAccel.java, a Hadoop MapReduce job from Mozilla Socorro that scans processed crash reports stored in HBase and counts crashes by the hardware-acceleration ("d2d*") module found in each crash's module list.

Source

/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Socorro.
 *
 * The Initial Developer of the Original Code is the Mozilla Foundation.
 * Portions created by the Initial Developer are Copyright (C) 2010
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 *   Xavier Stevens <xstevens@mozilla.com>, Mozilla Corporation (original author)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

package com.mozilla.socorro.hadoop;

import static com.mozilla.socorro.hadoop.CrashReportJob.*;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.TypeReference;

import com.mozilla.util.DateUtil;

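/**
 * MapReduce job that scans processed Socorro crash reports from HBase and counts,
 * per library name, the crashes whose minidump module list contains a module whose
 * library name starts with "d2d" (hardware-acceleration / Direct2D libraries).
 * Crashes without such a module are counted under "no_module". Rows can be
 * filtered by product, release, OS name, OS version, and processed-date window.
 */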
public class HardwareAccel implements Tool {

    private static final org.slf4j.Logger LOG = org.slf4j.LoggerFactory.getLogger(HardwareAccel.class);

    private static final String NAME = "HardwareAccel";
    private Configuration conf;

    public static class HardwareAccelMapper extends TableMapper<Text, IntWritable> {

        public enum ReportStats {
            JSON_PARSE_EXCEPTION, JSON_MAPPING_EXCEPTION, JSON_BYTES_NULL, DATE_PARSE_EXCEPTION, PRODUCT_FILTERED, RELEASE_FILTERED, OS_FILTERED, OS_VERSION_FILTERED, TIME_FILTERED
        }

        private Text outputKey;
        private IntWritable one;

        private ObjectMapper jsonMapper;

        private String productFilter;
        private String releaseFilter;
        private String osFilter;
        private String osVersionFilter;

        private Pattern newlinePattern;
        private Pattern pipePattern;

        private SimpleDateFormat sdf;
        private long startTime;
        private long endTime;

        /* (non-Javadoc)
         * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
         */
        public void setup(Context context) {
            outputKey = new Text();
            one = new IntWritable(1);

            jsonMapper = new ObjectMapper();

            Configuration conf = context.getConfiguration();

            productFilter = conf.get(PRODUCT_FILTER);
            releaseFilter = conf.get(RELEASE_FILTER);
            osFilter = conf.get(OS_FILTER, "Windows NT");
            osVersionFilter = conf.get(OS_VERSION_FILTER, "6.1.7600");
            newlinePattern = Pattern.compile("\n");
            pipePattern = Pattern.compile("\\|");

            sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            startTime = DateUtil.getTimeAtResolution(conf.getLong(START_TIME, 0), Calendar.DATE);
            endTime = DateUtil.getEndTimeAtResolution(conf.getLong(END_TIME, System.currentTimeMillis()),
                    Calendar.DATE);
        }

        /* (non-Javadoc)
         * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
         */
        public void map(ImmutableBytesWritable key, Result result, Context context)
                throws InterruptedException, IOException {
            try {
                byte[] valueBytes = result.getValue(PROCESSED_DATA_BYTES, JSON_BYTES);
                if (valueBytes == null) {
                    context.getCounter(ReportStats.JSON_BYTES_NULL).increment(1L);
                    return;
                }
                String value = new String(valueBytes);
                // This is an untyped parse so the caller is expected to know the types
                Map<String, Object> crash = jsonMapper.readValue(value, new TypeReference<Map<String, Object>>() {
                });

                // Filter row if filter(s) are set and it doesn't match
                if (!StringUtils.isBlank(productFilter)) {
                    if (crash.containsKey(PROCESSED_JSON_PRODUCT)
                            && !crash.get(PROCESSED_JSON_PRODUCT).equals(productFilter)) {
                        context.getCounter(ReportStats.PRODUCT_FILTERED).increment(1L);
                        return;
                    }
                }
                if (!StringUtils.isBlank(releaseFilter)) {
                    if (crash.containsKey(PROCESSED_JSON_VERSION)
                            && !crash.get(PROCESSED_JSON_VERSION).equals(releaseFilter)) {
                        context.getCounter(ReportStats.RELEASE_FILTERED).increment(1L);
                        return;
                    }
                }

                String osName = (String) crash.get(PROCESSED_JSON_OS_NAME);
                if (!StringUtils.isBlank(osFilter)) {
                    if (osName == null || !osName.equals(osFilter)) {
                        context.getCounter(ReportStats.OS_FILTERED).increment(1L);
                        return;
                    }
                }

                String osVersion = (String) crash.get(PROCESSED_JSON_OS_VERSION);
                if (!StringUtils.isBlank(osVersionFilter)) {
                    if (osVersion == null || !osVersion.startsWith(osVersionFilter)) {
                        context.getCounter(ReportStats.OS_VERSION_FILTERED).increment(1L);
                        return;
                    }
                }

                // Parse the date the crash report was processed
                String dateProcessed = (String) crash.get(PROCESSED_JSON_DATE_PROCESSED);
                long crashTime = sdf.parse(dateProcessed).getTime();
                // Filter if the processed date is not within our window
                if (crashTime < startTime || crashTime > endTime) {
                    context.getCounter(ReportStats.TIME_FILTERED).increment(1L);
                    return;
                }

                boolean hadModuleLoaded = false;
                // The processed dump text may be missing for some crashes; treat that as no modules loaded
                String dump = (String) crash.get(PROCESSED_JSON_DUMP);
                for (String dumpline : newlinePattern.split(dump != null ? dump : "")) {
                    if (dumpline.startsWith(PROCESSED_JSON_MODULE_PATTERN)) {
                        // module_str, libname, version, pdb, checksum, addrstart, addrend, unknown
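                        // e.g. (hypothetical line): Module|d2d1.dll|6.1.7600.16385|d2d1.pdb|XXXX|0x695e0000|0x6971ffff|0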
                        String[] dumplineSplits = pipePattern.split(dumpline);
                        if (!StringUtils.isBlank(dumplineSplits[1]) && dumplineSplits[1].startsWith("d2d")) {
                            hadModuleLoaded = true;
                            outputKey.set(dumplineSplits[1]);
                            break;
                        }
                    }
                }

                if (!hadModuleLoaded) {
                    outputKey.set("no_module");
                }
                context.write(outputKey, one);
            } catch (JsonParseException e) {
                context.getCounter(ReportStats.JSON_PARSE_EXCEPTION).increment(1L);
            } catch (JsonMappingException e) {
                context.getCounter(ReportStats.JSON_MAPPING_EXCEPTION).increment(1L);
            } catch (ParseException e) {
                context.getCounter(ReportStats.DATE_PARSE_EXCEPTION).increment(1L);
            }
        }

    }

    /**
     * Initializes the HBase-backed MapReduce job via CrashReportJob.
     *
     * @param args command-line arguments; args[0] is the output path
     * @return the configured Job
     * @throws IOException
     * @throws ParseException
     */
    public Job initJob(String[] args) throws IOException, ParseException {
        Map<byte[], byte[]> columns = new HashMap<byte[], byte[]>();
        columns.put(PROCESSED_DATA_BYTES, JSON_BYTES);
        Job job = CrashReportJob.initJob(NAME, getConf(), HardwareAccel.class, HardwareAccelMapper.class,
                IntSumReducer.class, IntSumReducer.class, columns, Text.class, IntWritable.class,
                new Path(args[0]));
        job.setNumReduceTasks(1);

        return job;
    }

    /**
     * Prints usage information and the configurable properties.
     *
     * @return -1, so callers can return it as an error code
     */
    private static int printUsage() {
        System.out.println("Usage: " + NAME + " [generic-options] <output-path>");
        System.out.println();
        System.out.println("Configurable Properties:");
        System.out.println(PRODUCT_FILTER + "=<product-name>");
        System.out.println(RELEASE_FILTER + "=<release-version>");
        System.out.println(OS_FILTER + "=<os-name>");
        System.out.println(OS_VERSION_FILTER + "=<os-version>");
        System.out.println(START_DATE + "=<yyyyMMdd>");
        System.out.println(END_DATE + "=<yyyyMMdd>");
        System.out.println();
        GenericOptionsParser.printGenericCommandUsage(System.out);
        return -1;
    }

    /* (non-Javadoc)
     * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
     */
    public int run(String[] args) throws Exception {
        if (args.length != 1) {
            return printUsage();
        }

        int rc = -1;
        Job job = initJob(args);
        job.waitForCompletion(true);
        if (job.isSuccessful()) {
            rc = 0;
        }

        return rc;
    }

    /* (non-Javadoc)
     * @see org.apache.hadoop.conf.Configurable#getConf()
     */
    public Configuration getConf() {
        return this.conf;
    }

    /* (non-Javadoc)
     * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration)
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Runs the job through ToolRunner so the generic Hadoop options (-D, -conf, etc.) are handled.
     *
     * @param args command-line arguments
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new HardwareAccel(), args);
        System.exit(res);
    }

}
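
The job is normally launched through ToolRunner, with the filters supplied as -D properties (see printUsage() above). A minimal, hypothetical driver that does the same thing programmatically might look like the sketch below. The filter values, dates, and output path are illustrative only, and it assumes CrashReportJob translates the START_DATE/END_DATE properties into the start/end time window the mapper reads, as the usage text suggests.

package com.mozilla.socorro.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class HardwareAccelDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Filter values and dates below are purely illustrative.
        conf.set(CrashReportJob.PRODUCT_FILTER, "Firefox");
        conf.set(CrashReportJob.RELEASE_FILTER, "4.0b8");
        conf.set(CrashReportJob.OS_FILTER, "Windows NT");        // mapper default
        conf.set(CrashReportJob.OS_VERSION_FILTER, "6.1.7600");  // mapper default
        conf.set(CrashReportJob.START_DATE, "20101101");         // yyyyMMdd, per printUsage()
        conf.set(CrashReportJob.END_DATE, "20101130");
        // The single positional argument is the output path for the per-module crash counts.
        int rc = ToolRunner.run(conf, new HardwareAccel(), new String[] { "/tmp/hwaccel-output" });
        System.exit(rc);
    }
}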