org.apache.beam.examples.oozie.OozieAuditLogParser.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.beam.examples.oozie.OozieAuditLogParser.java, an Apache Beam example that parses Oozie audit log lines into OozieAuditLogObject records.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package org.apache.beam.examples.oozie;

import org.apache.beam.sdk.transforms.*;
import org.apache.beam.sdk.values.PCollection;
import org.apache.commons.lang.StringUtils;

import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class OozieAuditLogParser implements Serializable {

    public static final String MESSAGE_SPLIT_FLAG = "( - )";
    public static final String ALLOW_ALL_REGEX = "(.*)";
    private static final String COMMON_REGEX = "\\s([^\\]]*\\])";
    private static final String TIMESTAMP_REGEX = "(\\d\\d\\d\\d-\\d\\d-\\d\\d \\d\\d:\\d\\d:\\d\\d,\\d\\d\\d)";
    private static final String WHITE_SPACE_REGEX = "\\s+";
    private static final String LOG_LEVEL_REGEX = "(\\w+)";
    private static final String OOZIEAUDIT_FLAG = "(\\w+:\\d+)";
    private static final String PREFIX_REGEX = TIMESTAMP_REGEX + WHITE_SPACE_REGEX + LOG_LEVEL_REGEX
            + WHITE_SPACE_REGEX;
    private static final String IP = "IP";
    private static final String USER = "USER";
    private static final String GROUP = "GROUP";
    private static final String APP = "APP";
    private static final String JOBID = "JOBID";
    private static final String OPERATION = "OPERATION";
    private static final String PARAMETER = "PARAMETER";
    private static final String STATUS = "STATUS";
    private static final String HTTPCODE = "HTTPCODE";
    private static final String ERRORCODE = "ERRORCODE";
    private static final String ERRORMESSAGE = "ERRORMESSAGE";
    private static final Pattern LOG_PATTERN = constructPattern();

    public static long humanDateToMilliseconds(String date) throws ParseException {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
        sdf.setTimeZone(TimeZone.getDefault());
        Date d = sdf.parse(date);
        return d.getTime();
    }

    private static Pattern constructPattern() {
        List<String> patterns = new ArrayList<String>(11);
        patterns.add(IP);
        patterns.add(USER);
        patterns.add(GROUP);
        patterns.add(APP);
        patterns.add(JOBID);
        patterns.add(OPERATION);
        patterns.add(PARAMETER);
        patterns.add(STATUS);
        patterns.add(HTTPCODE);
        patterns.add(ERRORCODE);
        patterns.add(ERRORMESSAGE);

        StringBuilder sb = new StringBuilder();
        sb.append(PREFIX_REGEX + OOZIEAUDIT_FLAG);
        sb.append(MESSAGE_SPLIT_FLAG);
        for (int i = 0; i < patterns.size(); i++) {
            sb.append("(");
            sb.append(patterns.get(i) + COMMON_REGEX);
            sb.append(")");
            sb.append(ALLOW_ALL_REGEX);
        }
        String rs = StringUtils.removeEnd(sb.toString(), ALLOW_ALL_REGEX);
        return Pattern.compile(rs);
    }

    public OozieAuditLogObject parse(String logLine) throws Exception {

        OozieAuditLogObject oozieAuditLogObject = new OozieAuditLogObject();
        Matcher matcher = LOG_PATTERN.matcher(logLine);
        if (!matcher.matches()) {
            return null;
        }
        applyValueTo(oozieAuditLogObject, matcher);

        return oozieAuditLogObject;
    }

    private void applyValueTo(OozieAuditLogObject oozieAuditLogObject, Matcher matcher) throws ParseException {
        oozieAuditLogObject.timestamp = humanDateToMilliseconds(matcher.group(1));
        oozieAuditLogObject.level = matcher.group(2);
        oozieAuditLogObject.ip = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(6), "["), "]");
        oozieAuditLogObject.user = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(9), "["), "]");
        oozieAuditLogObject.group = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(12), "["), "]");
        oozieAuditLogObject.app = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(15), "["), "]");
        oozieAuditLogObject.jobId = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(18), "["), "]");
        oozieAuditLogObject.operation = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(21), "["), "]");
        oozieAuditLogObject.parameter = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(24), "["), "]");
        oozieAuditLogObject.status = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(27), "["), "]");
        oozieAuditLogObject.httpcode = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(30), "["), "]");
        oozieAuditLogObject.errorcode = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(33), "["), "]");
        oozieAuditLogObject.errormessage = StringUtils.removeEnd(StringUtils.removeStart(matcher.group(36), "["),
                "]");
    }

    static class ParserFn extends DoFn<String, OozieAuditLogObject> {
        private final Aggregator<Long, Long> emptyLines = createAggregator("emptyLines", new Sum.SumLongFn());
        private OozieAuditLogParser parser = new OozieAuditLogParser();

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            if (c.element().trim().isEmpty()) {
                emptyLines.addValue(1L);
            }
            OozieAuditLogObject object = parser.parse(c.element());
            if (object != null)
                c.output(object);
        }
    }

    public static class LogParser extends PTransform<PCollection<String>, PCollection<OozieAuditLogObject>> {
        @Override
        public PCollection<OozieAuditLogObject> apply(PCollection<String> lines) {
            return lines.apply(ParDo.of(new ParserFn()));
        }
    }

    /** A SimpleFunction that converts a Word and Count into a printable string. */
    public static class FormatAsTextFn extends SimpleFunction<OozieAuditLogObject, String> {
        @Override
        public String apply(OozieAuditLogObject input) {
            return input.toString();
        }
    }
}