com.streamsets.pipeline.lib.parser.syslog.SyslogParser.java Source code

Java tutorial

Introduction

Here is the source code for com.streamsets.pipeline.lib.parser.syslog.SyslogParser.java

Source

/**
 * Code was adapted from:
 * https://github.com/apache/flume/blob/trunk/flume-ng-core/src/main/java/org/apache/flume/source/SyslogParser.java
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.pipeline.lib.parser.syslog;

import com.google.common.base.Throwables;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

import java.net.InetSocketAddress;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;

import com.streamsets.pipeline.api.Field;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.api.Stage;
import com.streamsets.pipeline.api.base.OnRecordErrorException;
import com.streamsets.pipeline.api.impl.Utils;
import com.streamsets.pipeline.lib.parser.AbstractParser;
import io.netty.buffer.ByteBuf;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class SyslogParser extends AbstractParser {

    static final String SYSLOG_FACILITY = "facility";
    static final String SYSLOG_SEVERITY = "severity";
    static final String SYSLOG_PRIORITY = "priority";
    static final String SYSLOG_VERSION = "version";
    static final String TIMESTAMP = "timestamp";
    static final String HOST = "host";
    static final String REMAINING = "remaining";
    static final String RAW = "raw";
    static final String RECEIVER_PORT = "receiverPort";
    static final String RECEIVER_ADDR = "receiverAddr";
    static final String SENDER_PORT = "senderPort";
    static final String SENDER_ADDR = "senderAddr";
    private static final Field EMPTY_STRING = Field.create("");
    private static final Field SYSLOG_VERSION1 = Field.create(1);
    private static final Pattern TWO_SPACES = Pattern.compile("  ");
    private static final DateTimeFormatter rfc3164Format = DateTimeFormat.forPattern("MMM d HH:mm:ss")
            .withZoneUTC();

    private static final String timePat = "yyyy-MM-dd'T'HH:mm:ss";
    private static final int RFC3164_LEN = 15;
    private static final int RFC5424_PREFIX_LEN = 19;
    private final DateTimeFormatter timeParser;
    private final Charset charset;
    private final LoadingCache<String, Long> timestampCache;
    private long recordId;

    public SyslogParser(Stage.Context context, Charset charset) {
        super(context);
        this.charset = charset;
        timeParser = DateTimeFormat.forPattern(timePat).withZoneUTC();
        timestampCache = CacheBuilder.newBuilder().maximumSize(1000).build(new CacheLoader<String, Long>() {
            @Override
            public Long load(String key) {
                return timeParser.parseMillis(key);
            }
        });
    }

    @Override
    public List<Record> parse(ByteBuf buf, InetSocketAddress recipient, InetSocketAddress sender)
            throws OnRecordErrorException {
        Map<String, Field> fields = new HashMap<>();
        final String msg = buf.toString(charset);
        int msgLen = msg.length();
        int curPos = 0;
        fields.put(RAW, Field.create(msg));
        if (msg.charAt(curPos) != '<') {
            throw new OnRecordErrorException(Errors.SYSLOG_01, "cannot find open bracket '<'", msg);
        }
        int endBracketPos = msg.indexOf('>');
        if (endBracketPos <= 0 || endBracketPos > 6) {
            throw new OnRecordErrorException(Errors.SYSLOG_01, "cannot find end bracket '>'", msg);
        }
        String priority = msg.substring(1, endBracketPos);
        int pri;
        try {
            pri = Integer.parseInt(priority);
        } catch (NumberFormatException nfe) {
            throw new OnRecordErrorException(Errors.SYSLOG_01, nfe, msg, nfe);
        }
        int facility = pri / 8;
        int severity = pri % 8;

        // Remember priority
        fields.put(SYSLOG_PRIORITY, Field.create(priority));

        // put fac / sev into header
        fields.put(SYSLOG_FACILITY, Field.create(facility));
        fields.put(SYSLOG_SEVERITY, Field.create(severity));

        if (msgLen <= endBracketPos + 1) {
            throw new OnRecordErrorException(Errors.SYSLOG_02, msg);
        }
        // update parsing position
        curPos = endBracketPos + 1;

        // remember version string
        if (msgLen > curPos + 2 && "1 ".equals(msg.substring(curPos, curPos + 2))) {
            // this is curious, I guess the code above matches 1 exactly because
            // there has not been another version.
            fields.put(SYSLOG_VERSION, SYSLOG_VERSION1);
            curPos += 2;
        }

        // now parse timestamp (handle different varieties)
        long ts;
        String tsString;
        char dateStartChar = msg.charAt(curPos);

        // no timestamp specified; use relay current time
        if (dateStartChar == '-') {
            tsString = Character.toString(dateStartChar);
            ts = System.currentTimeMillis();
            if (msgLen <= curPos + 2) {
                throw new OnRecordErrorException(Errors.SYSLOG_03, msg);
            }
            curPos += 2; // assume we skip past a space to get to the hostname
            // rfc3164 timestamp
        } else if (dateStartChar >= 'A' && dateStartChar <= 'Z') {
            if (msgLen <= curPos + RFC3164_LEN) {
                throw new OnRecordErrorException(Errors.SYSLOG_04, msg);
            }
            tsString = msg.substring(curPos, curPos + RFC3164_LEN);
            ts = parseRfc3164Time(tsString);
            curPos += RFC3164_LEN + 1;
            // rfc 5424 timestamp
        } else {
            int nextSpace = msg.indexOf(' ', curPos);
            if (nextSpace == -1) {
                throw new OnRecordErrorException(Errors.SYSLOG_04, msg);
            }
            tsString = msg.substring(curPos, nextSpace);
            ts = parseRfc5424Date(tsString);
            curPos = nextSpace + 1;
        }
        fields.put(TIMESTAMP, Field.create(ts));
        // parse out hostname
        int nextSpace = msg.indexOf(' ', curPos);
        if (nextSpace == -1) {
            throw new OnRecordErrorException(Errors.SYSLOG_03, msg);
        }
        fields.put(HOST, Field.create(msg.substring(curPos, nextSpace)));
        if (msgLen > nextSpace + 1) {
            curPos = nextSpace + 1;
            fields.put(REMAINING, Field.create(msg.substring(curPos)));
        } else {
            fields.put(REMAINING, EMPTY_STRING);
        }
        String receiverHost = recipient.getHostString();
        if (receiverHost == null) {
            receiverHost = recipient.toString();
        }
        Field receiverAddr = Field.create(receiverHost + ":" + recipient.getPort());
        fields.put(RECEIVER_ADDR, receiverAddr);
        fields.put(RECEIVER_PORT, Field.create(recipient.getPort()));
        String senderHost = sender.getHostString();
        if (senderHost == null) {
            senderHost = sender.toString();
        }
        Field senderAddr = Field.create(senderHost + ":" + sender.getPort());
        fields.put(SENDER_ADDR, senderAddr);
        fields.put(SENDER_PORT, Field.create(sender.getPort()));
        Record record = context.createRecord(senderAddr.getValueAsString() + "::" + recordId++);
        record.set(Field.create(fields));
        return Arrays.asList(record);
    }

    /**
     * Parse date in RFC 5424 format. Uses an LRU cache to speed up parsing for
     * multiple messages that occur in the same second.
     * @param msg
     * @return Typical (for Java) milliseconds since UNIX epoch
     */
    protected long parseRfc5424Date(String msg) throws OnRecordErrorException {

        long ts;
        int curPos = 0;
        int msgLen = msg.length();
        if (msgLen <= RFC5424_PREFIX_LEN) {
            throw new OnRecordErrorException(Errors.SYSLOG_09, msg);
        }
        String timestampPrefix = msg.substring(curPos, RFC5424_PREFIX_LEN);
        try {
            ts = timestampCache.get(timestampPrefix);
        } catch (ExecutionException ex) {
            Throwable cause = Throwables.getRootCause(ex);
            if (cause instanceof IllegalArgumentException) {
                throw new OnRecordErrorException(Errors.SYSLOG_05, cause, timestampPrefix, cause);
            } else {
                // I don't believe this will ever occur
                throw new IllegalStateException(Utils.format(Errors.SYSLOG_05.getMessage(), cause, timestampPrefix),
                        cause);
            }
        }
        curPos += RFC5424_PREFIX_LEN;
        // look for the optional fractional seconds
        if (msg.charAt(curPos) == '.') {
            // figure out how many numeric digits
            boolean foundEnd = false;
            int endMillisPos = curPos + 1;
            if (msgLen <= endMillisPos) {
                throw new OnRecordErrorException(Errors.SYSLOG_06, msg);
            }
            // FIXME: TODO: ensure we handle all bad formatting cases
            while (!foundEnd) {
                char curDigit = msg.charAt(endMillisPos);
                if (curDigit >= '0' && curDigit <= '9') {
                    endMillisPos++;
                } else {
                    foundEnd = true;
                }
            }
            // if they had a valid fractional second, append it rounded to millis
            final int fractionalPositions = endMillisPos - (curPos + 1);
            if (fractionalPositions > 0) {
                long milliseconds = Long.parseLong(msg.substring(curPos + 1, endMillisPos));
                if (fractionalPositions > 3) {
                    milliseconds /= Math.pow(10, (fractionalPositions - 3));
                } else if (fractionalPositions < 3) {
                    milliseconds *= Math.pow(10, (3 - fractionalPositions));
                }
                ts += milliseconds;
            } else {
                throw new OnRecordErrorException(Errors.SYSLOG_07, msg);
            }
            curPos = endMillisPos;
        }
        // look for timezone
        char tzFirst = msg.charAt(curPos);
        // UTC
        if (tzFirst == 'Z') {
            // no-op
        } else if (tzFirst == '+' || tzFirst == '-') {
            if (msgLen <= curPos + 5) {
                throw new OnRecordErrorException(Errors.SYSLOG_08, msg);
            }
            int polarity;
            if (tzFirst == '+') {
                polarity = +1;
            } else {
                polarity = -1;
            }

            char[] h = new char[5];
            for (int i = 0; i < 5; i++) {
                h[i] = msg.charAt(curPos + 1 + i);
            }

            if (h[0] >= '0' && h[0] <= '9' && h[1] >= '0' && h[1] <= '9' && h[2] == ':' && h[3] >= '0'
                    && h[3] <= '9' && h[4] >= '0' && h[4] <= '9') {
                try {
                    int hourOffset = Integer.parseInt(msg.substring(curPos + 1, curPos + 3));
                    int minOffset = Integer.parseInt(msg.substring(curPos + 4, curPos + 6));
                    ts -= polarity * ((hourOffset * 60) + minOffset) * 60000;
                } catch (NumberFormatException nfe) {
                    throw new OnRecordErrorException(Errors.SYSLOG_08, msg, nfe);
                }
            } else {
                throw new OnRecordErrorException(Errors.SYSLOG_08, msg);
            }
        }
        return ts;
    }

    /**
     * Parse the RFC3164 date format. This is trickier than it sounds because this
     * format does not specify a year so we get weird edge cases at year
     * boundaries. This implementation tries to "do what I mean".
     * @param ts RFC3164-compatible timestamp to be parsed
     * @return Typical (for Java) milliseconds since the UNIX epoch
     */
    protected long parseRfc3164Time(String ts) throws OnRecordErrorException {
        DateTime now = DateTime.now();
        int year = now.getYear();
        ts = TWO_SPACES.matcher(ts).replaceFirst(" ");
        DateTime date;
        try {
            date = rfc3164Format.parseDateTime(ts);
        } catch (IllegalArgumentException e) {
            throw new OnRecordErrorException(Errors.SYSLOG_10, ts, e);
        }
        // try to deal with boundary cases, i.e. new year's eve.
        // rfc3164 dates are really dumb.
        // NB: cannot handle replaying of old logs or going back to the future
        DateTime fixed = date.withYear(year);
        // flume clock is ahead or there is some latency, and the year rolled
        if (fixed.isAfter(now) && fixed.minusMonths(1).isAfter(now)) {
            fixed = date.withYear(year - 1);
            // flume clock is behind and the year rolled
        } else if (fixed.isBefore(now) && fixed.plusMonths(1).isBefore(now)) {
            fixed = date.withYear(year + 1);
        }
        date = fixed;
        return date.getMillis();
    }
}