// Java tutorial
/*
    Copyright IBM Corp. 2015, 2016
    This file is part of Anomaly Detection Engine for Linux Logs (ADE).

    ADE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    ADE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with ADE.  If not, see <http://www.gnu.org/licenses/>.
*/
package org.openmainframe.ade.ext.os.parser;

import java.util.Date;
import java.util.TimeZone;

import org.openmainframe.ade.Ade;
import org.openmainframe.ade.IAdeConfigProperties;
import org.openmainframe.ade.exceptions.AdeException;
import org.openmainframe.ade.ext.os.LinuxAdeExtProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

/**
 * An abstract base class for RFC3164 syslog parsers. This class defines regular expressions
 * for the RFC3164 header fields while leaving additional parsing of the message body to
 * concrete subclasses.
 */
public abstract class LinuxSyslog3164ParserBase extends LinuxSyslogLineParser {

    /**
     * Main logger for this class.
     */
    static final Logger s_logger = LoggerFactory.getLogger(LinuxSyslog3164ParserBase.class);

    /**
     * The end of "today" as computed the first time a parser instance was constructed:
     * the start of the following day in the output time-zone. It is computed once and
     * never refreshed afterwards.
     */
    private static DateTime END_OF_TODAY = null;

    /**
     * The input time-zone specified in setup.props.
     */
    private static DateTimeZone INPUT_TIME_ZONE;

    /**
     * The output time-zone specified in setup.props.
     */
    private static DateTimeZone OUTPUT_TIME_ZONE;

    /**
     * LinuxAdeExtProperties object that contains properties and configurations information from the start
     * of AdeExt main class. Remains null until {@link #setAdeExtProperties(LinuxAdeExtProperties)} is called.
     */
    private static LinuxAdeExtProperties s_linuxAdeExtProperties = null;

    /**
     * A year setting object. Assigned in {@link #toDate(String, String)} when a year is defined
     * in the properties.
     */
    private LinuxSyslogYearSetter yearSetter;

    /**
     * Regular expression to extract the priority information from the header. Note: The priority is optional.
     * This allows logs that have been written to disk without the priority to be post-processed.
     */
    protected static final String RFC3164_PRI = "(?:<(\\d{1,3})>){0,1}";

    /**
     * Regular expression to extract the host name from the header.
     */
    protected static final String RFC3164_HOSTNAME = "([\\-\\.:%_a-zA-Z0-9]{1,255})";

    /**
     * Regular expression to extract the time-stamp from the header.
     */
    public static final String RFC3164_TIMESTAMP = "((?:Apr|Aug|Dec|Feb|Jan|Jul|Jun|Mar|May|Nov|Oct|Sep) [ 0123][0-9] [012]\\d:[0-6]\\d:[0-6]\\d)";

    /**
     * Regular expression to extract header information. (The priority, time-stamp, and host name)
     */
    protected static final String RFC3164_HEADER = "^" + RFC3164_PRI + RFC3164_TIMESTAMP + " " + RFC3164_HOSTNAME + " ";

    /*
     * Within the RFC3164_HEADER regex string above, identify the regex
     * capturing groups for the parts that we want to extract.
     */
    protected static final int RFC3164_HEADER_PRI_GROUP = 1;
    protected static final int RFC3164_HEADER_TIMESTAMP_GROUP = 2;
    protected static final int RFC3164_HEADER_HOSTNAME_GROUP = 3;

    /*
     * Define the number of capturing groups defined by the header. This is
     * useful to a subclass that wants to use the RFC3164_HEADER constant and
     * concatenate strings with additional capturing groups. The subclass can use
     * this constant to identify the start of its own capturing groups.
     */
    protected static final int RFC3164_HEADER_GROUPS = 3;

    /**
     * The current year, captured when this parser instance was constructed.
     */
    private final int curYear;

    /*
     * Setup an array of DateTimeFormatter objects that can parse the dates in a
     * 3164 style message. Both are necessary because the DateTimeFormatter
     * parseDateTime() method doesn't handle a variable number of spaces
     * between the month and day: the first pattern covers a single space
     * ("Jan 11", "Jan 01"), the second covers a space-padded single-digit day
     * ("Jan  1"), which RFC3164_TIMESTAMP also accepts.
     */
    protected static final DateTimeFormatter[] dt_formatters = {
            DateTimeFormat.forPattern("MMM d HH:mm:ss").withZoneUTC(),
            DateTimeFormat.forPattern("MMM  d HH:mm:ss").withZoneUTC()
    };

    /**
     * Constructor for initializing the properties file and various time properties.
     * When {@code linuxAdeExtProperties} is null, the statically registered properties
     * (see {@link #setAdeExtProperties(LinuxAdeExtProperties)}) are used instead.
     *
     * @param linuxAdeExtProperties Contains property and configuration information; may be null.
     * @throws AdeException if the time-zone initialization fails.
     */
    public LinuxSyslog3164ParserBase(LinuxAdeExtProperties linuxAdeExtProperties) throws AdeException {
        this.curYear = new DateTime().getYear();
        if (linuxAdeExtProperties == null) {
            m_LinuxAdeExtProperties = s_linuxAdeExtProperties;
        } else {
            m_LinuxAdeExtProperties = linuxAdeExtProperties;
        }

        /* Set the start of today and timezone */
        initializeTimeZoneAndStartOfToday();
    }

    /**
     * Default constructor that sets the properties file to null.
     *
     * @throws AdeException if the time-zone initialization fails.
     */
    public LinuxSyslog3164ParserBase() throws AdeException {
        this(null);
    }

    /**
     * Set the AdeExt properties file.
     *
     * @param linuxAdeExtProperties The properties file that contains the configuration and properties information.
     */
    public static void setAdeExtProperties(LinuxAdeExtProperties linuxAdeExtProperties) {
        s_linuxAdeExtProperties = linuxAdeExtProperties;
    }

    /**
     * Returns the year stored in AdeExt properties file.
     *
     * @return the year as an int value.
     * @throws NullPointerException if no properties were registered through
     *         {@link #setAdeExtProperties(LinuxAdeExtProperties)}.
     */
    public static int getAdeExtPropertiesYear() {
        return s_linuxAdeExtProperties.getYear();
    }

    /**
     * Returns the input time zone specified in setup.props
     *
     * @return The input time zone, or null if no parser instance has been constructed yet.
     */
    public static DateTimeZone getInputTimeZone() {
        return INPUT_TIME_ZONE;
    }

    /**
     * Returns the output time zone specified in setup.props
     *
     * @return The output time zone, or null if no parser instance has been constructed yet.
     */
    public static DateTimeZone getOutputTimeZone() {
        return OUTPUT_TIME_ZONE;
    }

    /**
     * Retrieves the date parsed from the header of a log. Unless otherwise defined in the properties file,
     * we have to use some logic to figure out the year. After parsing the date, we need to correct the time-zone.
     * Then we set the dateTime to the current year. Now we need to check the dateTime and see if it's after today.
     * The logic is as follows:
     * - If Log time-stamp &lt; End of day of today
     *   (comparing Month, Day, Hour, Minutes, Seconds, with year missing),
     *   assume it's this year.
     * - If Log time-stamp &gt; End of day of today
     *   (comparing Month, Day, Hour, Minutes, Seconds, with year missing),
     *   assume it's previous year.
     *
     * The following restrictions will be made to customer for BulkLoad:
     * - Cannot upload logs older than 11 months.
     * - Cannot upload logs that are continuous for more than 11 months.
     *
     * Note: END OF TODAY is purposely chosen instead of START OF TODAY in case a user bulk loads logs that
     * belongs to today. It's not possible/likely that a user would bulk load logs from last year of the
     * same day with the restriction we specified above.
     *
     * @param source the source name string value.
     * @param s the date and time string value.
     * @return Date object with date/time-stamp of the Linux log.
     * @throws IllegalArgumentException if none of the formatters can turn {@code s} into a date;
     *         the failure of the last formatter tried is attached as the cause.
     */
    @Override
    public final Date toDate(String source, String s) {
        IllegalArgumentException lastFailure = null;
        for (DateTimeFormatter fmt : dt_formatters) {
            /* Scoped per formatter so a value left over from an earlier attempt
             * cannot make a plain parse failure look like a post-parse error. */
            DateTime dt = null;
            try {
                dt = fmt.parseDateTime(s);
                dt = dt.withZoneRetainFields(INPUT_TIME_ZONE);
                dt = dt.withZone(OUTPUT_TIME_ZONE);

                /* Year must be set after all the time is normalized to the timezone */
                dt = dt.withYear(curYear);

                if (isYearDefinedInProperties()) {
                    yearSetter = LinuxSyslogYearSetter.getYearSetter(source);

                    /* If years is defined, then, use the defined year as a starting year */
                    final int yearToUse = yearSetter.getDesiredYear(dt);
                    dt = dt.withYear(yearToUse);
                } else if (dt.isAfter(END_OF_TODAY)) {
                    /* Set DateTime to previous year */
                    dt = dt.withYear(curYear - 1);
                }
                /* Otherwise the current year set above already applies. */

                /* AdeCore will take the Java Date object, and convert
                 * it to the output time-zone, then extract the hour. */
                return dt.toDate();
            } catch (IllegalArgumentException e) {
                lastFailure = e;
                /* This exception can occur normally when iterating
                 * through the DateTimeFormatter objects. It is only
                 * an error worth noting when the dt object is not null,
                 * i.e. the text was parsed but a later step rejected it. */
                if (dt != null) {
                    s_logger.error("Invalid argument encountered.", e);
                }
            }
        }
        throw new IllegalArgumentException("Failed to parse date " + s, lastFailure);
    }

    /**
     * Tells whether the effective properties define an explicit year. The statically
     * registered properties take precedence; the properties resolved by the constructor
     * are used when no static ones were registered. When neither is available the year
     * is treated as not defined.
     *
     * @return true if a properties object is available and reports a defined year.
     */
    private boolean isYearDefinedInProperties() {
        final LinuxAdeExtProperties props =
                (s_linuxAdeExtProperties != null) ? s_linuxAdeExtProperties : m_LinuxAdeExtProperties;
        return props != null && props.isYearDefined();
    }

    /**
     * Set the END_OF_TODAY value and time-zone values. The time-zone values are taken from the Ade
     * configuration properties. END_OF_TODAY value is retrieved by getting the current date-time,
     * adjust time-zone, add an additional day and set the time to the start of the day.
     * Only the raw offset of each configured time-zone is used to build the fixed-offset zones.
     * Note: These only need to be set once; later calls are no-ops.
     *
     * @throws AdeException if the Ade instance cannot be obtained.
     */
    private static void initializeTimeZoneAndStartOfToday() throws AdeException {
        synchronized (LinuxSyslog3164ParserBase.class) {
            if (END_OF_TODAY == null) {
                final IAdeConfigProperties adeConfig = Ade.getAde().getConfigProperties();
                final TimeZone timeZone = adeConfig.getInputTimeZone();
                final TimeZone outputTimezone = adeConfig.getOutputTimeZone();
                INPUT_TIME_ZONE = DateTimeZone.forOffsetMillis(timeZone.getRawOffset());
                OUTPUT_TIME_ZONE = DateTimeZone.forOffsetMillis(outputTimezone.getRawOffset());
                END_OF_TODAY = DateTime.now();
                END_OF_TODAY = END_OF_TODAY.withZone(OUTPUT_TIME_ZONE);
                END_OF_TODAY = END_OF_TODAY.plusDays(1);
                END_OF_TODAY = END_OF_TODAY.withTimeAtStartOfDay();
            }
        }
    }

    /**
     * Return the DateTime determined from toDate(String source, String s) method.
     * For 3164 messages, the DateTimeZone is not included in the log.
     *
     * @return always null, since the DateTimeZone is not included in the log.
     */
    public final DateTime getLastDeterminedDateTime() {
        return null;
    }
}