Java tutorial
/* * Copyright 2014-2017 JKOOL, LLC. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.jkoolcloud.tnt4j.streams.custom.parsers; import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import com.jkoolcloud.tnt4j.core.OpLevel; import com.jkoolcloud.tnt4j.sink.DefaultEventSinkFactory; import com.jkoolcloud.tnt4j.sink.EventSink; import com.jkoolcloud.tnt4j.streams.parsers.ActivityRegExParser; import com.jkoolcloud.tnt4j.streams.utils.StreamsResources; /** * Implements a custom Apache access log parser based on RegEx parsing. User can define RegEx string to parse log as for * ordinary activity RegEx parser. * <p> * But it is also possible to use Apache access log configuration pattern over LogPattern parameter. Then RegEx is * generated from it. Additional config pattern tokens may be mapped to RegEx'es using ConfRegexMapping parameters. * <p> * This parser supports the following properties (in addition to those supported by {@link ActivityRegExParser}): * <ul> * <li>LogPattern - access log pattern. (Optional, if RegEx {@link Pattern} property is defined)</li> * <li>ConfRegexMapping - custom log pattern token and RegEx mapping. (Optional, actual only if {@code LogPattern} * property is used)</li> * </ul> * <p> * RegEx group names and log pattern tokens mapping: * <table summary="RegEx group names and log pattern tokens mapping"> * <tr><th>RegEx group name</th><th>FormatString</th><th>Description</th></tr> * <tr><td>address</td><td>%a</td><td>Client IP address of the request (see themod_remoteipmodule).</td></tr> * <tr><td>localAddress</td><td>%A</td><td>Local IP-address.</td></tr> * <tr><td>size</td><td>%B</td><td>Size of response in bytes, excluding HTTP headers.</td></tr> * <tr><td>sizeClf</td><td>%b</td><td>Size of response in bytes, excluding HTTP headers. In CLF format,e.g.,a '-' rather than a 0 when no bytes are * sent.</td></tr> * <tr><td>cookie</td><td>%{VARNAME}C</td><td>The contents of cookieVARNAMEin the request sent to the server. Only version 0 cookies are fully * supported.</td></tr> * <tr><td>reqTime</td><td>%D</td><td>The time taken to serve the request, in microseconds.</td></tr> * <tr><td>envVariable</td><td>%{VARNAME}e</td><td>The contents of the environment variableVARNAME.</td></tr> * <tr><td>filename</td><td>%f</td><td>Filename.</td></tr> * <tr><td>hostname</td><td>%h</td><td>Remote hostname. Will log the IP address ifHostnameLookupsis set toOff, which is the default. If it logs the * hostname for only a few hosts, you probably have access control directives mentioning them by name. Seethe Require * host documentation.</td></tr> * <tr><td>protocol</td><td>%H</td><td>The request protocol.</td></tr> * <tr><td>variable</td><td>%{VARNAME}i</td><td>The contents ofVARNAME:header line(s) in the request sent to the server. Changes made by other modules * (e.g.mod_headers) affect this. If you're interested in what the request header was prior to when most modules would * have modified it, usemod_setenv ifto copy the header into an internal environment variable and log that value with * the%{VARNAME}edescribed above.</td></tr> * <tr><td>keepAlive</td><td>%k</td><td>Number of keepalive requests handled on this connection. Interesting ifKeepAliveis being used, so that, for * example, a '1' means the first keepalive request after the initial one, '2' the second, etc...; otherwise this is * always 0 (indicating the initial request).</td></tr> * <tr><td>logname</td><td>%l</td><td>Remote logname (from identd, if supplied). This will return a dash unlessmod_identis present * andIdentityCheckis setOn.</td></tr> * <tr><td>method</td><td>%m</td><td>The request method.</td></tr> * <tr><td>note</td><td>%{VARNAME}n</td><td>The contents of noteVARNAMEfrom another module.</td></tr> * <tr><td>headerVar</td><td>%{VARNAME}o</td><td>The contents ofVARNAME:header line(s) in the reply.</td></tr> * <tr><td>port</td><td>%{format}p</td><td>The canonical port of the server serving the request, or the server's actual port, or the client's actual port. * Valid formats arecanonical,local, orremote.</td></tr> * <tr><td>process</td><td>%{format}P</td><td>The process ID or thread ID of the child that serviced the request. Valid formats arepid,tid, * andhextid.hextidrequires APR 1.2.0 or higher.</td></tr> * <tr><td>query</td><td>%q</td><td>The query string (prepended with a?if a query string exists, otherwise an empty string).</td></tr> * <tr><td>line</td><td>%r</td><td>First line of request.</td></tr> * <tr><td>respHandler</td><td>%R</td><td>The handler generating the response (if any).</td></tr> * <tr><td>status</td><td>%s</td><td>Status. For requests that have been internally redirected, this is the status of theoriginalrequest. * Use%>sfor the final status.</td></tr> * <tr><td>time</td><td>%{format}t</td><td>The time, in the form given by format, which should be in an extendedstrftime(3)format (potentially localized). * If the format starts withbegin:(default) the time is taken at the beginning of the request processing. If it starts * withend:it is the time when the log entry gets written, close to the end of the request processing. In addition to * the formats supported bystrftime(3), the following format tokens are supported:</td></tr> * <tr><td>reqTimeExt</td><td>%{UNIT}T</td><td>The time taken to serve the request, in a time unit given byUNIT. Valid units aremsfor milliseconds,usfor * microseconds, andsfor seconds. Usingsgives the same result as%Twithout any format; usingusgives the same * result as%D. Combining%Twith a unit is available in 2.4.13 and later.</td></tr> * <tr><td>user</td><td>%u</td><td>Remote user if the request was authenticated. May be bogus if return status (%s) is 401 (unauthorized).</td></tr> * <tr><td>uri</td><td>%U</td><td>The URL path requested, not including any query string.</td></tr> * <tr><td>serverName</td><td>%v</td><td>The canonicalServerNameof the server serving the request.</td></tr> * <tr><td>serverNameExt</td><td>%V</td><td>The server name according to theUseCanonicalNamesetting.</td></tr> * <tr><td>conStatus</td><td>%X</td><td>Connection status when response is completed:</td></tr> * <tr><td>received</td><td>%I</td><td>Bytes received, including request and headers. Cannot be zero. You need to enablemod_logioto use this.</td></tr> * <tr><td>sent</td><td>%O</td><td>Bytes sent, including headers. May be zero in rare cases such as when a request is aborted before a response is * sent. You need to enablemod_logioto use this.</td></tr> * </table> * <p> * NOTE: See {@link Pattern} documentation section "Group name" regarding groups naming conventions! * * @version $Revision: 2 $ */ public class ApacheAccessLogParser extends ActivityRegExParser { private static final EventSink LOGGER = DefaultEventSinkFactory.defaultEventSink(ApacheAccessLogParser.class); /** * Constant for name of built-in {@value} property. */ protected static final String PROP_APACHE_LOG_PATTERN = "LogPattern"; // NON-NLS /** * Constant for name of built-in {@value} property. */ protected static final String PROP_CONF_REGEX_MAPPING = "ConfRegexMapping"; // NON-NLS private static final String APACHE_LOG_CONFIG_TOKEN_REPLACEMENT_REGEX = "%\\S*(%|\\w)"; // NON-NLS private static final String DEFAULT_LOG_TOKEN_REGEX = "(\\S+)"; // NON-NLS private static final String STATUS_LOG_TOKEN_REGEX = "(?<status>\\d{3})"; // NON-NLS private static final String REQUEST_LOG_TOKEN_REGEX = "(?<request>((?<method>\\S+) (?<uri>.*?)( (?<version>\\S+))?)|(-))"; // NON-NLS // static final String REGEX_TOKENS = "(?<address>\\S+) // (?<user>.*?)\\[(?<when>.*?)\\] \"(?<request>.*?)\" // (?<status>[\\d\\-]+)(?<length>[\\d\\-]+) \"(?<referer>.*?)\" // \"(?<agent>.*?)\".*"; // // static final String REGEX_CLIENT_REQUEST = // "(?<method>\\S+)\\s+?(?<uri>.*?)\\s+?(?<version>HTTP.*)"; /** * Apache access log configuration pattern string. */ protected String apacheLogPattern = null; /** * Pre configured mappings between Apache access log configuration pattern token strings and RegEx strings used to * parse log entries. */ protected final Map<String, String> configRegexMappings = new HashMap<>(); /** * User defined mappings between Apache access log configuration pattern token strings and RegEx strings used to * parse log entries. */ protected final Map<String, String> userRegexMappings = new HashMap<>(); /** * Constructs a new ApacheAccessLogParser. */ public ApacheAccessLogParser() { fillDefaultConfigRegexMappings(); } @Override protected EventSink logger() { return LOGGER; } /** * Fills default Apache access log configuration to RegEx mappings. */ private void fillDefaultConfigRegexMappings() { configRegexMappings.put("%%", "%"); // NON-NLS %% The percent sign. configRegexMappings.put("%a", makeNamedGroup("address")); // NON-NLS configRegexMappings.put("%A", makeNamedGroup("localAddress")); // NON-NLS configRegexMappings.put("%B", makeNamedGroup("size")); // NON-NLS configRegexMappings.put("%b", "(?<sizeClf>\\d+|-)"); // NON-NLS configRegexMappings.put("%*C", makeNamedGroup("cookie")); // NON-NLS configRegexMappings.put("%*D", makeNamedGroup("reqTime")); // NON-NLS configRegexMappings.put("%*e", makeNamedGroup("envVariable")); // NON-NLS configRegexMappings.put("%f", makeNamedGroup("filename")); // NON-NLS configRegexMappings.put("%h", makeNamedGroup("hostname")); // NON-NLS configRegexMappings.put("%H", makeNamedGroup("protocol")); // NON-NLS configRegexMappings.put("%*i", "(?<variable>.*?)"); // NON-NLS configRegexMappings.put("%k", makeNamedGroup("keepAlive")); // NON-NLS configRegexMappings.put("%l", makeNamedGroup("logname")); // NON-NLS configRegexMappings.put("%m", makeNamedGroup("method")); // NON-NLS configRegexMappings.put("%*n", makeNamedGroup("note")); // NON-NLS configRegexMappings.put("%*o", makeNamedGroup("headerVar")); // NON-NLS configRegexMappings.put("%*p", makeNamedGroup("port")); // NON-NLS configRegexMappings.put("%*P", makeNamedGroup("process")); // NON-NLS configRegexMappings.put("%q", makeNamedGroup("query")); // NON-NLS configRegexMappings.put("%*r", REQUEST_LOG_TOKEN_REGEX); // NON-NLS configRegexMappings.put("%R", makeNamedGroup("respHandler")); // NON-NLS configRegexMappings.put("%*s", STATUS_LOG_TOKEN_REGEX); // NON-NLS configRegexMappings.put("%*t", "\\[(?<time>[\\w:/]+\\s[+\\-]\\d{4})\\]"); // NON-NLS configRegexMappings.put("%*T", makeNamedGroup("reqTimeExt")); // NON-NLS configRegexMappings.put("%*u", makeNamedGroup("user")); // NON-NLS configRegexMappings.put("%*U", makeNamedGroup("uri")); // NON-NLS configRegexMappings.put("%v", makeNamedGroup("serverName")); // NON-NLS configRegexMappings.put("%V", makeNamedGroup("serverNameExt")); // NON-NLS configRegexMappings.put("%X", makeNamedGroup("conStatus")); // NON-NLS configRegexMappings.put("%I", makeNamedGroup("received")); // NON-NLS configRegexMappings.put("%O", makeNamedGroup("sent")); // NON-NLS } @Override public void setProperties(Collection<Map.Entry<String, String>> props) throws Exception { if (props == null) { return; } super.setProperties(props); for (Map.Entry<String, String> prop : props) { String name = prop.getKey(); String value = prop.getValue(); if (PROP_APACHE_LOG_PATTERN.equalsIgnoreCase(name)) { if (StringUtils.isNotEmpty(value)) { apacheLogPattern = value; logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.setting"), name, value); } } else if (PROP_CONF_REGEX_MAPPING.equalsIgnoreCase(name)) { if (StringUtils.isNotEmpty(value)) { int idx = value.indexOf('='); if (idx > 0) { String confKey = value.substring(0, idx); String regex = value.substring(idx + 1); String oldRegex = userRegexMappings.put(confKey, regex); logger().log(OpLevel.DEBUG, StreamsResources.getString( StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.setting"), name, value); logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ApacheAccessLogParser.setting.regex.mapping"), confKey, oldRegex, regex); } } } } if (pattern == null && StringUtils.isNotEmpty(apacheLogPattern)) { String regex = makeRegexPattern(apacheLogPattern); if (regex != null) { pattern = Pattern.compile(regex); logger().log(OpLevel.INFO, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ApacheAccessLogParser.regex.made"), getName(), regex); } else { logger().log(OpLevel.WARNING, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ApacheAccessLogParser.could.not.make.regex"), getName(), apacheLogPattern); } } } /** * Makes log entry parsing RegEx from defined Apache access log configuration pattern string. * * @param apacheLogPattern * Apache access log configuration pattern string * * @return regular expression string, or {@code null} if can't make RegEx string from defined Apache access log * configuration pattern string */ private String makeRegexPattern(String apacheLogPattern) { Pattern pattern = Pattern.compile(APACHE_LOG_CONFIG_TOKEN_REPLACEMENT_REGEX); Matcher matcher = pattern.matcher(apacheLogPattern); StringBuilder logRegexBuff = new StringBuilder(); int pos = 0; while (matcher.find()) { logRegexBuff.append(apacheLogPattern.substring(pos, matcher.start())); logRegexBuff.append(mapConfigTokenToRegex(matcher.group())); pos = matcher.end(); } if (pos < apacheLogPattern.length()) { logRegexBuff.append(apacheLogPattern.substring(pos, apacheLogPattern.length())); } String logRegex = logRegexBuff.toString().trim(); // return logRegex.isEmpty() ? null : "(?m)^" + logRegex; return logRegex.isEmpty() ? null : '^' + logRegex; // NON-NLS } /** * Maps Apache access log configuration pattern token to user defined RegEx string. If no user defined mapping is * found, then default mapping is used. * * @param configToken * Apache access log configuration pattern token string * * @return RegEx string matching configuration token */ private String mapConfigTokenToRegex(String configToken) { String confRegexMapping = findMapping(configToken); if (confRegexMapping != null) { return confRegexMapping; } return mapConfigTokenRegexDefault(configToken); } /** * Finds user defined mapping of Apache access log configuration pattern token to RegEx string. * * @param configToken * Apache access log configuration pattern token string * * @return mapped RegEx string */ private String findMapping(String configToken) { String regex = findRegexMapping(configToken, userRegexMappings); if (StringUtils.isEmpty(regex)) { regex = findRegexMapping(configToken, configRegexMappings); } return StringUtils.isEmpty(regex) ? null : regex; } private static String findRegexMapping(String configToken, Map<String, String> regexMappings) { if (regexMappings != null) { for (Map.Entry<String, String> e : regexMappings.entrySet()) { if (isMatchingPattern(configToken, e.getKey())) { return e.getValue(); } } } return null; } /** * Returns default RegEx string for defined Apache access log configuration token. * * @param configToken * Apache access log configuration token string * * @return default RegEx string for configuration token */ private static String mapConfigTokenRegexDefault(String configToken) { if (isMatchingPattern(configToken, "%*s")) { // NON-NLS return STATUS_LOG_TOKEN_REGEX; } else if (isMatchingPattern(configToken, "%*r")) { // NON-NLS return REQUEST_LOG_TOKEN_REGEX; } return DEFAULT_LOG_TOKEN_REGEX; } /** * Checks if Apache access log configuration token matches defined pattern. * * @param configToken * Apache access log configuration token string * @param pattern * pattern string to match configuration token * * @return {@code true} if Apache access log configuration token matches defined pattern */ private static boolean isMatchingPattern(String configToken, String pattern) { String p = pattern.replace("*", "\\S*"); // NON-NLS return configToken.matches(p); } private static String makeNamedGroup(String name) { return "(?<" + name + ">\\S+)"; // NON-NLS } }