Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.metron.parsing.parsers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.lang3.StringUtils;
import org.apache.metron.parsing.utils.GrokUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.code.regexp.Matcher;
import com.google.code.regexp.Pattern;

/**
 * Holds a dictionary of named Grok pattern definitions and compiles a Grok
 * expression (for example {@code %{IP}}) into a named-group regular expression
 * that can then be matched against log lines.
 */
public class MetronGrok implements Serializable {

  private static final long serialVersionUID = 2002441320075020721L;

  private static final Logger LOG = LoggerFactory.getLogger(MetronGrok.class);

  /**
   * Shape of one definition line in a pattern file: a name made of letters,
   * digits and underscores, whitespace, then the definition. Lines that do not
   * match (blank lines, {@code #} comments) are ignored by the loader.
   */
  private static final Pattern PATTERN_LINE = Pattern.compile("^([A-Za-z0-9_]+)\\s+(.*)$");

  /**
   * Named regex produced from the originalGrokPattern.
   */
  private String namedRegex;

  /**
   * Map of generated group id ({@code name0}, {@code name1}, ...) to the
   * subname or name used in the original Grok expression.
   */
  private Map<String, String> namedRegexCollection;

  /**
   * Original {@code Grok} pattern (example: %{IP}).
   */
  private String originalGrokPattern;

  /**
   * Compiled form of the namedRegex; {@code null} until {@link #compile(String)} succeeds.
   */
  private Pattern compiledNamedRegex;

  /**
   * Pattern definitions known to this instance, keyed by pattern name.
   */
  private Map<String, String> grokPatternDefinition;

  /** Only used in grok discovery. */
  private String savedPattern;

  /**
   * Shared empty {@code Grok} instance.
   */
  public static final MetronGrok EMPTY = new MetronGrok();

  /**
   * Create a new <i>empty</i> {@code Grok} object: no definitions, no compiled expression.
   */
  public MetronGrok() {
    originalGrokPattern = StringUtils.EMPTY;
    namedRegex = StringUtils.EMPTY;
    compiledNamedRegex = null;
    grokPatternDefinition = new TreeMap<String, String>();
    namedRegexCollection = new TreeMap<String, String>();
    savedPattern = StringUtils.EMPTY;
  }

  /**
   * @return the pattern saved for grok discovery
   */
  public String getSaved_pattern() {
    return savedPattern;
  }

  /**
   * @param savedpattern pattern to save for grok discovery
   */
  public void setSaved_pattern(String savedpattern) {
    this.savedPattern = savedpattern;
  }

  /**
   * Create a {@code Grok} instance with the given patterns file and
   * a {@code Grok} pattern.
   *
   * @param grokPatternPath Path to the pattern file
   * @param grokExpression  - <b>OPTIONAL</b> - Grok pattern to compile ex: %{APACHELOG}
   * @return {@code Grok} instance
   * @throws Exception if the path is blank, the file cannot be loaded, or the
   *                   expression cannot be compiled
   */
  public static MetronGrok create(String grokPatternPath, String grokExpression)
      throws Exception {
    if (StringUtils.isBlank(grokPatternPath)) {
      throw new Exception("{grokPatternPath} should not be empty or null");
    }
    MetronGrok g = new MetronGrok();
    g.addPatternFromFile(grokPatternPath);
    if (StringUtils.isNotBlank(grokExpression)) {
      g.compile(grokExpression);
    }
    return g;
  }

  /**
   * Create a {@code Grok} instance with the given grok patterns file.
   *
   * @param grokPatternPath : Path to the pattern file
   * @return Grok
   * @throws Exception if the path is blank or the file cannot be loaded
   */
  public static MetronGrok create(String grokPatternPath) throws Exception {
    return create(grokPatternPath, null);
  }

  /**
   * Add a custom pattern to grok at runtime. An existing definition with the
   * same name is replaced.
   *
   * @param name    : Pattern Name
   * @param pattern : Regular expression Or {@code Grok} pattern
   * @throws Exception if the name or the pattern is blank or null
   **/
  public void addPattern(String name, String pattern) throws Exception {
    if (StringUtils.isBlank(name)) {
      throw new Exception("Invalid Pattern name");
    }
    // The original tested `name` a second time here, so a blank pattern was never rejected.
    if (StringUtils.isBlank(pattern)) {
      throw new Exception("Invalid Pattern");
    }
    grokPatternDefinition.put(name, pattern);
  }

  /**
   * Copy the given Map of patterns (pattern name, regular expression) to {@code Grok},
   * duplicate elements will be overridden.
   *
   * @param cpy : Map to copy
   * @throws Exception if the map is null or empty
   **/
  public void copyPatterns(Map<String, String> cpy) throws Exception {
    if (cpy == null) {
      throw new Exception("Invalid Patterns");
    }

    if (cpy.isEmpty()) {
      throw new Exception("Invalid Patterns");
    }
    for (Map.Entry<String, String> entry : cpy.entrySet()) {
      grokPatternDefinition.put(entry.getKey().toString(), entry.getValue().toString());
    }
  }

  /**
   * Get the current map of {@code Grok} patterns. The returned map is the
   * instance's own map, not a copy.
   *
   * @return Patterns (name, regular expression)
   */
  public Map<String, String> getPatterns() {
    return grokPatternDefinition;
  }

  /**
   * Get the named regex from the {@code Grok} pattern. <p></p>
   * See {@link #compile(String)} for more detail.
   *
   * @return named regex
   */
  public String getNamedRegex() {
    return namedRegex;
  }

  /**
   * Add patterns to {@code Grok} from the given file.
   *
   * @param file : Path of the grok pattern file
   * @throws Exception if the file does not exist, cannot be read, or fails to load
   */
  public void addPatternFromFile(String file) throws Exception {
    File f = new File(file);
    if (!f.exists()) {
      throw new Exception("Pattern not found");
    }

    if (!f.canRead()) {
      throw new Exception("Pattern cannot be read");
    }

    FileReader r = null;
    try {
      r = new FileReader(f);
      addPatternFromReader(r);
    } catch (FileNotFoundException e) {
      // Same exception type and message as before, but keep the cause for diagnosis.
      throw new Exception(e.getMessage(), e);
    } catch (IOException e) {
      throw new Exception(e.getMessage(), e);
    } finally {
      if (r != null) {
        try {
          r.close();
        } catch (IOException io) {
          // A failed close must not mask the outcome of the load itself.
          LOG.warn("Unable to close pattern file {}", file, io);
        }
      }
    }
  }

  /**
   * Add patterns to {@code Grok} from a Reader. Each line of the form
   * {@code NAME definition} is registered; every other line is ignored.
   * The reader is closed when this method returns, whether or not it succeeds.
   *
   * @param r : Reader with {@code Grok} patterns
   * @throws Exception if reading fails or a definition is rejected
   */
  public void addPatternFromReader(Reader r) throws Exception {
    BufferedReader br = new BufferedReader(r);
    try {
      String line;
      while ((line = br.readLine()) != null) {
        Matcher m = PATTERN_LINE.matcher(line);
        if (!m.matches()) {
          // Not a definition line (blank line, comment, ...).
          continue;
        }
        if (StringUtils.isBlank(m.group(2))) {
          // A name without a definition is malformed; skip it rather than abort the whole load.
          LOG.warn("Ignoring pattern {} because it has no definition", m.group(1));
          continue;
        }
        this.addPattern(m.group(1), m.group(2));
      }
    } catch (IOException e) {
      throw new Exception(e.getMessage(), e);
    } catch (Exception e) {
      throw new Exception(e.getMessage(), e);
    } finally {
      try {
        br.close();
      } catch (IOException e) {
        LOG.warn("Unable to close pattern reader", e);
      }
    }
  }

  /**
   * Match the given <tt>log</tt> with the named regex
   * and return the json representation of the matched element.
   *
   * @param log : log to match
   * @return json representation of the log
   */
  public String capture(String log) {
    MetronMatch match = match(log);
    match.captures();
    return match.toJson();
  }

  /**
   * Match the given list of <tt>log</tt> with the named regex
   * and return the list of json representation of the matched elements.
   *
   * @param logs : list of log
   * @return list of json representation of the log, in input order
   */
  public List<String> captures(List<String> logs) {
    List<String> matched = new ArrayList<String>();
    for (String log : logs) {
      MetronMatch match = match(log);
      match.captures();
      matched.add(match.toJson());
    }
    return matched;
  }

  /**
   * Match the given <tt>text</tt> with the named regex.
   * {@code Grok} will extract data from the string and return an instance of
   * {@link MetronMatch}.
   *
   * @param text : Single line of log
   * @return {@link MetronMatch#EMPTY} when nothing has been compiled or the text is
   *         blank; otherwise a new match object, populated only if the regex was found
   */
  public MetronMatch match(String text) {
    if (compiledNamedRegex == null || StringUtils.isBlank(text)) {
      return MetronMatch.EMPTY;
    }

    Matcher m = compiledNamedRegex.matcher(text);
    MetronMatch match = new MetronMatch();
    if (m.find()) {
      match.setSubject(text);
      match.setGrok(this);
      match.setMatch(m);
      match.setStart(m.start(0));
      match.setEnd(m.end(0));
    }
    return match;
  }

  /**
   * Compile the {@code Grok} pattern to named regex pattern.
   * <p>
   * Every {@code %{NAME}}, {@code %{NAME:subname}} or {@code %{NAME=regex}} reference is
   * replaced, one per iteration, by a named group {@code (?<nameN>definition)} until no
   * reference is left; the result is then compiled.
   *
   * @param pattern : Grok pattern (ex: %{IP})
   * @throws Exception if the pattern is blank, references an undefined pattern, or
   *                   still contains references after 1000 expansion rounds
   */
  public void compile(String pattern) throws Exception {

    if (StringUtils.isBlank(pattern)) {
      throw new Exception("{pattern} should not be empty or null");
    }

    namedRegex = pattern;
    originalGrokPattern = pattern;
    int index = 0;
    /** guard against infinite recursion (self-referencing definitions) */
    int iterationLeft = 1000;
    boolean continueIteration = true;

    // Replace %{foo} with the regex (mostly groupname regex)
    // and then compile the regex
    while (continueIteration) {
      continueIteration = false;
      if (iterationLeft <= 0) {
        throw new Exception("Deep recursion pattern compilation of " + originalGrokPattern);
      }
      iterationLeft--;

      Matcher m = GrokUtils.GROK_PATTERN.matcher(namedRegex);
      // Match %{Foo:bar} -> pattern name and subname
      // Match %{Foo=regex} -> add new regex definition
      if (m.find()) {
        continueIteration = true;
        Map<String, String> group = m.namedGroups();
        if (group.get("definition") != null) {
          try {
            addPattern(group.get("pattern"), group.get("definition"));
            // The literal reference in the expression includes "=definition", so the
            // text to replace below must include it too.
            group.put("name", group.get("name") + "=" + group.get("definition"));
          } catch (Exception e) {
            LOG.warn("Unable to register inline definition for pattern {}",
                group.get("pattern"), e);
          }
        }

        String patternName = group.get("pattern");
        String definition = grokPatternDefinition.get(patternName);
        if (definition == null) {
          // Without this check the text "null" would be spliced into the regex.
          throw new Exception("No definition found for pattern '" + patternName
              + "' while compiling " + originalGrokPattern);
        }

        namedRegexCollection.put("name" + index,
            (group.get("subname") != null ? group.get("subname") : group.get("name")));
        namedRegex =
            StringUtils.replace(namedRegex, "%{" + group.get("name") + "}", "(?<name" + index + ">"
                + definition + ")");
        index++;
      }
    }

    if (namedRegex.isEmpty()) {
      throw new Exception("Pattern not fount");
    }
    // Compile the regex
    compiledNamedRegex = Pattern.compile(namedRegex);
  }

  /**
   * Original grok pattern used to compile to the named regex.
   *
   * @return String Original Grok pattern
   */
  public String getOriginalGrokPattern() {
    return originalGrokPattern;
  }

  /**
   * Get the named regex from the given id.
   *
   * @param id : named regex id
   * @return String of the named regex, or {@code null} if the id is unknown
   */
  public String getNamedRegexCollectionById(String id) {
    return namedRegexCollection.get(id);
  }

  /**
   * Get the full collection of the named regex. The returned map is the
   * instance's own map, not a copy.
   *
   * @return named RegexCollection
   */
  public Map<String, String> getNamedRegexCollection() {
    return namedRegexCollection;
  }
}