io.sugo.grok.api.Grok.java Source code

Java tutorial

Introduction

Here is the source code for io.sugo.grok.api.Grok.java

Source

/*******************************************************************************
 * Copyright 2014 Anthony Corbacho and contributors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package io.sugo.grok.api;

import io.sugo.grok.api.exception.GrokException;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * {@code Grok} parses arbitrary text and structures it.<br>
 *
 * {@code Grok} is a simple API that allows you to easily parse logs
 * and other files (single line). With {@code Grok},
 * you can turn unstructured log and event data into structured data (JSON).
 *<br>
 * example:<br>
 * <pre>
 *  Grok grok = Grok.create("patterns/patterns");
 *  grok.compile("%{USER}");
 *  Match gm = grok.match("root");
 *  gm.captures();
 * </pre>
 *
 * <p>Instances are mutable: pattern definitions can be added at any time and
 * {@link #compile(String, boolean)} replaces the currently compiled expression.
 * The class performs no synchronization of its own.
 *
 * @since 0.0.1
 * @author anthonycorbacho
 */
public class Grok implements Serializable {

    private static final Logger LOG = LoggerFactory.getLogger(Grok.class);

    /**
     * Maximum number of expansion passes performed by
     * {@link #compile(String, boolean)} before the expression is considered
     * to be infinitely recursive.
     */
    private static final int MAX_COMPILE_ITERATIONS = 1000;

    /**
     * One definition line of a pattern file: a name made of letters, digits
     * and underscores, some whitespace, then the expression. Blank lines and
     * comment lines (e.g. starting with {@code #}) do not match and are skipped.
     */
    private static final Pattern PATTERN_LINE = Pattern.compile("^([A-Za-z0-9_]+)\\s+(.*)$");

    /**
     * Named regex of the originalGrokPattern.
     */
    private String namedRegex;
    /**
     * Map of the named regex of the originalGrokPattern
     * with id = namedregexid and value = namedregex.
     */
    private Map<String, String> namedRegexCollection;
    /**
     * Original {@code Grok} pattern (e.g. %{IP}).
     */
    private String originalGrokPattern;
    /**
     * Pattern of the namedRegex.
     */
    private Pattern compiledNamedRegex;
    /**
     * {@code Grok} discovery, created lazily by {@link #discover(String)}.
     */
    private Discovery disco;
    /**
     * {@code Grok} patterns definition (pattern name to expression).
     */
    private Map<String, String> grokPatternDefinition;

    /** Only used in grok discovery. */
    private String savedPattern;

    /**
     * Shared empty {@code Grok}. Note that this is an ordinary mutable
     * instance: any pattern added to it or expression compiled on it is
     * visible to every other user of this constant.
     */
    public static final Grok EMPTY = new Grok();

    /**
     * Create a new <i>empty</i> {@code Grok} object: no pattern definitions
     * and no compiled expression.
     */
    public Grok() {
        originalGrokPattern = StringUtils.EMPTY;
        disco = null;
        namedRegex = StringUtils.EMPTY;
        compiledNamedRegex = null;
        grokPatternDefinition = new TreeMap<String, String>();
        namedRegexCollection = new TreeMap<String, String>();
        savedPattern = StringUtils.EMPTY;
    }

    /**
     * Get the pattern saved by grok discovery.
     *
     * @return the saved pattern, empty by default
     */
    public String getSaved_pattern() {
        return savedPattern;
    }

    /**
     * Set the pattern saved by grok discovery.
     *
     * @param savedpattern : pattern to remember
     */
    public void setSaved_pattern(String savedpattern) {
        this.savedPattern = savedpattern;
    }

    /**
     * Create a {@code Grok} instance with the given patterns file and
     * a {@code Grok} pattern.
     *
     * @param grokPatternPath Path to the pattern file
     * @param grokExpression  - <b>OPTIONAL</b> - Grok pattern to compile ex: %{APACHELOG}
     * @return {@code Grok} instance
     * @throws GrokException if the path is blank, the file cannot be loaded
     *                       or the expression cannot be compiled
     */
    public static Grok create(String grokPatternPath, String grokExpression) throws GrokException {
        if (StringUtils.isBlank(grokPatternPath)) {
            throw new GrokException("{grokPatternPath} should not be empty or null");
        }
        Grok g = new Grok();
        g.addPatternFromFile(grokPatternPath);
        if (StringUtils.isNotBlank(grokExpression)) {
            g.compile(grokExpression, false);
        }
        return g;
    }

    /**
     * Create a {@code Grok} instance with the given grok patterns file.
     *
     * @param  grokPatternPath : Path to the pattern file
     * @return Grok
     * @throws GrokException if the path is blank or the file cannot be loaded
     */
    public static Grok create(String grokPatternPath) throws GrokException {
        return create(grokPatternPath, null);
    }

    /**
     * Add custom pattern to grok in the runtime. An existing definition with
     * the same name is replaced.
     *
     * @param name : Pattern Name
     * @param pattern : Regular expression Or {@code Grok} pattern
     * @throws GrokException if the name or the pattern is blank
     **/
    public void addPattern(String name, String pattern) throws GrokException {
        if (StringUtils.isBlank(name)) {
            throw new GrokException("Invalid Pattern name");
        }
        if (StringUtils.isBlank(pattern)) {
            throw new GrokException("Invalid Pattern");
        }
        grokPatternDefinition.put(name, pattern);
    }

    /**
     * Copy the given Map of patterns (pattern name, regular expression) to {@code Grok},
     * duplicate elements will be overridden.
     *
     * @param cpy : Map to copy
     * @throws GrokException if the map is null or empty, or contains a null
     *                       name or expression (entries copied before the
     *                       offending one are kept)
     **/
    public void copyPatterns(Map<String, String> cpy) throws GrokException {
        if (cpy == null || cpy.isEmpty()) {
            throw new GrokException("Invalid Patterns");
        }
        for (Map.Entry<String, String> entry : cpy.entrySet()) {
            String name = entry.getKey();
            String expression = entry.getValue();
            // Report null entries as a GrokException instead of a bare NullPointerException
            if (name == null || expression == null) {
                throw new GrokException("Invalid Patterns");
            }
            grokPatternDefinition.put(name, expression);
        }
    }

    /**
     * Get the current map of {@code Grok} pattern. The returned map is the
     * live internal map, not a copy.
     *
     * @return Patterns (name, regular expression)
     */
    public Map<String, String> getPatterns() {
        return grokPatternDefinition;
    }

    /**
     * Get the named regex from the {@code Grok} pattern. <br>
     * See {@link #compile(String)} for more detail.
     * @return named regex
     */
    public String getNamedRegex() {
        return namedRegex;
    }

    /**
     * Add patterns to {@code Grok} from the given file.
     *
     * @param file : Path of the grok pattern
     * @throws GrokException if the file does not exist, cannot be read,
     *                       or contains an invalid definition
     */
    public void addPatternFromFile(String file) throws GrokException {

        File f = new File(file);
        if (!f.exists()) {
            throw new GrokException("Pattern not found");
        }

        if (!f.canRead()) {
            throw new GrokException("Pattern cannot be read");
        }

        Reader reader = null;
        try {
            reader = new FileReader(f);
            addPatternFromReader(reader);
        } catch (IOException e) {
            throw asGrokException(e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException io) {
                    // Closing is best effort: the patterns are already loaded (or the
                    // real failure is already propagating), so only report it.
                    LOG.warn("Unable to close pattern file " + file, io);
                }
            }
        }
    }

    /**
     * Add patterns to {@code Grok} from a Reader. Every line of the form
     * {@code NAME expression} is registered through
     * {@link #addPattern(String, String)}; any other line is ignored.
     * The reader is closed before this method returns, whether it
     * succeeds or fails.
     *
     * @param r : Reader with {@code Grok} patterns
     * @throws GrokException if reading fails or a definition is invalid
     */
    public void addPatternFromReader(Reader r) throws GrokException {
        try (BufferedReader br = new BufferedReader(r)) {
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = PATTERN_LINE.matcher(line);
                if (m.matches()) {
                    addPattern(m.group(1), m.group(2));
                }
            }
        } catch (IOException e) {
            throw asGrokException(e);
        }
    }

    /**
     * Match the given {@code log} with the named regex
     * and return the json representation of the matched element.
     *
     * @param log : log to match
     * @return json representation of the log
     */
    public String capture(String log) {
        Match match = match(log);
        match.captures();
        return match.toJson();
    }

    /**
     * Match the given list of {@code log} with the named regex
     * and return the list of json representation of the matched elements.
     *
     * @param logs : list of log
     * @return list of json representation of the log, in the same order as the input
     */
    public List<String> captures(List<String> logs) {
        List<String> matched = new ArrayList<String>();
        for (String log : logs) {
            Match match = match(log);
            match.captures();
            matched.add(match.toJson());
        }
        return matched;
    }

    /**
     * Match the given {@code text} with the named regex.
     * {@code Grok} will extract data from the string and return an instance of {@link Match}.
     *
     * @param text : Single line of log
     * @return {@link Match#EMPTY} when nothing has been compiled yet or the text is blank;
     *         otherwise a new {@link Match}, which is only populated when the
     *         compiled expression is found in the text
     */
    public Match match(String text) {
        if (compiledNamedRegex == null || StringUtils.isBlank(text)) {
            return Match.EMPTY;
        }

        Matcher m = compiledNamedRegex.matcher(text);
        Match match = new Match();
        if (m.find()) {
            match.setSubject(text);
            match.setGrok(this);
            match.setMatch(m);
            match.setStart(m.start(0));
            match.setEnd(m.end(0));
        }
        return match;
    }

    /**
     * Compile the {@code Grok} pattern to named regex pattern.
     *
     * @param pattern : Grok pattern (ex: %{IP})
     * @throws GrokException if the pattern is blank or cannot be expanded
     */
    public void compile(String pattern) throws GrokException {
        compile(pattern, false);
    }

    /**
     * Compile the {@code Grok} pattern to named regex pattern.
     *
     * <p>Every {@code %{NAME}}, {@code %{NAME:subname}} or {@code %{NAME=definition}}
     * token is repeatedly replaced by the expression registered for {@code NAME}
     * until no token is left, then the result is compiled as a regular expression.
     * Each replaced token is recorded in the named regex collection under the
     * generated id {@code name<index>}; entries of a previous compilation are discarded.
     *
     * @param pattern : Grok pattern (ex: %{IP})
     * @param namedOnly : Whether to capture named expressions only or not (i.e. %{IP:ip} but not %{IP})
     * @throws GrokException if the pattern is blank, references a pattern that has no
     *                       definition, carries an invalid inline definition, or does
     *                       not finish expanding within the iteration limit
     */
    public void compile(String pattern, boolean namedOnly) throws GrokException {

        if (StringUtils.isBlank(pattern)) {
            throw new GrokException("{pattern} should not be empty or null");
        }

        namedRegex = pattern;
        originalGrokPattern = pattern;
        // Ids restart at name0 below, so mappings of a previous compilation would be stale.
        namedRegexCollection.clear();

        int index = 0;
        // Guard against patterns that reference themselves (directly or not).
        int iterationLeft = MAX_COMPILE_ITERATIONS;
        boolean continueIteration = true;

        // Replace %{foo} with the regex (mostly groupname regex)
        // and then compile the regex
        while (continueIteration) {
            continueIteration = false;
            if (iterationLeft <= 0) {
                throw new GrokException("Deep recursion pattern compilation of " + originalGrokPattern);
            }
            iterationLeft--;

            Matcher m = GrokUtils.GROK_PATTERN.matcher(namedRegex);
            // Match %{Foo:bar} -> pattern name and subname
            // Match %{Foo=regex} -> add new regex definition
            if (m.find()) {
                continueIteration = true;
                Map<String, String> group = GrokUtils.namedGroups(m, m.group());
                String patternName = group.get("pattern");
                String subname = group.get("subname");
                String name = group.get("name");
                String definition = group.get("definition");

                if (definition != null) {
                    // An invalid inline definition can never be expanded, so fail now
                    // with the real reason rather than after exhausting the iterations.
                    addPattern(patternName, definition);
                    name = name + "=" + definition;
                }

                String expression = grokPatternDefinition.get(patternName);
                if (expression == null) {
                    // Without this check the literal text "null" ends up in the regex.
                    throw new GrokException(
                            String.format("No definition for key '%s' found, aborting", patternName));
                }

                String token = "%{" + name + "}";
                String captureName = subname != null ? subname : name;
                int count = StringUtils.countMatches(namedRegex, token);
                for (int i = 0; i < count; i++) {
                    // Each occurrence gets its own group id, hence one replacement at a time.
                    String replacement = (namedOnly && subname == null)
                            ? expression
                            : String.format("(?<name%d>%s)", index, expression);
                    namedRegexCollection.put("name" + index, captureName);
                    namedRegex = StringUtils.replace(namedRegex, token, replacement, 1);
                    index++;
                }
            }
        }

        if (namedRegex.isEmpty()) {
            throw new GrokException("Pattern not fount");
        }
        // Compile the regex
        compiledNamedRegex = Pattern.compile(namedRegex);
    }

    /**
     * {@code Grok} will try to find the best expression that will match your input.
     * {@link Discovery}
     *
     * @param input : Single line of log
     * @return the Grok pattern
     */
    public String discover(String input) {

        if (disco == null) {
            disco = new Discovery(this);
        }
        return disco.discover(input);
    }

    /**
     * Original grok pattern used to compile to the named regex.
     *
     * @return String Original Grok pattern
     */
    public String getOriginalGrokPattern() {
        return originalGrokPattern;
    }

    /**
     * Get the named regex from the given id.
     *
     * @param id : named regex id
     * @return String of the named regex, or {@code null} when the id is unknown
     */
    public String getNamedRegexCollectionById(String id) {
        return namedRegexCollection.get(id);
    }

    /**
     * Get the full collection of the named regex. The returned map is the
     * live internal map, not a copy.
     *
     * @return named RegexCollection
     */
    public Map<String, String> getNamedRegexCollection() {
        return namedRegexCollection;
    }

    /**
     * Convert an I/O failure into a {@code GrokException} carrying the same
     * message, keeping the original exception as its cause.
     */
    private static GrokException asGrokException(IOException cause) {
        GrokException wrapped = new GrokException(cause.getMessage());
        wrapped.initCause(cause);
        return wrapped;
    }
}