com.espertech.esper.rowregex.EventRowRegexHelper.java Source code

Java tutorial

Introduction

Here is the source code for com.espertech.esper.rowregex.EventRowRegexHelper.java

Source

/*
 * *************************************************************************************
 *  Copyright (C) 2008 EsperTech, Inc. All rights reserved.                            *
 *  http://esper.codehaus.org                                                          *
 *  http://www.espertech.com                                                           *
 *  ---------------------------------------------------------------------------------- *
 *  The software in this package is published under the terms of the GPL license       *
 *  a copy of which has been included with this distribution in the license.txt file.  *
 * *************************************************************************************
 */

package com.espertech.esper.rowregex;

import com.espertech.esper.collection.Pair;
import com.espertech.esper.epl.expression.ExprNode;
import com.espertech.esper.epl.expression.ExprNode;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.util.*;

/**
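 * Helper for match-recognize (row regex) processing: classifies pattern variables as single or
 * multiple and builds the NFA states for a pattern expression tree.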
 */
public class EventRowRegexHelper {
    private static final Log log = LogFactory.getLog(EventRowRegexHelper.class);

    /**
     * Walks a row-regex expression tree and sorts every variable name (atom tag) into either the
     * single-match set or the multiple-match set.
     * <p>
     * A tag ends up in the multiple set when its own atom type reports multiple matches, when it is
     * reached with {@code isMultiple} set (for example below a nested node whose type reports
     * multiple matches), or when the same tag is encountered again after already having been
     * recorded as single. Each child of an alteration is inspected against fresh, empty sets; the
     * per-child results are merged into the caller's sets and any tag present in the multiple set
     * is then dropped from the single set.
     *
     * @param parent regex expression node to inspect
     * @param isMultiple whether the variables below {@code parent} are already known to be multiple
     *        (true) or are so far single (false)
     * @param variablesSingle set collecting the single variables; updated in place
     * @param variablesMultiple set collecting the multiple (group) variables; updated in place
     */
    protected static void recursiveInspectVariables(RowRegexExprNode parent, boolean isMultiple,
            Set<String> variablesSingle, Set<String> variablesMultiple) {
        // nested group: children inherit "multiple" when the group itself repeats
        if (parent instanceof RowRegexExprNodeNested) {
            RowRegexExprNodeNested group = (RowRegexExprNodeNested) parent;
            for (RowRegexExprNode member : parent.getChildNodes()) {
                boolean memberIsMultiple = group.getType().isMultipleMatches() || isMultiple;
                recursiveInspectVariables(member, memberIsMultiple, variablesSingle, variablesMultiple);
            }
            return;
        }

        // alteration: inspect each branch on its own, then merge the branch results
        if (parent instanceof RowRegexExprNodeAlteration) {
            for (RowRegexExprNode branch : parent.getChildNodes()) {
                LinkedHashSet<String> branchSingles = new LinkedHashSet<String>();
                LinkedHashSet<String> branchMultiples = new LinkedHashSet<String>();
                recursiveInspectVariables(branch, isMultiple, branchSingles, branchMultiples);

                variablesMultiple.addAll(branchMultiples);
                variablesSingle.addAll(branchSingles);
            }
            // a tag that is multiple anywhere must not also be listed as single
            variablesSingle.removeAll(variablesMultiple);
            return;
        }

        // atom: classify the variable itself
        if (parent instanceof RowRegexExprNodeAtom) {
            RowRegexExprNodeAtom variable = (RowRegexExprNodeAtom) parent;
            String tag = variable.getTag();

            if (variablesMultiple.contains(tag)) {
                return;     // already classified as multiple, nothing to do
            }
            if (variablesSingle.remove(tag)) {
                // seen before as single: the repeated occurrence promotes it to multiple
                variablesMultiple.add(tag);
                return;
            }

            boolean tagIsMultiple = variable.getType().isMultipleMatches() || isMultiple;
            if (tagIsMultiple) {
                variablesMultiple.add(tag);
            } else {
                variablesSingle.add(tag);
            }
            return;
        }

        // any other node kind: simply descend into the children with the flag unchanged
        for (RowRegexExprNode member : parent.getChildNodes()) {
            recursiveInspectVariables(member, isMultiple, variablesSingle, variablesMultiple);
        }
    }

    /**
     * Builds the NFA for the given expression tree and returns its start states together with
     * all of its states.
     * <p>
     * After the states for the tree have been built, one shared end state is added as a next
     * state of every end state of the resulting strand, and every state of the strand is given a
     * sequential flat node number starting at zero.
     *
     * @param parent to build start state for
     * @param variableDefinitions each variable and its expressions
     * @param variableStreams variable name and its stream number
     * @return result holding a copy of the strand's start states and the strand's list of all states
     */
    protected static RegexNFAStrandResult recursiveBuildStartStates(RowRegexExprNode parent,
            Map<String, ExprNode> variableDefinitions, Map<String, Pair<Integer, Boolean>> variableStreams) {
        Stack<Integer> positionPath = new Stack<Integer>();
        RegexNFAStrand rootStrand = recursiveBuildStatesInternal(parent, variableDefinitions, variableStreams,
                positionPath);

        // terminate the automaton: every end state leads into the one shared end state
        RegexNFAStateEnd terminal = new RegexNFAStateEnd();
        for (RegexNFAStateBase lastState : rootStrand.getEndStates()) {
            lastState.addState(terminal);
        }

        // number the states in the order the strand lists them
        int nextFlatNumber = 0;
        for (RegexNFAStateBase state : rootStrand.getAllStates()) {
            state.setNodeNumFlat(nextFlatNumber);
            nextFlatNumber++;
        }

        List<RegexNFAState> startStates = new ArrayList<RegexNFAState>(rootStrand.getStartStates());
        return new RegexNFAStrandResult(startStates, rootStrand.getAllStates());
    }

    /**
     * Recursively converts a row-regex expression node into a strand of NFA states.
     * <p>
     * Dispatches on the node kind: alteration, concatenation, nested group; any other node is
     * treated as an atom.
     *
     * @param node expression node to convert
     * @param variableDefinitions each variable and its defining expression
     * @param variableStreams variable name mapped to its stream number and its multiple-flag
     * @param nodeNumStack child positions on the path leading to {@code node}; used to name atom states
     * @return strand holding the start states, end states, all states and passthrough flag for {@code node}
     */
    private static RegexNFAStrand recursiveBuildStatesInternal(RowRegexExprNode node,
            Map<String, ExprNode> variableDefinitions, Map<String, Pair<Integer, Boolean>> variableStreams,
            Stack<Integer> nodeNumStack) {
        if (node instanceof RowRegexExprNodeAlteration) {
            return buildAlterationStrand(node, variableDefinitions, variableStreams, nodeNumStack);
        }
        if (node instanceof RowRegexExprNodeConcatenation) {
            return buildConcatenationStrand(node, variableDefinitions, variableStreams, nodeNumStack);
        }
        if (node instanceof RowRegexExprNodeNested) {
            return buildNestedStrand(node, variableDefinitions, variableStreams, nodeNumStack);
        }
        return buildAtomStrand((RowRegexExprNodeAtom) node, variableDefinitions, variableStreams, nodeNumStack);
    }

    /**
     * Alteration: the strand is the union of the child strands, and it is passthrough as soon as
     * any one child is passthrough.
     */
    private static RegexNFAStrand buildAlterationStrand(RowRegexExprNode node,
            Map<String, ExprNode> variableDefinitions, Map<String, Pair<Integer, Boolean>> variableStreams,
            Stack<Integer> nodeNumStack) {
        List<RegexNFAStateBase> unionStarts = new ArrayList<RegexNFAStateBase>();
        List<RegexNFAStateBase> unionAll = new ArrayList<RegexNFAStateBase>();
        List<RegexNFAStateBase> unionEnds = new ArrayList<RegexNFAStateBase>();
        boolean anyPassthrough = false;

        int position = 0;
        for (RowRegexExprNode branch : node.getChildNodes()) {
            nodeNumStack.push(position);
            RegexNFAStrand branchStrand = recursiveBuildStatesInternal(branch, variableDefinitions,
                    variableStreams, nodeNumStack);
            nodeNumStack.pop();

            unionStarts.addAll(branchStrand.getStartStates());
            unionAll.addAll(branchStrand.getAllStates());
            unionEnds.addAll(branchStrand.getEndStates());
            anyPassthrough |= branchStrand.isPassthrough();

            position++;
        }

        return new RegexNFAStrand(unionStarts, unionEnds, unionAll, anyPassthrough);
    }

    /**
     * Concatenation: builds one strand per child, wires consecutive strands together (skipping
     * over passthrough strands) and is passthrough only when every child is passthrough.
     */
    private static RegexNFAStrand buildConcatenationStrand(RowRegexExprNode node,
            Map<String, ExprNode> variableDefinitions, Map<String, Pair<Integer, Boolean>> variableStreams,
            Stack<Integer> nodeNumStack) {
        RegexNFAStrand[] parts = new RegexNFAStrand[node.getChildNodes().size()];
        List<RegexNFAStateBase> everyState = new ArrayList<RegexNFAStateBase>();
        boolean allPassthrough = true;

        int position = 0;
        for (RowRegexExprNode part : node.getChildNodes()) {
            nodeNumStack.push(position);
            parts[position] = recursiveBuildStatesInternal(part, variableDefinitions, variableStreams,
                    nodeNumStack);
            nodeNumStack.pop();

            everyState.addAll(parts[position].getAllStates());
            allPassthrough &= parts[position].isPassthrough();

            position++;
        }

        // start states: those of the leading parts, up to and including the first non-passthrough part
        List<RegexNFAStateBase> entryStates = new ArrayList<RegexNFAStateBase>();
        for (RegexNFAStrand part : parts) {
            entryStates.addAll(part.getStartStates());
            if (!part.isPassthrough()) {
                break;
            }
        }

        // end states: those of the trailing parts, back to and including the last non-passthrough part
        List<RegexNFAStateBase> exitStates = new ArrayList<RegexNFAStateBase>();
        int fromBack = parts.length - 1;
        while (fromBack >= 0) {
            exitStates.addAll(parts[fromBack].getEndStates());
            if (!parts[fromBack].isPassthrough()) {
                break;
            }
            fromBack--;
        }

        // link each part to the parts before it: always the direct predecessor, and further back
        // for as long as the predecessors are passthrough (and can therefore be skipped)
        for (int later = parts.length - 1; later >= 1; later--) {
            for (int earlier = later - 1; earlier >= 0; earlier--) {
                connectStrands(parts[earlier], parts[later]);
                if (!parts[earlier].isPassthrough()) {
                    break;
                }
            }
        }

        return new RegexNFAStrand(entryStates, exitStates, everyState, allPassthrough);
    }

    /**
     * Adds every start state of {@code to} as a next state of every end state of {@code from}.
     */
    private static void connectStrands(RegexNFAStrand from, RegexNFAStrand to) {
        for (RegexNFAStateBase exit : from.getEndStates()) {
            for (RegexNFAStateBase entry : to.getStartStates()) {
                exit.addState(entry);
            }
        }
    }

    /**
     * Nested group: reuses the strand of the first child; a repeating group additionally loops
     * its end states back to its start states. The group is passthrough when the child strand is
     * passthrough or the group type is optional.
     */
    private static RegexNFAStrand buildNestedStrand(RowRegexExprNode node,
            Map<String, ExprNode> variableDefinitions, Map<String, Pair<Integer, Boolean>> variableStreams,
            Stack<Integer> nodeNumStack) {
        RowRegexExprNodeNested group = (RowRegexExprNodeNested) node;

        nodeNumStack.push(0);
        RegexNFAStrand inner = recursiveBuildStatesInternal(node.getChildNodes().get(0), variableDefinitions,
                variableStreams, nodeNumStack);
        nodeNumStack.pop();

        boolean passthrough = inner.isPassthrough() || group.getType().isOptional();

        if (group.getType().isMultipleMatches()) {
            // repeating group: pipe each end state back to each start state, without duplicates
            for (RegexNFAStateBase exit : inner.getEndStates()) {
                for (RegexNFAStateBase entry : inner.getStartStates()) {
                    if (exit.getNextStates().contains(entry)) {
                        continue;
                    }
                    exit.getNextStates().add(entry);
                }
            }
        }

        return new RegexNFAStrand(inner.getStartStates(), inner.getEndStates(), inner.getAllStates(),
                passthrough);
    }

    /**
     * Atom: creates exactly one state whose class depends on the atom's quantifier type and on
     * whether a defining expression exists for the variable. The resulting strand has that state
     * as its only start, end and overall state, and is passthrough when the atom type is optional.
     */
    private static RegexNFAStrand buildAtomStrand(RowRegexExprNodeAtom atom,
            Map<String, ExprNode> variableDefinitions, Map<String, Pair<Integer, Boolean>> variableStreams,
            Stack<Integer> nodeNumStack) {
        String tag = atom.getTag();

        // original note: stream number is assigned for single-variables for most direct expression
        // eval; multiple-variable gets -1
        Pair<Integer, Boolean> streamInfo = variableStreams.get(tag);
        int streamNum = streamInfo.getFirst();
        boolean multiple = streamInfo.getSecond();
        ExprNode definition = variableDefinitions.get(tag);

        RegexNFATypeEnum quantifier = atom.getType();
        boolean zeroToMany = (quantifier == RegexNFATypeEnum.ZERO_TO_MANY)
                || (quantifier == RegexNFATypeEnum.ZERO_TO_MANY_RELUCTANT);
        boolean oneToMany = (quantifier == RegexNFATypeEnum.ONE_TO_MANY)
                || (quantifier == RegexNFATypeEnum.ONE_TO_MANY_RELUCTANT);
        boolean oneOptional = (quantifier == RegexNFATypeEnum.ONE_OPTIONAL)
                || (quantifier == RegexNFATypeEnum.ONE_OPTIONAL_RELUCTANT);

        String nodeNum = toString(nodeNumStack);
        RegexNFAStateBase state;
        if (zeroToMany) {
            state = new RegexNFAStateZeroToMany(nodeNum, tag, streamNum, multiple, quantifier.isGreedy(),
                    definition);
        } else if (oneToMany) {
            state = new RegexNFAStateOneToMany(nodeNum, tag, streamNum, multiple, quantifier.isGreedy(),
                    definition);
        } else if (oneOptional) {
            state = new RegexNFAStateOneOptional(nodeNum, tag, streamNum, multiple, quantifier.isGreedy(),
                    definition);
        } else if (definition == null) {
            // no quantifier and no defining expression
            state = new RegexNFAStateAnyOne(nodeNum, tag, streamNum, multiple);
        } else {
            state = new RegexNFAStateFilter(nodeNum, tag, streamNum, multiple, definition);
        }

        List<RegexNFAStateBase> startStates = Collections.singletonList(state);
        List<RegexNFAStateBase> endStates = Collections.singletonList(state);
        List<RegexNFAStateBase> allStates = Collections.singletonList(state);
        return new RegexNFAStrand(startStates, endStates, allStates, atom.getType().isOptional());
    }

    /**
     * Renders the node-number stack as a dot-separated path, bottom of the stack first
     * (for example {@code 0.2.1}); an empty stack yields the empty string.
     *
     * @param nodeNumStack child positions to render
     * @return the positions joined by "."
     */
    private static String toString(Stack<Integer> nodeNumStack) {
        StringBuilder path = new StringBuilder();
        for (int depth = 0; depth < nodeNumStack.size(); depth++) {
            if (depth > 0) {
                path.append(".");
            }
            path.append(nodeNumStack.get(depth).intValue());
        }
        return path.toString();
    }
}