com.cloudera.flume.conf.FlumeBuilder.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.flume.conf.FlumeBuilder.java

Source

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.flume.conf;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.apache.commons.lang.StringEscapeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.collector.CollectorSink;
import com.cloudera.flume.conf.SinkFactory.SinkBuilder;
import com.cloudera.flume.core.BackOffFailOverSink;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.core.EventSinkDecorator;
import com.cloudera.flume.core.EventSource;
import com.cloudera.flume.core.FanOutSink;
import com.cloudera.flume.handlers.rolling.RollSink;
import com.cloudera.flume.master.availability.FailoverChainSink;
import com.cloudera.util.Pair;
import com.google.common.base.Preconditions;

/**
 * This wraps parsers and factories so that a simple string in the flume config
 * specification language can be used to generate complicated configurations.
 * 
 * NOTE: The language and the names of sources and sinks are case sensitive.
 * 
 * Here are some example specifications -- (formatted as 'code // comment'):
 * 
 * counter("foo") // Creates a counter sink with name foo
 * 
 * [ counter("foo") , thrift("host",1234) ] // FanOutSink with counter and a
 * thrift sink to host:1234
 * 
 * { intervalSampler(100) => counter("samplefoo") } // take every 100th event,
 * send to samplefoo counter
 * 
 * < thrift("host",1234) ? backup > // send to thrift at host:1234, and if fail
 * sends to backup.
 * 
 * let foo := counter("bar") in < { failsometimes => foo } ? foo > // let
 * variable substitution, both foo's after 'in' are the *same* instance.
 * 
 * And here's the fun part -- these are fully composable.
 * 
 * [ <thrift("host",1234) ? {rolling(1000) => writeaheadlog("/tmp/flume") } > ,
 * { intervalSampler(100) => grephisto("/specfile") } ]
 * 
 * // local report
 * 
 * NOTE: some of the names may change, the syntax may change, and it currently
 * doesn't do everything I would like, but it is a start.
 * 
 * TODO(jon) add usage to each Source/Sink/SinkDeco builder.
 */
public class FlumeBuilder {
    static final Logger LOG = LoggerFactory.getLogger(FlumeBuilder.class);

    static SourceFactory srcFactory = new SourceFactoryImpl();
    static SinkFactory sinkFactory = new SinkFactoryImpl();

    enum ASTNODE {
        DEC, HEX, OCT, STRING, BOOL, FLOAT, // literals
        SINK, SOURCE, // sink or source
        KWARG, // kwarg support
        MULTI, DECO, BACKUP, LET, ROLL, GEN, FAILCHAIN, // compound sinks
        NODE, // combination of sink and source
    };

    public static void setSourceFactory(SourceFactory srcFact) {
        Preconditions.checkNotNull(srcFact);
        srcFactory = srcFact;
    }

    public static void setSinkFactory(SinkFactory snkFact) {
        Preconditions.checkNotNull(snkFact);
        sinkFactory = snkFact;
    }

    /**
     * This hooks a particular string to the lexer. From there it creates a parser
     * that can be started from different entities. The lexer and language are
     * case sensitive.
     */
    static FlumeDeployParser getDeployParser(String s) {
        FlumeDeployLexer lexer = new FlumeDeployLexer(new ANTLRStringStream(s));
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        return new FlumeDeployParser(tokens);
    }

    static CommonTree parse(String s) throws RecognitionException {
        return (CommonTree) getDeployParser(s).deflist().getTree();
    }

    static CommonTree parseHost(String s) throws RecognitionException {
        return (CommonTree) getDeployParser(s).host().getTree();
    }

    static CommonTree parseLiteral(String s) throws RecognitionException {
        return (CommonTree) getDeployParser(s).literal().getTree();
    }

    public static CommonTree parseSink(String s) throws RecognitionException {
        FlumeDeployParser parser = getDeployParser(s);
        CommonTree ast = (CommonTree) parser.sink().getTree();
        return ast;
    }

    public static CommonTree parseSource(String s) throws RecognitionException {
        return (CommonTree) getDeployParser(s).source().getTree();
    }

    /**
     * This is for reading and parsing out a full configuration from a file.
     */
    static CommonTree parseFile(String filename) throws IOException, RecognitionException {

        // Create a scanner and parser that reads from the input stream passed to us
        FlumeDeployLexer lexer = new FlumeDeployLexer(new ANTLRFileStream(filename));
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        FlumeDeployParser parser = new FlumeDeployParser(tokens);

        return (CommonTree) parser.deflist().getTree();
    }

    /**
     * This is for reading and parsing out a full configuration from a file.
     */
    static CommonTree parseNodeFile(String filename) throws IOException, RecognitionException {

        // Create a scanner and parser that reads from the input stream passed to us
        FlumeDeployLexer lexer = new FlumeDeployLexer(new ANTLRFileStream(filename));
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        FlumeDeployParser parser = new FlumeDeployParser(tokens);

        return (CommonTree) parser.connection().getTree();
    }

    public static Pair<EventSource, EventSink> buildNode(Context context, File f)
            throws IOException, RecognitionException, FlumeSpecException {
        CommonTree t = parseNodeFile(f.getCanonicalPath());
        if (t.getText() != "NODE") {
            throw new FlumeSpecException("fail, expected node but had " + t.toStringTree());
        }
        // String host = t.getChild(0).getText();
        CommonTree tsrc = (CommonTree) t.getChild(0);
        CommonTree tsnk = (CommonTree) t.getChild(1);

        return new Pair<EventSource, EventSink>(buildEventSource(context, tsrc),
                buildEventSink(context, tsnk, sinkFactory));
    }

    /**
     * This parses a aggregate configuration (name: src|snk; ...) and returns a
     * map from logical node name to a source sink pair. Context is required now
     * because a Flumenode's PhysicalNode information may need to be passed in
     */
    @SuppressWarnings("unchecked")
    public static Map<String, Pair<String, String>> parseConf(Context ctx, String s) throws FlumeSpecException {
        try {
            CommonTree node = parse(s);
            Map<String, Pair<String, String>> cfg = new HashMap<String, Pair<String, String>>();
            for (CommonTree t : (List<CommonTree>) node.getChildren()) {
                // -1 is magic EOF value
                if (t.getType() == -1) {
                    break; // I am done.
                }

                if (t.getText() != "NODE") {
                    throw new FlumeSpecException("fail, expected node but had " + t.toStringTree());
                }

                if (t.getChildCount() != 3) {
                    throw new FlumeSpecException("fail, node didn't wasn't (name,src,snk): " + t.toStringTree());
                }
                String host = t.getChild(0).getText();
                CommonTree tsrc = (CommonTree) t.getChild(1);
                CommonTree tsnk = (CommonTree) t.getChild(2);

                Pair<String, String> p = new Pair<String, String>(FlumeSpecGen.genEventSource(tsrc),
                        FlumeSpecGen.genEventSink(tsnk));
                cfg.put(host, p);
            }
            return cfg;
        } catch (RecognitionException re) {
            LOG.error("Failure to parse and instantiate sink: '" + s + "'", re);
            throw new FlumeSpecException(re.toString());
        }

    }

    @SuppressWarnings("unchecked")
    public static Map<String, Pair<EventSource, EventSink>> build(Context context, String s)
            throws FlumeSpecException {
        try {
            CommonTree node = parse(s);
            Map<String, Pair<EventSource, EventSink>> cfg = new HashMap<String, Pair<EventSource, EventSink>>();
            for (CommonTree t : (List<CommonTree>) node.getChildren()) {
                // -1 is magic EOF value
                if (t.getType() == -1) {
                    break; // I am done.
                }
                if (t.getText() != "NODE") {
                    throw new FlumeSpecException("fail, expected node but had " + t.toStringTree());
                }
                String host = t.getChild(0).getText();
                CommonTree tsrc = (CommonTree) t.getChild(1);
                CommonTree tsnk = (CommonTree) t.getChild(2);

                Pair<EventSource, EventSink> p = new Pair<EventSource, EventSink>(buildEventSource(context, tsrc),
                        buildEventSink(context, tsnk, sinkFactory));
                cfg.put(host, p);
            }
            return cfg;
        } catch (RecognitionException re) {
            LOG.error("Failure to parse and instantiate sink: '" + s + "'", re);
            throw new FlumeSpecException(re.toString());
        }

    }

    /**
     * Build a flume source from a flume config specification.
     * 
     * This should only throw FlumeSpecExceptions (No illegal arg exceptions
     * anymore)
     */
    public static EventSource buildSource(Context ctx, String s) throws FlumeSpecException {
        try {
            CommonTree srcTree = parseSource(s);
            return buildEventSource(ctx, srcTree);
        } catch (RecognitionException re) {
            LOG.debug("Failure to parse and instantiate sink: '" + s + "'", re);
            throw new FlumeSpecException(re.toString());
        } catch (NumberFormatException nfe) {
            LOG.debug("Failure to parse and instantiate sink: '" + s + "'", nfe);
            throw new FlumeSpecException(nfe.getMessage());
        } catch (IllegalArgumentException iae) {
            LOG.debug("Failure to parse and instantiate sink: '" + s + "'", iae);
            throw new FlumeSpecException(iae.getMessage());
        } catch (RuntimeRecognitionException re) {
            LOG.debug("Failure to parse and instantiate sink: '" + s + "'", re);
            throw new FlumeSpecException(re.getMessage());
        }
    }

    /**
     * Build a flume sink from a flume config specification. Sinks can be much
     * more complicated than sources.
     * 
     * This should only throw FlumeSpecExceptions (No illegal arg exceptions
     * anymore)
     */
    public static EventSink buildSink(Context context, String s) throws FlumeSpecException {
        try {
            CommonTree snkTree = parseSink(s);
            return buildEventSink(context, snkTree, sinkFactory);
        } catch (RecognitionException re) {
            LOG.debug("Failure to parse and instantiate sink: '" + s + "'", re);
            throw new FlumeSpecException(re.toString());
        } catch (NumberFormatException nfe) {
            LOG.debug("Failure to parse and instantiate sink: '" + s + "'", nfe);
            throw new FlumeSpecException(nfe.getMessage());
        } catch (IllegalArgumentException iae) {
            LOG.debug("Failure to parse and instantiate sink: '" + s + "'", iae);
            throw new FlumeSpecException(iae.getMessage());
        } catch (RuntimeRecognitionException re) {
            LOG.debug("Failure to parse and instantiate sink: '" + s + "'", re);
            throw new FlumeSpecException(re.getMessage());
        }
    }

    @Deprecated
    public static EventSink buildSink(String s) throws FlumeSpecException {
        return buildSink(new Context(), s);
    }

    /**
     * Formats the three pieces to a node configuration back into a single
     * parsable line
     */
    public static String toLine(String name, String src, String snk) {
        return name + " : " + src + " | " + snk + ";";
    }

    /**
     * All of factories expect all string arguments.
     * 
     * Numbers are expected to be in decimal format.
     * 
     * If it is of type KWARG, it returns null
     * 
     * TODO (jon) move to builders, or allow builders to take Integers/Booleans as
     * well as Strings
     */
    public static String buildSimpleArg(CommonTree t) throws FlumeSpecException {
        ASTNODE type = ASTNODE.valueOf(t.getText()); // convert to enum
        switch (type) {
        case HEX:
            String hex = t.getChild(0).getText();
            Preconditions.checkArgument(hex.startsWith("0x")); // bad parser if this
            // happens
            hex = hex.substring(2);
            Long i = Long.parseLong(hex, 16); // use base 16 radix
            return i.toString();
        case DEC:
            return t.getChild(0).getText();
        case BOOL:
            return t.getChild(0).getText();
        case OCT:
            String oct = t.getChild(0).getText();
            // bad parser if these happen
            Preconditions.checkArgument(oct.startsWith("0"));
            Preconditions.checkArgument(!oct.startsWith("0x"));
            Long i2 = Long.parseLong(oct, 8); // us base 16 radix
            return i2.toString();
        case FLOAT:
            return t.getChild(0).getText();
        case STRING:
            String str = t.getChild(0).getText();
            Preconditions.checkArgument(str.startsWith("\"") && str.endsWith("\""));
            str = str.substring(1, str.length() - 1);
            return StringEscapeUtils.unescapeJava(str);
        case KWARG:
            return null;
        default:
            throw new FlumeSpecException("Not a node of literal type: " + t.toStringTree());
        }
    }

    public static Pair<String, CommonTree> buildKWArg(CommonTree t) {
        ASTNODE type = ASTNODE.valueOf(t.getText()); // convert to enum
        Preconditions.checkArgument("KWARG".equals(type.toString()));
        String kw = t.getChild(0).getText();
        CommonTree arg = (CommonTree) t.getChild(1);
        return new Pair<String, CommonTree>(kw, arg);
    }

    /**
     * This method populates the id, argv,and kwargs into ctx needed to build
     * source/sink/deco name
     * 
     * @param t
     * @param ctx
     * @return
     * @throws FlumeSpecException
     */
    @SuppressWarnings("unchecked")
    static Pair<String, List<String>> handleArgs(CommonTree t, Context ctx) throws FlumeSpecException {
        List<CommonTree> children = (List<CommonTree>) new ArrayList<CommonTree>(t.getChildren());
        String sinkType = children.remove(0).getText();
        List<String> args = new ArrayList<String>();
        for (CommonTree tr : children) {
            String arg = buildSimpleArg(tr);
            if (arg != null) {
                args.add(arg);
            } else {
                Pair<String, CommonTree> kwarg = buildKWArg(tr);
                ctx.putValue(kwarg.getLeft(), buildSimpleArg(kwarg.getRight()));
            }
        }
        return new Pair<String, List<String>>(sinkType, args);
    }

    static EventSource buildEventSource(Context context, CommonTree t) throws FlumeSpecException {
        ASTNODE type = ASTNODE.valueOf(t.getText()); // convert to enum
        switch (type) {
        case SOURCE: {
            Context ctx = new Context(context);
            Pair<String, List<String>> idArgs = handleArgs(t, ctx);
            String sourceType = idArgs.getLeft();
            List<String> args = idArgs.getRight();

            EventSource src = srcFactory.getSource(ctx, sourceType, args.toArray(new String[0]));
            if (src == null) {
                throw new FlumeIdException("Invalid source: " + FlumeSpecGen.genEventSource(t));
            }
            return src;
        }
        default:
            throw new FlumeSpecException("bad parse tree! Expected source but got " + t.toStringTree());
        }
    }

    @SuppressWarnings("unchecked")
    static EventSink buildEventSink(Context context, CommonTree t, SinkFactory sinkFactory)
            throws FlumeSpecException {
        ASTNODE type = ASTNODE.valueOf(t.getText()); // convert to enum
        switch (type) {
        case SINK: {

            Context ctx = new Context(context);
            Pair<String, List<String>> idargs = handleArgs(t, ctx);
            String sinkType = idargs.getLeft();
            List<String> args = idargs.getRight();

            EventSink snk = sinkFactory.getSink(ctx, sinkType, args.toArray(new String[0]));

            if (snk == null) {
                throw new FlumeIdException("Invalid sink: " + FlumeSpecGen.genEventSink(t));
            }
            return snk;
        }
        case MULTI: {
            List<CommonTree> elems = (List<CommonTree>) t.getChildren();
            List<EventSink> snks = new ArrayList<EventSink>();
            try {
                for (CommonTree tr : elems) {
                    EventSink s = buildEventSink(context, tr, sinkFactory);
                    snks.add(s);
                }
                FanOutSink<EventSink> sink = new FanOutSink<EventSink>(snks);
                return sink;
            } catch (FlumeSpecException ife) {
                // TODO (jon) do something if there was an intermediate failure
                throw ife;
            }
        }
        case DECO: {
            List<CommonTree> decoNodes = (List<CommonTree>) t.getChildren();
            Preconditions.checkArgument(decoNodes.size() <= 2, "Only supports one or no decorators per expression");
            // no decorators
            if (decoNodes.size() == 1) {
                CommonTree snkt = decoNodes.get(0);
                try {
                    EventSink singleSnk = buildEventSink(context, snkt, sinkFactory);
                    return singleSnk;
                } catch (FlumeSpecException ife) {
                    throw ife;
                }
            }

            // single decorator
            CommonTree deco = decoNodes.get(0);
            CommonTree decoSnk = decoNodes.get(1);
            EventSinkDecorator<EventSink> decoSink = buildEventSinkDecorator(context, deco);
            try {
                EventSink dsnk = buildEventSink(context, decoSnk, sinkFactory);
                decoSink.setSink(dsnk);
                return decoSink;
            } catch (FlumeSpecException ife) {
                // TODO (jon) need to cleanup after mainsink if this failed.
                throw ife;
            }
        }

        case BACKUP: {
            List<CommonTree> backupNodes = (List<CommonTree>) t.getChildren();
            Preconditions.checkArgument(backupNodes.size() == 2,
                    "Only supports two retry nodes per failover expression");
            CommonTree main = backupNodes.get(0);
            CommonTree backup = backupNodes.get(1);
            try {
                EventSink mainSink = buildEventSink(context, main, sinkFactory);
                EventSink backupSink = buildEventSink(context, backup, sinkFactory);
                return new BackOffFailOverSink(mainSink, backupSink);
            } catch (FlumeSpecException ife) {
                LOG.error("Failed to build Failover sink", ife);
                throw ife;
            }
        }

        case LET: {
            List<CommonTree> letNodes = (List<CommonTree>) t.getChildren();
            Preconditions.checkArgument(letNodes.size() == 3);
            String argName = letNodes.get(0).getText();
            CommonTree arg = letNodes.get(1);
            CommonTree body = letNodes.get(2);
            try {
                EventSink argSink = buildEventSink(context, arg, sinkFactory);

                // TODO (jon) This isn't exactly right. 'let' currently does
                // "substitution on parse", which means when there are multiple
                // instances of the let subexpression, it will get opened twice (which
                // is now illegal). This hack makes things work by relaxing open's
                // semantics so that multiple opens are ignored.

                // Another approach is to have "substitution on open". Let would work
                // differently than here. We would have LetDecorator that internally
                // keeps the subexpression and then body. The references to the
                // subexpression in the body would get replaced with a reference. On
                // open, the subexpression would be opened, and the body as well. As the
                // body is traversed, if the reference would not be opened, but would
                // substitute the (already) open subexpression in its place.

                EventSink argSinkRef = new EventSinkDecorator<EventSink>(argSink) {
                    boolean open = false;

                    @Override
                    public void open() throws IOException, InterruptedException {
                        if (open) {
                            return; // Do nothing because already open
                        }
                        open = true;
                        sink.open();
                    }

                    @Override
                    public void append(Event e) throws IOException, InterruptedException {
                        Preconditions.checkState(open);
                        sink.append(e);
                    }

                    @Override
                    public void close() throws IOException, InterruptedException {
                        open = false;
                        sink.close();
                    }

                };

                // add arg to context

                LinkedSinkFactory linkedFactory = new LinkedSinkFactory(sinkFactory, argName, argSinkRef);
                EventSink bodySink = buildEventSink(context, body, linkedFactory);
                return bodySink;
            } catch (FlumeSpecException ife) {
                throw ife;
            }
        }

        case ROLL: {
            List<CommonTree> rollArgs = (List<CommonTree>) t.getChildren();
            try {
                Preconditions.checkArgument(rollArgs.size() == 2,
                        "bad parse tree! " + t.toStringTree() + "roll only takes two arguments");
                CommonTree ctbody = rollArgs.get(0);
                Long period = Long.parseLong(buildSimpleArg(rollArgs.get(1)));
                String body = FlumeSpecGen.genEventSink(ctbody);
                // TODO (jon) replace the hard coded 250 with a parameterizable value
                RollSink roller = new RollSink(context, body, period, 250);
                return roller;
            } catch (IllegalArgumentException iae) {
                throw new FlumeSpecException(iae.getMessage());
            }

        }

        case GEN: {
            List<CommonTree> collArgs = (List<CommonTree>) t.getChildren();
            try {
                Preconditions.checkArgument(collArgs.size() >= 2,
                        "bad parse tree! " + t.toStringTree() + " generator takes at least 2 arguments");
                String genType = collArgs.get(0).getText();
                if (!"collector".equals(genType)) {
                    throw new FlumeSpecException("currently only handle 'collector' gen sinks");
                }
                CommonTree ctbody = collArgs.remove(1); // remove subsink.
                String body = FlumeSpecGen.genEventSink(ctbody);

                Context ctx = new Context(context);
                Pair<String, List<String>> idArgs = handleArgs(t, ctx);
                String sourceType = idArgs.getLeft();
                List<String> args = idArgs.getRight();
                args.add(0, body);

                // TODO replace with Generator Sink lookup
                Preconditions.checkArgument("collector".equals(sourceType));
                SinkBuilder builder = CollectorSink.builder();

                return builder.build(ctx, args.toArray(new String[0]));

            } catch (IllegalArgumentException iae) {
                throw new FlumeSpecException(iae.getMessage());
            }

        }

        case FAILCHAIN: {
            // TODO (jon) This is no longer necessary with the substitution mechanisms
            // found in the translators
            List<CommonTree> rollArgs = (List<CommonTree>) t.getChildren();
            Preconditions.checkArgument(rollArgs.size() >= 2);
            CommonTree ctbody = rollArgs.get(0);
            List<String> rargs = new ArrayList<String>(rollArgs.size() - 1);
            boolean first = true;
            for (CommonTree ct : rollArgs) {
                if (first) {
                    first = false;
                    continue;
                }
                // assumes this is a STRING
                rargs.add(buildSimpleArg(ct));
            }
            String body = FlumeSpecGen.genEventSink(ctbody);
            FlumeConfiguration conf = FlumeConfiguration.get();
            FailoverChainSink failchain = new FailoverChainSink(context, body, rargs,
                    conf.getFailoverInitialBackoff(), conf.getFailoverMaxSingleBackoff());
            return failchain;
        }
        // TODO (jon) new feature: handle pattern match splitter
        // case MATCH:

        default:
            throw new FlumeSpecException("bad parse tree! expected sink but got " + t.toStringTree());
        }
    }

    @SuppressWarnings("unchecked")
    static EventSinkDecorator<EventSink> buildEventSinkDecorator(Context context, CommonTree t)
            throws FlumeSpecException {
        Context ctx = new Context(context);
        Pair<String, List<String>> idArgs = handleArgs(t, ctx);
        String sinkType = idArgs.getLeft();
        List<String> args = idArgs.getRight();
        EventSinkDecorator deco = sinkFactory.getDecorator(ctx, sinkType, args.toArray(new String[0]));
        if (deco == null) {
            throw new FlumeIdException("Invalid sink decorator: " + FlumeSpecGen.genEventSinkDecorator(t));
        }
        return deco;
    }

    /**
     * Returns an unmodifiable set containing the sinks this builder supports
     */
    public static Set<String> getSinkNames() {
        return sinkFactory.getSinkNames();
    }

    /**
     * Return an unmodifiable set containing the decorators this builder supports
     */
    public static Set<String> getDecoratorNames() {
        return sinkFactory.getDecoratorNames();
    }

    /**
     * Return an unmodifiable set containing the sources this builder supports
     */
    public static Set<String> getSourceNames() {
        return srcFactory.getSourceNames();
    }
}