com.indeed.imhotep.sql.IQLTranslator.java Source code

Java tutorial

Introduction

Here is the source code for com.indeed.imhotep.sql.IQLTranslator.java

Source

/*
 * Copyright (C) 2014 Indeed Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.indeed.imhotep.sql;

import com.google.common.base.Function;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.indeed.imhotep.iql.DiffGrouping;
import com.indeed.imhotep.iql.RegexCondition;
import com.indeed.imhotep.metadata.FieldType;
import com.indeed.imhotep.sql.ast.BinaryExpression;
import com.indeed.util.serialization.LongStringifier;
import com.indeed.util.serialization.Stringifier;
import com.indeed.flamdex.lucene.LuceneQueryTranslator;
import com.indeed.imhotep.client.ImhotepClient;
import com.indeed.imhotep.ez.DynamicMetric;
import com.indeed.imhotep.ez.EZImhotepSession;
import com.indeed.imhotep.ez.Field;
import com.indeed.imhotep.iql.Condition;
import com.indeed.imhotep.iql.DistinctGrouping;
import com.indeed.imhotep.iql.FieldGrouping;
import com.indeed.imhotep.iql.Grouping;
import com.indeed.imhotep.iql.IQLQuery;
import com.indeed.imhotep.iql.IntInCondition;
import com.indeed.imhotep.iql.MetricCondition;
import com.indeed.imhotep.iql.PercentileGrouping;
import com.indeed.imhotep.iql.QueryCondition;
import com.indeed.imhotep.iql.SampleCondition;
import com.indeed.imhotep.iql.StatRangeGrouping;
import com.indeed.imhotep.iql.StatRangeGrouping2D;
import com.indeed.imhotep.iql.StringInCondition;
import com.indeed.imhotep.metadata.DatasetMetadata;
import com.indeed.imhotep.metadata.FieldMetadata;
import com.indeed.imhotep.sql.ast.Expression;
import com.indeed.imhotep.sql.ast.FunctionExpression;
import com.indeed.imhotep.sql.ast.NameExpression;
import com.indeed.imhotep.sql.ast.NumberExpression;
import com.indeed.imhotep.sql.ast.Op;
import com.indeed.imhotep.sql.ast.StringExpression;
import com.indeed.imhotep.sql.ast.TupleExpression;
import com.indeed.imhotep.sql.ast2.FromClause;
import com.indeed.imhotep.sql.ast2.SelectStatement;
import com.indeed.imhotep.sql.parser.ExpressionParser;
import com.indeed.imhotep.sql.parser.PeriodParser;
import com.indeed.imhotep.web.ImhotepMetadataCache;
import dk.brics.automaton.RegExp;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.joda.time.DateTime;
import org.joda.time.DurationFieldType;
import org.joda.time.Period;
import org.joda.time.PeriodType;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import javax.annotation.Nonnull;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.indeed.imhotep.ez.Stats.Stat;
import static com.indeed.imhotep.ez.EZImhotepSession.*;

/**
 * @author jplaisance
 */
public final class IQLTranslator {
    private static final Logger log = Logger.getLogger(IQLTranslator.class);

    /**
     * Builds an executable {@link IQLQuery} from a parsed SELECT statement.
     * <p>
     * distinct() and percentile() projections are pulled out into dedicated groupings (the two cannot be
     * combined in one query), every remaining projection is translated into a stat, the WHERE clause
     * becomes a list of conditions and the GROUP BY clause becomes a list of groupings. The
     * multi-term IN, diff and single-grouping rewrites are applied last, in that order.
     *
     * @param parse the parsed statement to translate
     * @param client passed through to the resulting query
     * @param username passed through to the resulting query
     * @param metadata source of the dataset metadata and keyword analyzer whitelist; also passed through
     * @param imhotepLocalTempFileSizeLimit passed through to the resulting query
     * @param imhotepDaemonTempFileSizeLimit passed through to the resulting query
     * @return the translated query
     * @throws IllegalArgumentException if both distinct() and percentile() appear in the SELECT clause
     */
    public static IQLQuery translate(SelectStatement parse, ImhotepClient client, String username,
            ImhotepMetadataCache metadata, long imhotepLocalTempFileSizeLimit,
            long imhotepDaemonTempFileSizeLimit) {
        if (log.isTraceEnabled()) {
            log.trace(parse.toHashKeyString());
        }

        final FromClause from = parse.from;
        final String datasetName = from.getDataset();
        final DatasetMetadata datasetInfo = metadata.getDataset(datasetName);
        final Set<String> analyzerWhitelist = metadata.getKeywordAnalyzerWhitelist(datasetName);

        // Work on a copy: the two extraction helpers below remove the projections they consume.
        final List<Expression> selectExpressions = Lists.newArrayList(parse.select.getProjections());
        final DistinctGrouping distinct = getDistinctGrouping(selectExpressions, datasetInfo);
        final PercentileGrouping percentile = getPercentileGrouping(selectExpressions, datasetInfo,
                EZImhotepSession.counts());
        final boolean hasDistinct = distinct != null;
        final boolean hasPercentile = percentile != null;
        if (hasDistinct && hasPercentile) {
            throw new IllegalArgumentException("Cannot use distinct and percentile in the same query");
        }

        // Everything still left in the list is an ordinary stat expression.
        final StatMatcher statTranslator = new StatMatcher(datasetInfo, analyzerWhitelist);
        final List<Stat> stats = Lists.newArrayList();
        for (Expression selectExpression : selectExpressions) {
            stats.add(selectExpression.match(statTranslator));
        }

        final List<Condition> conditions = parse.where == null
                ? Collections.<Condition>emptyList()
                : parse.where.getExpression().match(new ConditionMatcher(datasetInfo, analyzerWhitelist));

        final GroupByMatcher groupingTranslator = new GroupByMatcher(datasetInfo, analyzerWhitelist,
                from.getStart(), from.getEnd());
        final List<Grouping> groupings = Lists.newArrayList();
        if (parse.groupBy != null) {
            for (Expression groupByExpression : parse.groupBy.groupings) {
                groupings.add(groupByExpression.match(groupingTranslator));
            }
        }

        if (hasDistinct) {
            ensureDistinctSelectDoesntMatchGroupings(groupings, distinct);
            groupings.add(distinct); // distinct has to come last
        } else if (hasPercentile) {
            groupings.add(percentile);
        }

        handleMultitermIn(conditions, groupings);
        handleDiffGrouping(groupings, stats);
        optimizeGroupings(groupings);

        return new IQLQuery(client, stats, from.getDataset(), from.getStart(), from.getEnd(), conditions,
                groupings, parse.limit, username, metadata, imhotepLocalTempFileSizeLimit,
                imhotepDaemonTempFileSizeLimit);
    }

    /**
     * Rejects queries that select distinct(field) while also grouping by that same field.
     *
     * @param groupings the GROUP BY groupings collected so far
     * @param distinctGrouping the distinct() fields extracted from the SELECT clause
     * @throws IllegalArgumentException if a distinct field equals the field of any {@link FieldGrouping}
     */
    private static void ensureDistinctSelectDoesntMatchGroupings(List<Grouping> groupings,
            DistinctGrouping distinctGrouping) {
        for (Field selectedField : distinctGrouping.getFields()) {
            for (Grouping candidate : groupings) {
                if (!(candidate instanceof FieldGrouping)) {
                    continue;
                }
                final Field groupedField = ((FieldGrouping) candidate).getField();
                if (!groupedField.equals(selectedField)) {
                    continue;
                }
                final String message = "Please remove distinct(" + selectedField.getFieldName()
                        + ") from the SELECT clause as it is always going to be 1 due to it being one of the GROUP BY groups";
                throw new IllegalArgumentException(message);
            }
        }
    }

    /**
     * Replaces a lone plain field grouping with a fresh {@code FieldGrouping(field, true)}.
     * With only one grouping, exploding can safely be disabled, which allows the result to be streamed.
     * Groupings that are already no-explode, top-K or term subsets are left untouched.
     * Modifies the passed in list.
     */
    private static void optimizeGroupings(List<Grouping> groupings) {
        if (groupings.size() != 1) {
            return;
        }
        final Grouping only = groupings.get(0);
        if (!(only instanceof FieldGrouping)) {
            return;
        }
        final FieldGrouping single = (FieldGrouping) only;
        final boolean leaveAsIs = single.isNoExplode() || single.isTopK() || single.isTermSubset();
        if (leaveAsIs) {
            return;
        }
        groupings.set(0, new FieldGrouping(single.getField(), true));
    }

    /**
     * Handles converting queries of the form WHERE field IN (term1, term2, ...) GROUP BY field
     * to queries like: GROUP BY field IN (term1, term2, ...) .
     * This properly handles the case where filtered and grouped by field has multiple terms per doc (e.g. grp, rcv).
     * <p>
     * Single-value (equality) and negated IN conditions are never rewritten. If the field appears in
     * more than one GROUP BY grouping, every matching grouping is converted and the condition is
     * removed exactly once.
     * Modifies the passed in lists.
     *
     * @param conditions WHERE conditions; rewritten conditions are removed from this list
     * @param groupings GROUP BY groupings; matching field groupings are replaced in place
     */
    static void handleMultitermIn(List<Condition> conditions, List<Grouping> groupings) {
        for (int i = 0; i < conditions.size(); i++) {
            final Condition condition = conditions.get(i);
            if (!(condition instanceof StringInCondition)) {
                continue;
            }
            final StringInCondition inCondition = (StringInCondition) condition;
            if (inCondition.isEquality()) {
                continue; // when we have a single value filter (e.g. grp:smartphone), assume that MultitermIn is not intended
            }
            if (inCondition.isNegation()) {
                continue; // negations shouldn't be rewritten
            }
            final Field.StringField field = inCondition.getStringField();

            // see if this field is also used in GROUP BY
            boolean rewritten = false;
            for (int j = 0; j < groupings.size(); j++) {
                final Grouping grouping = groupings.get(j);
                if (!(grouping instanceof FieldGrouping)) {
                    continue;
                }
                final FieldGrouping fieldGrouping = (FieldGrouping) grouping;
                if (!field.equals(fieldGrouping.getField())) {
                    continue;
                }
                // got a match. convert this grouping to a FieldInGrouping
                groupings.set(j, new FieldGrouping(field, fieldGrouping.isNoExplode(),
                        Lists.newArrayList(inCondition.getValues())));
                rewritten = true;
            }

            // Remove the condition once, after all groupings were examined. Removing it inside the
            // loop above would drop an unrelated condition (or index -1) on a second match.
            if (rewritten) {
                conditions.remove(i);
                i--; // have to redo the current index as indexes were shifted
            }
        }
    }

    /**
     * Handles converting queries of the form GROUP BY diff(field, filter1, filter2, limit) SELECT metric
     * to queries like: GROUP BY field[top limit by abs(filter1*metric-filter2*metric)] select abs(filter1*metric-filter2*metric), filter1*metric, filter2*metric.
     * <p>
     * The first selected stat is used as the metric. After the rewrite the first three stats are the
     * absolute difference, filter1*metric and filter2*metric; existing stats at positions 1 and 2 are
     * overwritten, missing ones are appended.
     * Modifies the passed in lists.
     *
     * @param groupings GROUP BY groupings; each {@link DiffGrouping} is replaced in place
     * @param stats selected stats; positions 0 to 2 are rewritten when a diff grouping is present
     * @throws IllegalArgumentException if a diff grouping is present but no stat is selected
     */
    private static void handleDiffGrouping(List<Grouping> groupings, List<Stat> stats) {
        for (int i = 0; i < groupings.size(); i++) {
            final Grouping grouping = groupings.get(i);
            if (!(grouping instanceof DiffGrouping)) {
                continue;
            }
            // The stat list can be empty here, e.g. when the only projection was a distinct() or
            // percentile() that has already been extracted. Report that as a query error rather
            // than letting stats.get(0) fail with an IndexOutOfBoundsException.
            if (stats.isEmpty()) {
                throw new IllegalArgumentException("diff() grouping requires a metric in the SELECT clause");
            }
            final Stat selectStat = stats.get(0);

            final DiffGrouping diff = (DiffGrouping) grouping;
            final Stat stat1 = mult(diff.getFilter1(), selectStat);
            final Stat stat2 = mult(diff.getFilter2(), selectStat);
            final Stat diffStat = abs(sub(stat1, stat2));

            stats.set(0, diffStat);
            // TODO: make client understand
            if (stats.size() > 1) {
                stats.set(1, stat1);
            } else {
                stats.add(stat1);
            }
            if (stats.size() > 2) {
                stats.set(2, stat2);
            } else {
                stats.add(stat2);
            }
            // NOTE(review): the meaning of the trailing 'false' flag is defined by FieldGrouping,
            // which is not visible here.
            groupings.set(i, new FieldGrouping(diff.getField(), diff.getTopK(), diffStat, false));
        }
    }

    /**
     * Extracts every distinct(field) projection from the list into a single {@link DistinctGrouping}.
     * Each extracted projection is removed from {@code projections} and registered together with the
     * position it had in the list before any removals made by this call.
     *
     * @param projections SELECT projections; distinct() entries are removed from this list
     * @param datasetMetadata metadata used to resolve the field named in distinct()
     * @return the grouping holding all distinct fields, or null if there is no distinct() projection
     * @throws IllegalArgumentException if distinct() is not given exactly one argument
     */
    static DistinctGrouping getDistinctGrouping(List<Expression> projections, DatasetMetadata datasetMetadata) {
        DistinctGrouping result = null;
        final Iterator<Expression> remaining = projections.iterator();
        int nextPosition = 0;
        while (remaining.hasNext()) {
            // Positions count every visited projection, including the ones removed below.
            final int position = nextPosition++;
            final Expression candidate = remaining.next();
            if (!(candidate instanceof FunctionExpression)) {
                continue;
            }
            final FunctionExpression call = (FunctionExpression) candidate;
            if (!call.function.equals("distinct")) {
                continue;
            }
            if (call.args.size() != 1) {
                throw new IllegalArgumentException(
                        "distinct() takes a field name as an argument and returns distinct count of terms for the field");
            }

            final Field distinctField = getField(getStr(call.args.get(0)), datasetMetadata);
            remaining.remove();

            if (result == null) {
                result = new DistinctGrouping();
            }
            result.addField(distinctField, position);
        }
        return result;
    }

    /**
     * Extracts every percentile(field, N) projection from the list into a single {@link PercentileGrouping}.
     * Each extracted projection is removed from {@code projections} and registered together with the
     * position it had in the list before any removals made by this call.
     *
     * @param projections SELECT projections; percentile() entries are removed from this list
     * @param datasetMetadata metadata used to resolve the field named in percentile()
     * @param countStat stat handed to the {@link PercentileGrouping} constructor
     * @return the grouping holding all percentile queries, or null if there is no percentile() projection
     * @throws IllegalArgumentException if percentile() is not given exactly two arguments or the
     *         percentile is outside of 0..100
     */
    static PercentileGrouping getPercentileGrouping(List<Expression> projections, DatasetMetadata datasetMetadata,
            Stat countStat) {
        PercentileGrouping result = null;
        final Iterator<Expression> remaining = projections.iterator();
        int nextPosition = 0;
        while (remaining.hasNext()) {
            // Positions count every visited projection, including the ones removed below.
            final int position = nextPosition++;
            final Expression candidate = remaining.next();
            if (!(candidate instanceof FunctionExpression)) {
                continue;
            }
            final FunctionExpression call = (FunctionExpression) candidate;
            if (!call.function.equals("percentile")) {
                continue;
            }
            if (call.args.size() != 2) {
                throw new IllegalArgumentException(
                        "percentile() takes a field name and a percentile and returns that percentile, e.g. percentile(tottime, 50)");
            }

            final Field percentileField = getField(getStr(call.args.get(0)), datasetMetadata);
            remaining.remove();

            final double requested = parseInt(call.args.get(1));
            final boolean inRange = requested >= 0 && requested <= 100;
            if (!inRange) {
                throw new IllegalArgumentException("percentile must be between 0 and 100");
            }

            if (result == null) {
                result = new PercentileGrouping(countStat);
            }
            result.addPercentileQuery(percentileField, requested, position);
        }
        return result;
    }

    /**
     * Looks the field up in the dataset metadata and wraps its name in the matching {@link Field} kind:
     * an int field when the metadata reports an int Imhotep field, a string field otherwise.
     *
     * @throws IllegalArgumentException if the dataset has no field with the given name
     */
    @Nonnull
    private static Field getField(String name, DatasetMetadata datasetMetadata) {
        final FieldMetadata found = datasetMetadata.getField(name);
        if (found == null) {
            throw new IllegalArgumentException("Unknown field: " + name);
        }
        if (found.isIntImhotepField()) {
            return Field.intField(name);
        }
        return Field.stringField(name);
    }

    /**
     * Expression matcher that translates a SELECT/metric expression into a {@link Stat}.
     * <p>
     * Function calls are dispatched through a name-to-handler table built in the constructor; binary
     * operators map onto the corresponding stat combinators; a bare name becomes an int field stat and
     * a number becomes a constant.
     */
    private static class StatMatcher extends Expression.Matcher<Stat> {
        private final DatasetMetadata datasetMetadata;

        /** Handlers for the supported stat functions, keyed by function name. */
        private final Map<String, Function<List<Expression>, Stat>> statLookup;

        /** Translates every expression of the list with this matcher, preserving order. */
        private Stat[] getStats(final List<Expression> input) {
            List<Stat> stats = Lists.newArrayList();
            for (Expression statString : input) {
                stats.add(statString.match(StatMatcher.this));
            }
            return stats.toArray(new Stat[stats.size()]);
        }

        /**
         * @param datasetMetadata metadata used to validate field names and pick has[str/int] semantics
         * @param keywordAnalyzerWhitelist passed to the lucene query parser used by lucene()
         */
        private StatMatcher(final DatasetMetadata datasetMetadata, final Set<String> keywordAnalyzerWhitelist) {
            this.datasetMetadata = datasetMetadata;
            final ImmutableMap.Builder<String, Function<List<Expression>, Stat>> builder = ImmutableMap.builder();
            // count(): arguments are ignored
            builder.put("count", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    return counts();
                }
            });
            builder.put("cached", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() != 1) {
                        throw new UnsupportedOperationException();
                    }
                    return cached(input.get(0).match(StatMatcher.this));
                }
            });
            builder.put("abs", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() != 1) {
                        throw new UnsupportedOperationException();
                    }
                    return abs(input.get(0).match(StatMatcher.this));
                }
            });
            builder.put("min", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() < 2) {
                        throw new UnsupportedOperationException("min() requires at least 2 arguments");
                    }
                    return min(getStats(input));
                }
            });
            // mulshr(shift, stat1, stat2): shift must be a numeric literal
            builder.put("mulshr", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() != 3) {
                        throw new UnsupportedOperationException("mulshr requires 3 arguments: shift, stat1, stat2");
                    }
                    if (!(input.get(0) instanceof NumberExpression)) {
                        throw new IllegalArgumentException("First argument of mulshr() has to be an integer. ");
                    }
                    final String shiftStr = getStr(input.get(0));
                    final int shift = Integer.parseInt(shiftStr);
                    final Stat stat1 = input.get(1).match(StatMatcher.this);
                    final Stat stat2 = input.get(2).match(StatMatcher.this);
                    return multiplyShiftRight(shift, stat1, stat2);
                }
            });
            // shldiv(shift, stat1, stat2): shift must be a numeric literal
            builder.put("shldiv", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() != 3) {
                        throw new UnsupportedOperationException("shldiv requires 3 arguments: shift, stat1, stat2");
                    }
                    if (!(input.get(0) instanceof NumberExpression)) {
                        throw new IllegalArgumentException("First argument of shldiv() has to be an integer. ");
                    }
                    final String shiftStr = getStr(input.get(0));
                    final int shift = Integer.parseInt(shiftStr);
                    final Stat stat1 = input.get(1).match(StatMatcher.this);
                    final Stat stat2 = input.get(2).match(StatMatcher.this);
                    return shiftLeftDivide(shift, stat1, stat2);
                }
            });
            builder.put("max", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() < 2) {
                        throw new UnsupportedOperationException("max() requires at least 2 arguments");
                    }
                    return max(getStats(input));
                }
            });
            // exp(stat[, scaleFactor]): scale factor defaults to 1
            builder.put("exp", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    int scaleFactor = 1;
                    if (input.size() == 2) {
                        scaleFactor = parseInt(input.get(1));
                    } else if (input.size() != 1) {
                        throw new UnsupportedOperationException("exp() requires 1 or 2 arguments. "
                                + "e.g. exp(ojc, 1) where ojc is a metric and 1 is a scaling factor that the terms "
                                + "get divided by before exponentiation and multiplied by after exponentiation. "
                                + "Scaling factor defaults to 1.");
                    }
                    return exp(input.get(0).match(StatMatcher.this), scaleFactor);
                }
            });
            builder.put("dynamic", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() != 1) {
                        throw new UnsupportedOperationException();
                    }
                    return dynamic(new DynamicMetric(getName(input.get(0))));
                }
            });
            // hasstr(field, value) or hasstr("field:value")
            builder.put("hasstr", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    String field = null;
                    String value = null;
                    if (input.size() == 1) {
                        final String param = getStr(input.get(0));
                        // Split on the first ':' only, so that values which themselves contain ':'
                        // are accepted. "field:" yields an empty value; a string without any ':'
                        // leaves field/value unset and is rejected below.
                        final String[] parts = param.split(":", 2);
                        if (parts.length == 2) {
                            field = parts[0];
                            value = parts[1];
                        }
                    } else if (input.size() == 2) {
                        field = getStr(input.get(0));
                        value = getStr(input.get(1));
                    }

                    if (Strings.isNullOrEmpty(field) || value == null) {
                        throw new IllegalArgumentException(
                                "incorrect usage in hasstr(). Examples: hasstr(rcv,jsv) or hasstr(\"rcv:jsv\")");
                    }
                    return hasString(field, value);
                }
            });
            // hasint(field, value) or hasint("field:value")
            builder.put("hasint", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    final String usageExamples = "Examples: hasint(clicked,1) or hasint(\"clicked:1\")";
                    String field = null;
                    long value = 0;
                    if (input.size() == 1) {
                        final String param = getStr(input.get(0));
                        final String[] parts = param.split(":");
                        if (parts.length == 2) {
                            field = parts[0];
                            try {
                                value = Long.parseLong(parts[1]);
                            } catch (NumberFormatException e) {
                                // keep the parse failure as the cause for diagnostics
                                throw new IllegalArgumentException(
                                        "Value in hasint() has to be an integer. " + usageExamples, e);
                            }
                        }
                    } else if (input.size() == 2) {
                        field = getStr(input.get(0));
                        if (!(input.get(1) instanceof NumberExpression)) {
                            throw new IllegalArgumentException(
                                    "Second argument of hasint() has to be an integer. " + usageExamples);
                        }
                        value = parseInt(input.get(1));
                    }

                    if (Strings.isNullOrEmpty(field)) {
                        throw new IllegalArgumentException("incorrect usage in hasint(). " + usageExamples);
                    }
                    return hasInt(field, value);
                }
            });
            // floatscale(field[, arg2[, arg3]]): missing arguments default to 1 and 0 respectively
            builder.put("floatscale", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() == 3) {
                        return floatScale(getName(input.get(0)), parseInt(input.get(1)), parseInt(input.get(2)));
                    } else if (input.size() == 2) {
                        return floatScale(getName(input.get(0)), parseInt(input.get(1)), 0);
                    } else if (input.size() == 1) {
                        return floatScale(getName(input.get(0)), 1, 0);
                    } else {
                        throw new UnsupportedOperationException();
                    }
                }
            });
            builder.put("lucene", new Function<List<Expression>, Stat>() {
                public Stat apply(final List<Expression> input) {
                    if (input.size() != 1) {
                        throw new UnsupportedOperationException(
                                "lucene() requires a string argument containing the lucene query to try on each document");
                    }
                    final String luceneQuery = getStr(input.get(0));
                    final com.indeed.flamdex.query.Query flamdexQuery = parseLuceneQuery(luceneQuery,
                            datasetMetadata, keywordAnalyzerWhitelist);
                    return lucene(flamdexQuery);
                }
            });
            statLookup = builder.build();
        }

        /**
         * Translates a binary expression. {@code field = literal} and {@code field != literal} are
         * treated as has[str/int] checks when the field metadata allows it; everything else combines
         * the translated operands with the stat operation matching the operator.
         *
         * @throws IllegalArgumentException for an unknown field in an equality check or a constant
         *         aggregate division by 0
         * @throws UnsupportedOperationException for operators not listed below
         */
        protected Stat binaryExpression(final Expression left, final Op op, final Expression right) {
            switch (op) {

            case PLUS:
                return add(left.match(this), right.match(this));
            case MINUS:
                return sub(left.match(this), right.match(this));
            case MUL:
                return mult(left.match(this), right.match(this));
            case DIV:
                return div(left.match(this), right.match(this));
            case MOD:
                return mod(left.match(this), right.match(this));
            case LESS:
                return less(left.match(this), right.match(this));
            case LESS_EQ:
                return lessEq(left.match(this), right.match(this));
            case EQ:
                if (left instanceof NameExpression
                        && (right instanceof NumberExpression || right instanceof StringExpression)) {
                    // probably a has[str/int] operation
                    final String fieldName = ((NameExpression) left).name;
                    final FieldMetadata field = datasetMetadata.getField(fieldName);
                    if (field == null) {
                        throw new IllegalArgumentException("Field not found: " + fieldName);
                    }
                    final FieldType fieldMetadataType = field.getType();
                    if (field.isIntImhotepField() && right instanceof NumberExpression) {
                        long value = parseInt(right);
                        return hasInt(fieldName, value);
                    } else if (fieldMetadataType == FieldType.Integer && right instanceof NumberExpression
                            || fieldMetadataType == FieldType.String) {
                        // Integer-typed metadata that is not an int Imhotep field is matched as a string term
                        return hasString(fieldName, getStr(right));
                    }
                    // if it got here, it's not a has[str/int] operation
                }
                // try to compare as metrics
                return isEqual(left.match(this), right.match(this));
            case NOT_EQ:
                if (left instanceof NameExpression
                        && (right instanceof NumberExpression || right instanceof StringExpression)) {
                    // negate the equality stat by subtracting it from the count stat
                    final Stat equalsStat = binaryExpression(left, Op.EQ, right);
                    // TODO: only return if equalsStat is a HasIntStat or a HasStringStat
                    return sub(counts(), equalsStat);
                }
                // try to compare as metrics
                return isNotEqual(left.match(this), right.match(this));
            case GREATER:
                return greater(left.match(this), right.match(this));
            case GREATER_EQ:
                return greaterEq(left.match(this), right.match(this));
            case AGG_DIV: {
                if (right instanceof NumberExpression) {
                    // constant divisor: reject 0 up front
                    long value = parseLong(right);
                    if (value == 0) {
                        throw new IllegalArgumentException("Can't divide by 0");
                    }
                    return aggDivConst(left.match(this), value);
                }
                return aggDiv(left.match(this), right.match(this));
            }
            default:
                throw new UnsupportedOperationException();
            }
        }

        /**
         * Translates a unary expression on a numeric literal into the negated constant.
         * NOTE(review): {@code op} is not inspected; this assumes negation is the only unary operator.
         *
         * @throws UnsupportedOperationException if the operand is not a numeric literal
         */
        @Override
        protected Stat unaryExpression(Op op, Expression operand) {
            if (operand instanceof NumberExpression) {
                final String stringValue = "-" + ((NumberExpression) operand).number;
                final long value = Long.parseLong(stringValue);
                return constant(value);
            } else {
                throw new UnsupportedOperationException("Unary negation is only supported on constants");
            }
        }

        /**
         * Dispatches a function call to its registered handler.
         *
         * @throws IllegalArgumentException if no stat function with that name is registered
         */
        protected Stat functionExpression(final String name, final List<Expression> args) {
            final Function<List<Expression>, Stat> function = statLookup.get(name);
            if (function == null) {
                throw new IllegalArgumentException("Unknown stat function: " + name);
            }
            return function.apply(args);
        }

        /**
         * Translates a bare name into an int field stat.
         *
         * @throws IllegalArgumentException if the dataset has no field with that name
         */
        protected Stat nameExpression(final String name) {
            if (!datasetMetadata.hasField(name)) {
                throw new IllegalArgumentException("Unknown field name in a stat: " + name);
            }
            return intField(name);
        }

        /** Translates a numeric literal into a constant stat; the literal must parse as a long. */
        protected Stat numberExpression(final String value) {
            return constant(Long.parseLong(value));
        }
    }

    private static final class ConditionMatcher extends Expression.Matcher<List<Condition>> {

        private final Map<String, Function<List<Expression>, Condition>> functionLookup;

        private final DatasetMetadata datasetMetadata;
        private final Set<String> keywordAnalyzerWhitelist;

        private final StatMatcher statMatcher;

        private boolean negation; // keeps track of whether we are inside a negated branch of expression

        /**
         * Creates the matcher and registers the functions that can be used as filters:
         * lucene() / query(), between() and sample().
         * Every handler reads the negation flag at the moment it is applied, not when it is registered.
         *
         * @param datasetMetadata metadata of the dataset the conditions are built for
         * @param keywordAnalyzerWhitelist handed to the stat matcher and to lucene query parsing
         */
        private ConditionMatcher(final DatasetMetadata datasetMetadata,
                final Set<String> keywordAnalyzerWhitelist) {
            this.datasetMetadata = datasetMetadata;
            this.keywordAnalyzerWhitelist = keywordAnalyzerWhitelist;
            statMatcher = new StatMatcher(datasetMetadata, keywordAnalyzerWhitelist);

            // lucene(queryString): filter by a lucene query string
            final Function<List<Expression>, Condition> luceneFunction = new Function<List<Expression>, Condition>() {
                public Condition apply(final List<Expression> args) {
                    if (args.size() != 1) {
                        throw new IllegalArgumentException(
                                "lucene query function takes exactly one string parameter");
                    }
                    final com.indeed.flamdex.query.Query translated = parseLuceneQuery(getStr(args.get(0)),
                            datasetMetadata, keywordAnalyzerWhitelist);
                    return new QueryCondition(translated, negation);
                }
            };

            // between(stat, min, max): filter by a stat being inside the given range
            final Function<List<Expression>, Condition> betweenFunction = new Function<List<Expression>, Condition>() {
                public Condition apply(final List<Expression> args) {
                    final int argCount = args.size();
                    if (argCount != 3) {
                        throw new IllegalArgumentException(
                                "between requires 3 arguments: stat, min, max. " + argCount + " provided");
                    }
                    final Stat stat = args.get(0).match(statMatcher);
                    final long lowerBound = parseLong(args.get(1));
                    final long upperBound = parseLong(args.get(2));
                    return new MetricCondition(stat, lowerBound, upperBound, negation);
                }
            };

            // sample(field, numerator, [denominator=100], [salt]): filter by a sample of the field
            final Function<List<Expression>, Condition> sampleFunction = new Function<List<Expression>, Condition>() {
                public Condition apply(final List<Expression> args) {
                    final int argCount = args.size();
                    if (argCount < 2 || argCount > 4) {
                        throw new IllegalArgumentException(
                                "sample() requires 2 to 4 arguments: fieldName, samplingRatioNumerator, [samplingRatioDenominator=100], [randomSeed]. "
                                        + argCount + " provided");
                    }
                    final Expression fieldArg = args.get(0);
                    if (!(fieldArg instanceof NameExpression)) {
                        throw new UnsupportedOperationException(
                                "sample() first argument has to be a field name. Instead given: "
                                        + String.valueOf(fieldArg));
                    }
                    final Field field = getField(((NameExpression) fieldArg).name, datasetMetadata);

                    // clamp the ratio: numerator >= 0, denominator >= max(1, numerator)
                    final int numerator = Math.max(0, parseInt(args.get(1)));
                    final int requestedDenominator = argCount >= 3 ? parseInt(args.get(2)) : 100;
                    final int denominator = Math.max(1, Math.max(numerator, requestedDenominator));

                    final String salt;
                    if (argCount < 4) {
                        // no salt provided so generate a new one
                        salt = String.valueOf(System.nanoTime() % Integer.MAX_VALUE);
                    } else {
                        final String userSalt = Strings.nullToEmpty(getStr(args.get(3)));
                        // limit salt length to 32 char just in case
                        salt = userSalt.substring(0, Math.min(userSalt.length(), 32));
                    }
                    final double ratio = (double) numerator / denominator;
                    // we can also do it through a predicate condition but that requires FTGS instead of a regroup
                    return new SampleCondition(field, ratio, salt, negation);
                }
            };

            final ImmutableMap.Builder<String, Function<List<Expression>, Condition>> lookupBuilder = ImmutableMap
                    .builder();
            lookupBuilder.put("lucene", luceneFunction);
            // TODO: remove
            lookupBuilder.put("query", luceneFunction);
            // TODO: remove. can relax parsing of function params when it's done
            lookupBuilder.put("between", betweenFunction);
            lookupBuilder.put("sample", sampleFunction);
            functionLookup = lookupBuilder.build();
        }

        /**
         * Translates a binary expression of a filter into conditions.
         * <ul>
         * <li>IN / NOT IN: term list filter on a string or int field</li>
         * <li>= / !=: field comparison, metric comparison against a number, or comparison of 2 metrics</li>
         * <li>regex match / non-match: regular expression filter on a string field</li>
         * <li>AND: conditions of both operands combined into one list</li>
         * <li>&lt;, &lt;=, &gt;, &gt;=: metric range filter</li>
         * </ul>
         * The negating operators flip a local copy of the negation flag so that the flag of the
         * surrounding expression is left untouched.
         *
         * @param left left operand
         * @param op the operator
         * @param right right operand
         * @return the conditions equivalent to the expression
         * @throws IllegalArgumentException if the operands are not valid for the operator
         * @throws UnsupportedOperationException if the operator can't be used as a filter
         */
        @Override
        protected List<Condition> binaryExpression(final Expression left, final Op op, final Expression right) {
            boolean usingNegation = negation; // local copy so that it can be modified independently
            switch (op) {
            case NOT_IN:
                usingNegation = !usingNegation;
                // fall through to IN
            case IN: {
                final NameExpression name = (NameExpression) left;
                final TupleExpression values = (TupleExpression) right;
                if (datasetMetadata.hasStringField(name.name)) {
                    // TODO how do we handle tokenized fields here?
                    final String[] strings = new String[values.expressions.size()];
                    int index = 0;
                    for (Expression expression : values.expressions) {
                        strings[index++] = getStr(expression);
                    }
                    Arrays.sort(strings); // looks like terms being sorted is a pre-requisite of stringOrRegroup()
                    return Lists.<Condition>newArrayList(
                            new StringInCondition(Field.stringField(name.name), usingNegation, false, strings));
                } else if (datasetMetadata.hasIntField(name.name)) {
                    final long[] ints = new long[values.expressions.size()];
                    int index = 0;
                    for (Expression expression : values.expressions) {
                        if (!(expression instanceof NumberExpression)) {
                            throw new IllegalArgumentException(
                                    "A non integer value specified for an integer field: " + name.name);
                        }
                        ints[index++] = parseLong(expression);
                    }
                    Arrays.sort(ints); // looks like terms being sorted is a pre-requisite of intOrRegroup()
                    return Lists.<Condition>newArrayList(
                            new IntInCondition(Field.intField(name.name), usingNegation, ints));
                } else {
                    throw new IllegalArgumentException("Unknown field: " + name.name);
                }
            }
            case NOT_EQ:
                usingNegation = !usingNegation;
                // fall through to EQ
            case EQ:
                if (left instanceof NameExpression) {
                    final NameExpression name = (NameExpression) left;
                    if (!datasetMetadata.hasField(name.name)) {
                        // reject explicitly; without this the control flow would continue into the
                        // regex cases below, which belong to a different operator
                        throw new IllegalArgumentException("Unknown field: " + name.name);
                    }
                    return handleFieldComparison(name, right, usingNegation);
                } else if (right instanceof NumberExpression) {
                    return handleMetricComparison(left, right, usingNegation);
                } else if (!(left instanceof StringExpression || right instanceof StringExpression)) {
                    // assume we have a comparison of 2 metrics. filter for the result of that = 1
                    return handleMetricComparison(new BinaryExpression(left, Op.EQ, right),
                            new NumberExpression("1"), usingNegation);
                } else {
                    throw new IllegalArgumentException(
                            "Can't compare the provided operands: " + left + "; " + right);
                }
            case REGEX_NOT_EQ:
                usingNegation = !usingNegation;
                // fall through to REGEX_EQ
            case REGEX_EQ: {
                if (!(left instanceof NameExpression)) {
                    throw new UnsupportedOperationException(
                            "Regexp compare only works on field names. Instead given: " + String.valueOf(left));
                }
                final String fieldName = ((NameExpression) left).name;
                if (!datasetMetadata.hasStringField(fieldName)) {
                    if (datasetMetadata.hasIntField(fieldName)) {
                        throw new IllegalArgumentException("Regex filter currently only works on String fields. "
                                + "Int field given: " + fieldName);
                    }
                    throw new IllegalArgumentException("Unknown field: " + fieldName);
                }
                final String regexp = getStr(right);
                // validate the provided regex
                try {
                    new RegExp(regexp).toAutomaton();
                } catch (Exception e) {
                    throw new IllegalArgumentException("The provided regex filter '" + regexp
                            + "' failed to parse. " + "\nError was: " + e.getMessage()
                            + "\nThe supported regex syntax can be seen here: http://www.brics.dk/automaton/doc/index.html?dk/brics/automaton/RegExp.html",
                            e);
                }
                return Collections.<Condition>singletonList(
                        new RegexCondition(Field.stringField(fieldName), regexp, usingNegation));
            }
            case AND: {
                final List<Condition> ret = Lists.newArrayList();
                ret.addAll(left.match(this));
                ret.addAll(right.match(this));
                return ret;
            }
            case LESS:
            case LESS_EQ:
            case GREATER:
            case GREATER_EQ:
                if (left instanceof StringExpression || right instanceof StringExpression) {
                    throw new IllegalArgumentException(op.toString() + " operation can't be applied to a string");
                }
                if (left instanceof NameExpression && right instanceof NumberExpression) {
                    final Stat stat = left.match(statMatcher);
                    final long value = parseLong(right); // constant we are comparing against
                    final long min;
                    final long max;
                    if (op == Op.LESS) {
                        if (value == Long.MIN_VALUE) {
                            // value - 1 would wrap around to Long.MAX_VALUE and match everything
                            throw new IllegalArgumentException("No value can be less than " + value);
                        }
                        min = Long.MIN_VALUE;
                        max = value - 1;
                    } else if (op == Op.LESS_EQ) {
                        min = Long.MIN_VALUE;
                        max = value;
                    } else if (op == Op.GREATER) {
                        if (value == Long.MAX_VALUE) {
                            // value + 1 would wrap around to Long.MIN_VALUE and match everything
                            throw new IllegalArgumentException("No value can be greater than " + value);
                        }
                        min = value + 1;
                        max = Long.MAX_VALUE;
                    } else { // GREATER_EQ
                        min = value;
                        max = Long.MAX_VALUE;
                    }
                    return Collections.<Condition>singletonList(
                            new MetricCondition(stat, min, max, usingNegation));
                } else {
                    // assume we have a comparison of 2 metrics. filter for the result of that = 1
                    return handleMetricComparison(new BinaryExpression(left, op, right), new NumberExpression("1"),
                            usingNegation);
                }
            case PLUS:
            case MINUS:
            case MUL:
            case DIV:
            case AGG_DIV:
            case MOD:
                throw new UnsupportedOperationException(op.toString() + " operation is not usable as a filter");
            default:
                throw new UnsupportedOperationException(op.toString() + " operation is not supported in a filter");
            }
        }

        /**
         * Builds a condition requiring the stat on the left side to be equal to the number on the right side.
         *
         * @param left expression that has to translate to a stat
         * @param right the constant to compare against; has to be a number
         * @param usingNegation whether the condition should be negated
         * @return single element list with the metric condition
         * @throws IllegalArgumentException if the left side is not a stat or the right side is not a number
         */
        private List<Condition> handleMetricComparison(Expression left, Expression right, boolean usingNegation) {
            final Stat stat;
            try {
                stat = left.match(statMatcher);
            } catch (Exception e) {
                // keep the original failure as the cause so that the underlying reason is not lost
                throw new IllegalArgumentException(
                        "Left side of comparison is not a known field or metric: " + left.toString(), e);
            }
            if (!(right instanceof NumberExpression)) {
                throw new IllegalArgumentException("Metric comparison values have to be numbers");
            }
            final long value = parseLong(right); // constant we are comparing against

            // equality is expressed as the range [value, value]
            return Collections.<Condition>singletonList(new MetricCondition(stat, value, value, usingNegation));
        }

        /**
         * Builds the conditions for comparing a dataset field to a constant.
         * String fields are compared to the string form of the right side. For tokenized fields a
         * multi-word string is turned into one condition per word. Int fields have to be compared
         * to a number.
         *
         * @param name the field being compared
         * @param right the constant to compare the field to
         * @param usingNegation whether the conditions should be negated
         * @return the conditions equivalent to the comparison
         * @throws IllegalArgumentException if the field is unknown or an int field is compared to a non-number
         */
        private List<Condition> handleFieldComparison(NameExpression name, Expression right,
                boolean usingNegation) {
            if (datasetMetadata.hasStringField(name.name)) {
                final String value = getStr(right);

                // a field is tokenized if the dataset is not an Imhotep dataset and neither the field
                // nor "*" is in the keyword analyzer whitelist
                final boolean isTokenized = !datasetMetadata.isImhotepDataset()
                        && (keywordAnalyzerWhitelist == null || !keywordAnalyzerWhitelist.contains(name.name)
                                && !keywordAnalyzerWhitelist.contains("*"));
                if (isTokenized && right instanceof StringExpression) {
                    // special handling for tokenized fields and multi-word queries e.g. jobsearch:q
                    // trim first: split() keeps a leading empty string which would become a term that never matches
                    final String[] words = value.trim().split("\\s+");
                    if (words.length > 1) {
                        // NOTE(review): with negation every word is negated separately - confirm that is intended
                        final List<Condition> conditions = Lists.newArrayListWithCapacity(words.length);
                        for (String word : words) {
                            conditions.add(
                                    new StringInCondition(Field.stringField(name.name), usingNegation, true, word));
                        }
                        return conditions;
                    } // else fall through to the normal case
                }

                final String[] strings = new String[] { value };
                return Lists.<Condition>newArrayList(
                        new StringInCondition(Field.stringField(name.name), usingNegation, true, strings));
            } else if (datasetMetadata.hasIntField(name.name)) {
                if (!(right instanceof NumberExpression)) {
                    throw new IllegalArgumentException(name.name
                            + " is an integer field and has to be compared to an integer. Instead was given: "
                            + right.toString());
                }
                final long[] ints = new long[] { parseLong(right) };
                return Lists.<Condition>newArrayList(
                        new IntInCondition(Field.intField(name.name), usingNegation, ints));
            } else {
                throw new IllegalArgumentException("Unknown field: " + name.name);
            }
        }

        /**
         * Handles negation of a condition: the operand is translated with the negation flag flipped.
         *
         * @param op the unary operator; only NEG is supported
         * @param operand the expression being negated
         * @return the conditions of the operand built in the negated state
         * @throws UnsupportedOperationException if the operator is not NEG
         */
        @Override
        protected List<Condition> unaryExpression(Op op, Expression operand) {
            if (op != Op.NEG) {
                throw new UnsupportedOperationException(op + " operation is not usable as a filter");
            }
            negation = !negation;
            try {
                return operand.match(this);
            } finally {
                // restore the flag even if the operand failed to translate
                negation = !negation;
            }
        }

        /**
         * Translates a function call used as a filter by delegating to the handler registered under its name.
         *
         * @param name name of the function
         * @param args arguments of the function call
         * @return single element list with the condition produced by the handler
         * @throws IllegalArgumentException if no function is registered under the given name
         */
        @Override
        protected List<Condition> functionExpression(final String name, final List<Expression> args) {
            final Function<List<Expression>, Condition> function = functionLookup.get(name);
            if (function == null) {
                throw new IllegalArgumentException("Unknown function in where condition: " + name);
            }
            return Collections.singletonList(function.apply(args));
        }

        /**
         * Fallback for every expression type that has no dedicated handler in this matcher.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        protected List<Condition> otherwise() {
            final String message = "Syntax error in a Where condition";
            throw new UnsupportedOperationException(message);
        }
    }

    /**
     * Parses a lucene query string and rewrites it into a flamdex query.
     * The analyzer is chosen as follows: keyword analyzer for Imhotep datasets and when the whitelist
     * contains "*"; whitespace analyzer when the whitelist is null or empty; otherwise whitespace
     * analyzer with the keyword analyzer used for the whitelisted fields only.
     *
     * @param queryString the lucene query to parse
     * @param datasetMetadata metadata of the dataset the query is for
     * @param keywordAnalyzerWhitelist fields to use the keyword analyzer for; null is treated as empty
     * @return the rewritten query
     * @throws RuntimeException wrapping the ParseException if the query string can't be parsed
     */
    private static com.indeed.flamdex.query.Query parseLuceneQuery(String queryString,
            DatasetMetadata datasetMetadata, Set<String> keywordAnalyzerWhitelist) {
        // Pick a lucene query analyzer based on whether it is for a lucene or flamdex dataset
        // and what is in the keywordAnalyzerWhitelist for the dataset
        final Analyzer analyzer;
        if (datasetMetadata.isImhotepDataset()) {
            analyzer = new KeywordAnalyzer();
        } else if (keywordAnalyzerWhitelist == null || keywordAnalyzerWhitelist.isEmpty()) {
            // a null whitelist is treated like an empty one
            analyzer = new WhitespaceAnalyzer();
        } else if (keywordAnalyzerWhitelist.contains("*")) {
            analyzer = new KeywordAnalyzer();
        } else {
            final KeywordAnalyzer kwAnalyzer = new KeywordAnalyzer();
            final PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(
                    new WhitespaceAnalyzer());
            for (String field : keywordAnalyzerWhitelist) {
                perFieldAnalyzer.addAnalyzer(field, kwAnalyzer);
            }
            analyzer = perFieldAnalyzer;
        }

        final QueryParser queryParser = new QueryParser("foo", analyzer);
        queryParser.setDefaultOperator(QueryParser.Operator.AND);
        // only auto-lowercase for non-Flamdex datasets
        queryParser.setLowercaseExpandedTerms(!datasetMetadata.isImhotepDataset());
        final Query query;
        try {
            query = queryParser.parse(queryString);
        } catch (ParseException e) {
            throw Throwables.propagate(e);
        }
        return LuceneQueryTranslator.rewrite(query, datasetMetadata.getIntImhotepFieldSet());
    }

    private static final class GroupByMatcher extends Expression.Matcher<Grouping> {
        // inferTimeBucketSize() picks the smallest time unit that results in fewer buckets than this
        private static final int MAX_RECOMMENDED_BUCKETS = 1000;

        // handlers of the functions usable in 'group by', keyed by function name; filled in the constructor
        private final Map<String, Function<List<Expression>, Grouping>> functionLookup;

        private final DatasetMetadata datasetMetadata;
        // boundaries of the time range; time bucket sizes are computed and validated against them
        private final DateTime start;
        private final DateTime end;

        // translates stat expressions passed as function arguments
        private final StatMatcher statMatcher;

        /**
         * Creates the matcher and registers the functions usable in 'group by':
         * topterms(), diff(), bucket() / buckets() and time() / timebuckets().
         *
         * @param datasetMetadata metadata of the dataset being grouped
         * @param keywordAnalyzerWhitelist handed to the stat matcher
         * @param start start of the time range
         * @param end end of the time range
         */
        private GroupByMatcher(final DatasetMetadata datasetMetadata, final Set<String> keywordAnalyzerWhitelist,
                final DateTime start, final DateTime end) {
            statMatcher = new StatMatcher(datasetMetadata, keywordAnalyzerWhitelist);
            this.datasetMetadata = datasetMetadata;
            this.start = start;
            this.end = end;
            final ImmutableMap.Builder<String, Function<List<Expression>, Grouping>> builder = ImmutableMap
                    .builder();

            // topterms(field, topK, [stat], [bottom])
            builder.put("topterms", new Function<List<Expression>, Grouping>() {
                public Grouping apply(final List<Expression> input) {
                    if (input.size() < 2 || input.size() > 4) {
                        throw new IllegalArgumentException(
                                "topterms() takes 2 to 4 arguments. " + input.size() + " given");
                    }
                    final String fieldName = getName(input.get(0));
                    final int topK = parseInt(input.get(1));
                    final Stat stat;
                    if (input.size() >= 3) {
                        stat = input.get(2).match(statMatcher);
                    } else {
                        stat = counts();
                    }
                    final boolean bottom;
                    if (input.size() >= 4) {
                        // constant first so that a missing string value can't cause a NullPointerException
                        bottom = "bottom".equals(getStr(input.get(3)));
                    } else {
                        bottom = false;
                    }

                    final Field field = getField(fieldName, datasetMetadata);
                    return new FieldGrouping(field, topK, stat, bottom);
                }
            });

            // diff(field, metricFilter1, metricFilter2, topK)
            builder.put("diff", new Function<List<Expression>, Grouping>() {
                public Grouping apply(final List<Expression> input) {
                    if (input.size() != 4) {
                        throw new IllegalArgumentException(
                                "diff() takes 4 args: fieldName(string), metricFilter1(StatExpression), metricFilter2(StatExpression), topK(int)");
                    }
                    final String fieldName = getName(input.get(0));
                    final Stat statFilter1 = input.get(1).match(statMatcher);
                    final Stat statFilter2 = input.get(2).match(statMatcher);
                    final int topK = parseInt(input.get(3));

                    final Field field = getField(fieldName, datasetMetadata);
                    return new DiffGrouping(field, statFilter1, statFilter2, topK);
                }
            });

            // buckets(stat, min, max, bucket_size, [noGutters]) plus the deprecated 8 argument 2D form
            Function<List<Expression>, Grouping> bucketHandler = new Function<List<Expression>, Grouping>() {
                public Grouping apply(final List<Expression> input) {
                    if (input.size() == 4 || input.size() == 5) {
                        final long min = parseLong(input.get(1));
                        final long max = parseLong(input.get(2));
                        final long interval = parseTimeBucketInterval(getStr(input.get(3)), false, 0, 0);
                        boolean noGutters = false;
                        if (input.size() == 5) {
                            final String noGuttersStr = getStr(input.get(4));
                            noGutters = "true".equalsIgnoreCase(noGuttersStr) || "1".equals(noGuttersStr);
                        }
                        return new StatRangeGrouping(input.get(0).match(statMatcher), min, max, interval, noGutters,
                                new LongStringifier());
                    } else if (input.size() == 8) {
                        // DEPRECATED: queries using buckets() with 8 args should be rewritten as 2 buckets() groupings with 4 args each
                        final Stat xStat = input.get(0).match(statMatcher);
                        final long xMin = parseLong(input.get(1));
                        final long xMax = parseLong(input.get(2));
                        final long xInterval = parseTimeBucketInterval(getStr(input.get(3)), false, 0, 0);
                        final Stat yStat = input.get(4).match(statMatcher);
                        final long yMin = parseLong(input.get(5));
                        final long yMax = parseLong(input.get(6));
                        final long yInterval = parseTimeBucketInterval(getStr(input.get(7)), false, 0, 0);
                        return new StatRangeGrouping2D(xStat, xMin, xMax, xInterval, yStat, yMin, yMax, yInterval);
                    } else {
                        throw new IllegalArgumentException(
                                "buckets() takes 4 or 5 arguments: stat, min(long), max(long), bucket_size(long), [noGutters(boolean)]");
                    }
                }
            };
            builder.put("bucket", bucketHandler);
            builder.put("buckets", bucketHandler);

            // time([bucket_size], [format], [time_field])
            Function<List<Expression>, Grouping> timeHandler = new Function<List<Expression>, Grouping>() {
                public Grouping apply(final List<Expression> input) {
                    if (input.size() > 3) {
                        throw new IllegalArgumentException("time function takes up to 3 args");
                    }
                    final String bucket = input.size() > 0 ? getStr(input.get(0)) : null;
                    final String format = input.size() > 1 ? getStr(input.get(1)) : null;
                    final Expression timeField = input.size() > 2 ? input.get(2) : null;

                    return timeBuckets(bucket, format, timeField);
                }
            };
            builder.put("timebuckets", timeHandler);
            builder.put("time", timeHandler);
            functionLookup = builder.build();
        }

        /**
         * Creates a grouping that buckets the time range between start and end.
         *
         * @param bucket bucket size specification; when null or empty the size is inferred from the time range
         * @param format pattern used to print bucket boundaries; null for the default "YYYY-MM-dd HH:mm:ss"
         * @param timeField stat to bucket by; null to use the time field of the dataset
         * @return range grouping over the time range, in seconds
         */
        private Grouping timeBuckets(String bucket, String format, Expression timeField) {
            // the range is handled in seconds
            final int min = (int) (start.getMillis() / 1000);
            final int max = (int) (end.getMillis() / 1000);
            final long interval = parseTimeBucketInterval(bucket, true, min, max);
            final DateTimeFormatter dateTimeFormatter;
            if (format != null) {
                dateTimeFormatter = DateTimeFormat.forPattern(format);
            } else {
                dateTimeFormatter = DateTimeFormat.forPattern("YYYY-MM-dd HH:mm:ss");
            }
            final Stringifier<Long> stringifier = new Stringifier<Long>() {
                public String toString(final Long integer) {
                    return new DateTime(integer * 1000).toString(dateTimeFormatter);
                }

                public Long fromString(final String str) {
                    // parse with the same formatter that toString() prints with so that the two are symmetric
                    return dateTimeFormatter.parseDateTime(str).getMillis() / 1000;
                }
            };
            final Stat stat;
            if (timeField != null) {
                stat = timeField.match(statMatcher);
            } else {
                // TODO: time field inference?
                stat = intField(datasetMetadata.getTimeFieldName());
            }
            return new StatRangeGrouping(stat, min, max, interval, false, stringifier);
        }

        /**
         * Converts a bucket size specification into a bucket size.
         * Supported forms:
         * <ul>
         * <li>null or empty: a size inferred from the time range is used</li>
         * <li>a plain number: used as is, or as a number of hours when isTime is set</li>
         * <li>a number followed by 'b' (only when min and max are positive): the number of buckets
         * to split the range from min to max into</li>
         * <li>anything else: a time period as understood by PeriodParser, converted to seconds</li>
         * </ul>
         * For time buckets the length of the time range has to be a multiple of the bucket size.
         * That check is not applied to the number of buckets form.
         *
         * @param bucketSizeStr the bucket size specification
         * @param isTime whether the buckets are time buckets
         * @param min start of the range; only used by the number of buckets form
         * @param max end of the range; only used by the number of buckets form
         * @return the bucket size
         * @throws IllegalArgumentException if the specification can't be parsed, is not positive
         *         or doesn't evenly divide the time range
         */
        private long parseTimeBucketInterval(String bucketSizeStr, boolean isTime, int min, int max) {
            if (Strings.isNullOrEmpty(bucketSizeStr)) {
                bucketSizeStr = inferTimeBucketSize();
            }

            long bucketSize;

            if (StringUtils.isNumeric(bucketSizeStr)) {
                // given a pure number
                bucketSize = Long.parseLong(bucketSizeStr);
                if (isTime) {
                    // no suffix specified for a time bucket size.
                    // assume hours instead of seconds to avoid overflows due to unintended second buckets
                    bucketSize *= SECONDS_IN_HOUR;
                }
            } else if (bucketSizeStr.charAt(bucketSizeStr.length() - 1) == 'b' && min > 0 && max > 0) {
                // given the number of buckets instead of the bucket size. so compute the bucket size ourselves
                int bucketCount = Integer.parseInt(bucketSizeStr.substring(0, bucketSizeStr.length() - 1));
                if (bucketCount < 1) {
                    throw new IllegalArgumentException("Number of time buckets has to be at least 1");
                }
                return (long) Math.ceil((max - min) / (double) bucketCount); // bucket size rounded up
            } else {
                Period period = PeriodParser.parseString(bucketSizeStr);
                if (period == null) {
                    throw new IllegalArgumentException("Bucket size argument couldn't be parsed: " + bucketSizeStr);
                }
                if (period.getMonths() > 0 || period.getYears() > 0) {
                    throw new IllegalArgumentException(
                            "Months and years are not supported as bucket sizes because they vary in length. "
                                    + "Please convert to a fixed period (e.g days, weeks) or request an absolute number of buckets (e.g. 5b)");
                }
                bucketSize = period.toStandardSeconds().getSeconds();
            }

            // a zero size (e.g. "0" or "0s") would cause a division by zero in the check below
            // and can't be used as a bucket size anyway
            if (bucketSize <= 0) {
                throw new IllegalArgumentException("Bucket size has to be positive. Given: " + bucketSizeStr);
            }

            // validate time period bucketing is compatible with the given time range
            if (isTime) {
                int xMin = (int) (start.getMillis() / 1000);
                int xMax = (int) (end.getMillis() / 1000);
                long timePeriod = xMax - xMin;

                if (timePeriod % bucketSize != 0) {
                    StringBuilder exceptionBuilder = new StringBuilder("You requested a time period (");
                    appendTimePeriod(timePeriod, exceptionBuilder);
                    exceptionBuilder.append(") not evenly divisible by the bucket size (");
                    appendTimePeriod(bucketSize, exceptionBuilder);
                    exceptionBuilder.append("). To correct, increase the time range by ");
                    appendTimePeriod(bucketSize - timePeriod % bucketSize, exceptionBuilder);
                    exceptionBuilder.append(" or reduce the time range by ");
                    appendTimePeriod(timePeriod % bucketSize, exceptionBuilder);
                    throw new IllegalArgumentException(exceptionBuilder.toString());
                }
            }

            return bucketSize;
        }

        /**
         * Appends a duration to the builder using the largest unit (weeks, days, hours, minutes)
         * that divides it evenly, or seconds if none of them does.
         *
         * @param timePeriod the duration in seconds
         * @param builder the builder to append the text to
         * @return the length in seconds of the unit that was used
         */
        private static int appendTimePeriod(long timePeriod, StringBuilder builder) {
            // candidate units from largest to smallest; the first one dividing the duration evenly is used
            final int[] unitLengths = { SECONDS_IN_WEEK, SECONDS_IN_DAY, SECONDS_IN_HOUR, SECONDS_IN_MINUTE };
            final String[] unitSuffixes = { " weeks", " days", " hours", " minutes" };

            for (int i = 0; i < unitLengths.length; i++) {
                final int unitLength = unitLengths[i];
                if (timePeriod % unitLength == 0) {
                    builder.append(timePeriod / unitLength).append(unitSuffixes[i]);
                    return unitLength;
                }
            }

            // not a whole number of minutes so the duration is given in seconds
            builder.append(timePeriod).append(" seconds");
            return 1;
        }

        // lengths of fixed time units in seconds; used when sizing time buckets and when printing durations
        private static final int SECONDS_IN_MINUTE = 60;
        private static final int SECONDS_IN_HOUR = SECONDS_IN_MINUTE * 60;
        private static final int SECONDS_IN_DAY = SECONDS_IN_HOUR * 24;
        private static final int SECONDS_IN_WEEK = SECONDS_IN_DAY * 7;

        /**
         * Translates a function call in 'group by' by delegating to the handler registered under its name.
         *
         * @param name name of the function
         * @param args arguments of the function call
         * @return the grouping produced by the handler
         * @throws IllegalArgumentException if no function is registered under the given name
         */
        @Override
        protected Grouping functionExpression(final String name, final List<Expression> args) {
            final Function<List<Expression>, Grouping> function = functionLookup.get(name);
            if (function == null) {
                throw new IllegalArgumentException("Unknown function in group by: " + name);
            }
            return function.apply(args);
        }

        /**
         * Translates a plain name in 'group by': "time" requests time buckets with all defaults,
         * any other name is grouped by as a field.
         *
         * @param name the name given in 'group by'
         * @return time bucket grouping for "time", field grouping otherwise
         */
        protected Grouping nameExpression(final String name) {
            final boolean isTimeKeyword = "time".equals(name);
            if (isTimeKeyword) {
                // time buckets special case
                return timeBuckets(null, null, null);
            }
            // normal simple field grouping
            return new FieldGrouping(getField(name, datasetMetadata), true);
        }

        /**
         * Translates the field[content] syntax by treating the content as a top terms request.
         *
         * @param field name of the field
         * @param content the text between the brackets
         * @return the grouping built by topTerms()
         */
        @Override
        protected Grouping bracketsExpression(final String field, final String content) {
            final Grouping topTermsGrouping = topTerms(field, content);
            return topTermsGrouping;
        }

        /**
         * Translates a unary expression in 'group by'. Only EXPLODE is supported: it groups by the
         * field named by the operand, passing false where a plain name grouping passes true.
         *
         * @param op the unary operator
         * @param operand the operand; its string form is used as the field name
         * @return the field grouping
         * @throws UnsupportedOperationException if the operator is not EXPLODE
         */
        @Override
        protected Grouping unaryExpression(Op op, Expression operand) {
            switch (op) {
            case EXPLODE: {
                final String fieldName = getStr(operand);
                final Field field = getField(fieldName, datasetMetadata);
                return new FieldGrouping(field, false);
            }
            default:
                throw new UnsupportedOperationException(op.toString() + " operation is not usable in group by");
            }
        }

        /**
         * Translates a binary expression in 'group by'. Only IN is supported: it groups by the field
         * on the left side with the terms listed on the right side passed to the grouping.
         *
         * @param left the field name
         * @param op the operator
         * @param right the tuple of terms
         * @return the field grouping over the listed terms
         * @throws UnsupportedOperationException if the operator is not IN
         */
        @Override
        protected Grouping binaryExpression(final Expression left, final Op op, final Expression right) {
            switch (op) {
            case IN: {
                final NameExpression name = (NameExpression) left;
                final TupleExpression values = (TupleExpression) right;
                final List<String> terms = Lists.newArrayListWithCapacity(values.expressions.size());
                for (Expression expression : values.expressions) {
                    terms.add(getStr(expression));
                }
                final Field field = getField(name.name, datasetMetadata);
                return new FieldGrouping(field, true, terms);
            }
            default:
                throw new UnsupportedOperationException(op.toString() + " operation is not usable in group by");
            }
        }

        /**
         * Picks a bucket size for the time range between start and end: the smallest of
         * 1 second, 1 minute, 1 hour, 1 day and 1 week that results in fewer than
         * MAX_RECOMMENDED_BUCKETS buckets. Falls back to 1 week if none does.
         *
         * @return the bucket size specification, e.g. "1h"
         */
        private String inferTimeBucketSize() {
            final DurationFieldType[] units = { DurationFieldType.weeks(), DurationFieldType.days(),
                    DurationFieldType.hours(), DurationFieldType.minutes(), DurationFieldType.seconds() };
            final Period range = new Period(start, end, PeriodType.forFields(units));

            // try the sizes from smallest to largest and stop at the first one that gives few enough buckets
            if (range.toStandardSeconds().getSeconds() < MAX_RECOMMENDED_BUCKETS) {
                return "1s";
            }
            if (range.toStandardMinutes().getMinutes() < MAX_RECOMMENDED_BUCKETS) {
                return "1m";
            }
            if (range.toStandardHours().getHours() < MAX_RECOMMENDED_BUCKETS) {
                return "1h";
            }
            if (range.toStandardDays().getDays() < MAX_RECOMMENDED_BUCKETS) {
                return "1d";
            }
            if (range.toStandardWeeks().getWeeks() < MAX_RECOMMENDED_BUCKETS) {
                return "1w";
            }
            // even weeks give too many buckets but there is no larger unit to offer
            return "1w";
        }

        // examples of valid 'group by' syntax; appended to the error messages thrown by topTerms()
        private final String syntaxExamples = "Syntax examples:" + "\nTop terms: country[top 5 by sjc]"
                + "\nBucketing: buckets(oji, 0, 10, 1)";

        /**
         * Content of the brackets in field[...]: optional "top" or "bottom", the number of terms,
         * and optionally "by" or "," followed by a stat expression.
         * Compiled once; compiled patterns are immutable and safe to share.
         */
        private static final Pattern TOP_TERMS_PATTERN = Pattern
                .compile("\\s*(?:(top|bottom)\\s+)?(\\d+)\\s*(?:\\s*(?:by|,)\\s*(.+))?\\s*");

        /**
         * Builds the grouping for the field[arg] syntax.
         * An empty arg groups by all terms of the field. Otherwise arg has to look like
         * "[top|bottom] K [by stat]" and the grouping is limited to K terms ordered by the stat,
         * which defaults to counts.
         *
         * @param fieldName name of the field to group by
         * @param arg the text between the brackets; may be null or blank
         * @return the field grouping
         * @throws IllegalArgumentException if arg or the stat expression in it can't be parsed
         */
        private Grouping topTerms(String fieldName, String arg) {
            if (arg == null || arg.trim().isEmpty()) {
                // treat as a request to get all terms but not explode
                final Field field = getField(fieldName, datasetMetadata);
                return new FieldGrouping(field, true);
            }

            final Matcher matcher = TOP_TERMS_PATTERN.matcher(arg);
            if (!matcher.matches()) {
                throw new IllegalArgumentException("'group by' part treated as top terms couldn't be parsed: "
                        + fieldName + "[" + arg + "].\n" + syntaxExamples);
            }

            final int topK = Integer.parseInt(matcher.group(2));
            final Stat stat;
            final String statStr = matcher.group(3);
            if (!Strings.isNullOrEmpty(statStr)) {
                try {
                    final Expression statExpression = ExpressionParser.parseExpression(statStr);
                    stat = statExpression.match(statMatcher);
                } catch (Exception e) {
                    throw new IllegalArgumentException(
                            "Couldn't parse the stat expression for top terms: " + statStr + "\n" + syntaxExamples,
                            e);
                }
            } else {
                stat = counts();
            }
            final boolean bottom = "bottom".equals(matcher.group(1));

            final Field field = getField(fieldName, datasetMetadata);
            return new FieldGrouping(field, topK, stat, bottom);
        }
    }

    /**
     * Evaluates the expression as an integer constant via {@code parseLong}.
     *
     * @param expression expression to evaluate
     * @return the value as an {@code int}
     * @throws IllegalArgumentException if the value lies outside the {@code int} range
     */
    private static int parseInt(Expression expression) {
        final long value = parseLong(expression);
        // A plain (int) cast would silently wrap out-of-range values into unrelated numbers.
        if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Integer value out of range: " + value);
        }
        return (int) value;
    }

    /**
     * Runs the expression through {@code GET_LONG} and unboxes the result.
     *
     * @param expression expression to evaluate
     * @return the primitive long produced by the matcher
     */
    private static long parseLong(Expression expression) {
        final Long parsed = expression.match(GET_LONG);
        return parsed.longValue();
    }

    /**
     * Matcher that turns an expression into a {@code Long}. Plain numbers and unary
     * expressions wrapping a number are handled here; every other expression kind is
     * left to the behavior inherited from {@code Expression.Matcher}.
     */
    private static final Expression.Matcher<Long> GET_LONG = new Expression.Matcher<Long>() {
        @Override
        protected Long numberExpression(final String value) {
            return Long.parseLong(value);
        }

        @Override
        protected Long unaryExpression(Op op, Expression operand) {
            // NOTE(review): op is never inspected; this presumes negation is the only
            // unary operator that can reach here - confirm against the Op definition.
            if (!(operand instanceof NumberExpression)) {
                throw new UnsupportedOperationException("Expected a number to negate, got " + operand);
            }
            // Prefix the sign before parsing instead of negating a parsed value, so that
            // Long.MIN_VALUE (whose magnitude alone does not fit in a long) still parses.
            final String stringValue = "-" + ((NumberExpression) operand).number;
            return Long.parseLong(stringValue);
        }
    };

    /**
     * Matcher that hands back the raw text carried by a name, string or number
     * expression unchanged. Other expression kinds are left to the behavior
     * inherited from {@code Expression.Matcher}.
     */
    private static final Expression.Matcher<String> GET_STR = new Expression.Matcher<String>() {
        // identifiers are returned as written
        protected String nameExpression(final String name) {
            return name;
        }

        // string values are returned as received
        protected String stringExpression(final String text) {
            return text;
        }

        // numbers are returned in their textual form, without parsing
        protected String numberExpression(final String digits) {
            return digits;
        }
    };

    /**
     * Extracts the textual value of an expression using {@code GET_STR}.
     *
     * @param expression expression to read
     * @return the string produced by the matcher
     */
    private static String getStr(Expression expression) {
        final String text = expression.match(GET_STR);
        return text;
    }

    /**
     * Reads the {@code name} field of an expression that must be a {@code NameExpression}.
     *
     * @param expression expression to read; the cast fails with a
     *                   {@code ClassCastException} for any other non-null expression type
     * @return the name held by the expression
     */
    private static String getName(Expression expression) {
        final NameExpression named = (NameExpression) expression;
        return named.name;
    }
}