gobblin.util.DatasetFilterUtils.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.util.DatasetFilterUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.util;

import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import gobblin.configuration.State;

/**
 * A utility class for filtering datasets (for example topic names) through a blacklist and a
 * whitelist.
 *
 * <p>Both lists hold compiled regular expressions (NOT glob patterns). A name is tested against a
 * pattern with {@link java.util.regex.Matcher#matches()}, so the pattern has to match the whole
 * name, not merely a substring of it.
 *
 * <p>Wherever a list of patterns is accepted, {@code null} is treated the same as an empty list,
 * i.e. "no patterns configured".
 */
public class DatasetFilterUtils {

    /**
     * Reads the property {@code propKey} from {@code state} as a list and compiles every entry
     * into a {@link Pattern}, using the empty string as the default property value.
     *
     * @param state the state to read the property from
     * @param propKey the property key holding the regular expressions
     * @return the compiled patterns, in the order returned by the state
     */
    public static List<Pattern> getPatternList(State state, String propKey) {
        return getPatternList(state, propKey, StringUtils.EMPTY);
    }

    /**
     * Reads the property {@code propKey} from {@code state} as a list, falling back to
     * {@code def} as the default property value, and compiles every entry into a {@link Pattern}.
     *
     * @param state the state to read the property from
     * @param propKey the property key holding the regular expressions
     * @param def the default value handed to {@code State.getPropAsList}
     * @return the compiled patterns, in the order returned by the state
     */
    public static List<Pattern> getPatternList(State state, String propKey, String def) {
        List<String> list = state.getPropAsList(propKey, def);
        return getPatternsFromStrings(list);
    }

    /**
     * Convert a list of Strings to a list of Patterns.
     *
     * @param strings the regular expressions to compile; {@code null} is treated as empty
     * @return a new mutable list with one compiled pattern per input string, in input order
     * @throws java.util.regex.PatternSyntaxException if one of the strings is not a valid regex
     */
    public static List<Pattern> getPatternsFromStrings(List<String> strings) {
        if (strings == null) {
            return Lists.newArrayList();
        }
        // The result has exactly one entry per input string, so size it up front.
        List<Pattern> patterns = Lists.newArrayListWithCapacity(strings.size());
        for (String regex : strings) {
            patterns.add(Pattern.compile(regex));
        }
        return patterns;
    }

    /**
     * Returns the topics that survive the blacklist/whitelist check, keeping the input order.
     *
     * @param topics the candidate topic names
     * @param blacklist patterns that exclude a topic; {@code null} is treated as empty
     * @param whitelist patterns a topic must match, unless the list is empty or {@code null}
     * @return a new list holding the topics for which {@link #survived} is {@code true}
     */
    public static List<String> filter(List<String> topics, List<Pattern> blacklist, List<Pattern> whitelist) {
        List<String> result = Lists.newArrayList();
        collectSurvivors(topics, blacklist, whitelist, result);
        return result;
    }

    /**
     * Returns the topics that survive the blacklist/whitelist check.
     *
     * @param topics the candidate topic names
     * @param blacklist patterns that exclude a topic; {@code null} is treated as empty
     * @param whitelist patterns a topic must match, unless the list is empty or {@code null}
     * @return a new hash set holding the topics for which {@link #survived} is {@code true}
     */
    public static Set<String> filter(Set<String> topics, List<Pattern> blacklist, List<Pattern> whitelist) {
        Set<String> result = Sets.newHashSet();
        collectSurvivors(topics, blacklist, whitelist, result);
        return result;
    }

    /**
     * Adds every topic that survives the blacklist/whitelist check to {@code sink}.
     */
    private static void collectSurvivors(Iterable<String> topics, List<Pattern> blacklist,
            List<Pattern> whitelist, Collection<String> sink) {
        for (String topic : topics) {
            if (survived(topic, blacklist, whitelist)) {
                sink.add(topic);
            }
        }
    }

    /**
     * A topic survives if (1) it doesn't match the blacklist, and
     * (2) either whitelist is empty, or it matches the whitelist.
     * Whitelist and blacklist use regex patterns (NOT glob patterns).
     *
     * @param topic the topic name to test
     * @param blacklist patterns that exclude a topic; {@code null} is treated as empty
     * @param whitelist patterns a topic must match; {@code null} is treated as empty
     * @return {@code true} if the topic passes both checks
     */
    public static boolean survived(String topic, List<Pattern> blacklist, List<Pattern> whitelist) {
        // The blacklist takes precedence: a blacklisted topic is rejected even if whitelisted.
        if (stringInPatterns(topic, blacklist)) {
            return false;
        }
        return whitelist == null || whitelist.isEmpty() || stringInPatterns(topic, whitelist);
    }

    /**
     * Determines whether a string matches one of the regex patterns.
     *
     * @param s the string to test; the whole string must match a pattern
     * @param patterns the patterns to try; {@code null} is treated as empty
     * @return {@code true} if at least one pattern matches the entire string
     */
    public static boolean stringInPatterns(String s, List<Pattern> patterns) {
        if (patterns == null) {
            return false;
        }
        for (Pattern pattern : patterns) {
            if (pattern.matcher(s).matches()) {
                return true;
            }
        }
        return false;
    }
}