org.eobjects.datacleaner.util.DefaultEnumMatcher.java Source code

Java tutorial

Introduction

Here is the source code for org.eobjects.datacleaner.util.DefaultEnumMatcher.java

Source

/**
 * DataCleaner (community edition)
 * Copyright (C) 2013 Human Inference
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.eobjects.datacleaner.util;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.eobjects.analyzer.util.HasAliases;
import org.eobjects.analyzer.util.ReflectionUtils;
import org.eobjects.analyzer.util.StringUtils;
import org.eobjects.metamodel.util.HasName;

import com.google.common.base.Splitter;
import com.google.common.base.Strings;

/**
 * Default {@link EnumMatcher} implementation that uses a normalized/trimmed
 * version of the following values for exact matching:
 * 
 * <ul>
 * <li>The constant name of the enum</li>
 * <li>The name of the enum, if it implements {@link HasName}</li>
 * <li>The alias(es) of the enum, if it implements {@link HasAliases}</li>
 * </ul>
 * 
 * @param <E>
 *            the enum class
 */
public class DefaultEnumMatcher<E extends Enum<?>> implements EnumMatcher<E> {

    private final Map<String, E> _exactMatchesMap;

    public DefaultEnumMatcher(Class<E> enumClass) {
        _exactMatchesMap = new HashMap<String, E>();

        final E[] enumConstants = enumClass.getEnumConstants();

        if (ReflectionUtils.is(enumClass, HasAliases.class)) {
            for (final E e : enumConstants) {
                final HasAliases hasAliases = (HasAliases) e;
                final String[] aliases = hasAliases.getAliases();
                if (aliases != null) {
                    for (String alias : aliases) {
                        putMatch(alias, e);
                    }
                }
            }
        }

        if (ReflectionUtils.is(enumClass, HasName.class)) {
            for (final E e : enumConstants) {
                final HasName hasName = (HasName) e;
                final String name = hasName.getName();
                putMatch(name, e);
            }
        }

        for (final E e : enumConstants) {
            final String constantName = e.name();
            putMatch(constantName, e);
        }
    }

    private void putMatch(String string, E e) {
        final String normalizedString = normalize(string);
        if (Strings.isNullOrEmpty(normalizedString)) {
            return;
        }
        _exactMatchesMap.put(normalizedString, e);

        final Collection<String> secondaryMatches = findSecondaryMatchStrings(string);
        for (String secondaryMatch : secondaryMatches) {
            if (!_exactMatchesMap.containsKey(secondaryMatch)) {
                _exactMatchesMap.put(secondaryMatch, e);
            }
        }
    }

    @Override
    public E suggestMatch(final String string) {
        final String normalizedString = normalize(string);
        if (Strings.isNullOrEmpty(normalizedString)) {
            return null;
        }
        final E exactMatchResult = _exactMatchesMap.get(normalizedString);
        if (exactMatchResult != null) {
            return exactMatchResult;
        }

        return null;
    }

    /**
     * Normalizes the incoming string before doing matching
     * 
     * @param string
     * @return
     */
    protected String normalize(String string) {
        if (string == null) {
            return null;
        }
        string = StringUtils.replaceWhitespaces(string, "");
        string = StringUtils.replaceAll(string, "-", "");
        string = StringUtils.replaceAll(string, "_", "");
        string = StringUtils.replaceAll(string, "|", "");
        string = StringUtils.replaceAll(string, "*", "");
        string = string.toUpperCase();
        return string;
    }

    private Collection<String> findSecondaryMatchStrings(String string) {
        if (string == null) {
            return Collections.emptySet();
        }

        string = StringUtils.replaceAll(string, "-", " ");
        string = StringUtils.replaceAll(string, "_", " ");
        string = StringUtils.replaceAll(string, "|", " ");
        string = StringUtils.replaceAll(string, "*", " ");
        string = StringUtils.replaceAll(string, "  ", " ");
        string = string.trim();
        string = string.toUpperCase();

        final Splitter splitter = Splitter.on(' ');
        final List<String> words1 = splitter.splitToList(string);

        string = string.replaceAll("[0-9]", "");

        final List<String> words2 = splitter.splitToList(string);

        string = StringUtils.replaceWhitespaces(string, "");

        final List<String> words3 = splitter.splitToList(string);

        final Collection<String> result = new HashSet<String>();
        result.addAll(words1);
        result.addAll(words2);
        result.addAll(words3);
        return result;
    }
}