org.apache.metron.common.typosquat.HomoglyphStrategy.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.metron.common.typosquat.HomoglyphStrategy.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.metron.common.typosquat;

import com.google.common.collect.ImmutableList;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.invoke.MethodHandles;
import java.net.IDN;
import java.util.*;

/**
 * Typosquatting strategy that substitutes characters with ASCII or Unicode analogues which are
 * visually similar (e.g. latlmes.com for latimes.com).
 *
 * <p>Candidates are generated by sliding windows of every size from 1 to {@code length - 1} over
 * the domain and, for each character in a window that has an entry in {@link #glyphs}, replacing
 * every occurrence of that character inside the window with each of its look-alikes. For every
 * candidate that is not already in ACE (punycode) form, its {@link IDN#toASCII(String, int)}
 * encoding is added as well when it differs from the candidate.
 */
public class HomoglyphStrategy implements TyposquattingStrategy {

    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /**
     * Look-alike substitutions keyed by the character they imitate. The map is unmodifiable and
     * its values are immutable lists, so the table can be shared safely.
     *
     * <p>NOTE(review): many entries in this copy are empty strings or {@code "?"}; presumably
     * these were non-ASCII characters lost to an encoding problem - verify against the upstream
     * table. As written, an empty glyph produces candidates with the character removed.
     */
    public static final Map<Character, List<String>> glyphs = Collections.unmodifiableMap(buildGlyphs());

    /**
     * Builds the glyph table. A plain helper is used instead of double-brace initialization so no
     * anonymous {@code HashMap} subclass is created.
     */
    private static Map<Character, List<String>> buildGlyphs() {
        Map<Character, List<String>> table = new HashMap<>();
        table.put('a', ImmutableList.of("", "", "", "", "", "", "", "", "", "", "", "",
                ""));
        table.put('b', ImmutableList.of("d", "lb", "ib", "", "", "b", "", ""));
        table.put('c', ImmutableList.of("", "?", "", "", "", ""));
        table.put('d', ImmutableList.of("b", "cl", "dl", "di", "?", "", "", ""));
        table.put('e', ImmutableList.of("", "", "", "", "", "", "", "", "", "", "", "",
                ""));
        table.put('f', ImmutableList.of("", "", ""));
        table.put('g', ImmutableList.of("q", "", "", "", "", "", "", "?", "", ""));
        table.put('h', ImmutableList.of("lh", "ih", "", "", "?", ""));
        table.put('i', ImmutableList.of("1", "l", "", "", "", "", "", "", "", "?", ""));
        table.put('j', ImmutableList.of("", "?", "", ""));
        table.put('k', ImmutableList.of("lk", "ik", "lc", "", "", ""));
        table.put('l', ImmutableList.of("1", "i", "", ""));
        table.put('m', ImmutableList.of("n", "nn", "rn", "rr", "", "?", "", ""));
        table.put('n', ImmutableList.of("m", "r", ""));
        table.put('o', ImmutableList.of("0", "", "", "", "", "", "", "?", "?", "", "", "",
                ""));
        table.put('p', ImmutableList.of("?", "", "", "", ""));
        table.put('q', ImmutableList.of("g", "", "", "", ""));
        table.put('r', ImmutableList.of("", "", "", "", ""));
        table.put('s', ImmutableList.of("", "?", "", "", ""));
        table.put('t', ImmutableList.of("", "", ""));
        table.put('u', ImmutableList.of("", "", "?", "", "", "", "", ""));
        table.put('v', ImmutableList.of("", "", "v"));
        table.put('w', ImmutableList.of("vv", "", "", "?"));
        table.put('x', ImmutableList.of("", "", ""));
        table.put('y', ImmutableList.of("?", "", "", "", ""));
        table.put('z', ImmutableList.of("?", "", "", "?", ""));
        return table;
    }

    /**
     * Generates homoglyph variants of the given domain.
     *
     * @param originalString the domain to mutate; if it is in ACE form it is first converted to
     *                       Unicode with {@link IDN#toUnicode(String)}
     * @return a mutable set of candidates (each candidate plus its ASCII/punycode encoding when
     *         that differs); empty when the input is null or empty
     */
    @Override
    public Set<String> generateCandidates(String originalString) {
        Set<String> result = new HashSet<>();
        if (StringUtils.isEmpty(originalString)) {
            return result;
        }
        // Work on the Unicode form so that windows slide over the decoded characters.
        String domain = isAce(originalString) ? IDN.toUnicode(originalString) : originalString;
        int length = domain.length();

        // Window sizes start at 1: a zero-width window contains no character to substitute.
        // The full-length window is intentionally not visited, matching the original bounds.
        for (int ws = 1; ws < length; ws++) {
            for (int i = 0; i <= length - ws; i++) {
                String win = domain.substring(i, i + ws);
                String prefix = domain.substring(0, i);
                String suffix = domain.substring(i + ws);
                for (int j = 0; j < ws; j++) {
                    char c = win.charAt(j);
                    List<String> substitutes = glyphs.get(c);
                    if (substitutes == null) {
                        continue;
                    }
                    String target = String.valueOf(c);
                    for (String g : substitutes) {
                        // Literal replacement: every occurrence of c inside the window is swapped,
                        // without regex compilation or '$'/'\' interpretation in the glyph.
                        addCandidate(result, prefix + win.replace(target, g) + suffix);
                    }
                }
            }
        }
        return result;
    }

    /**
     * Adds a candidate and, when the candidate is not already in ACE form, its ASCII encoding if
     * that differs from the candidate. Candidates that cannot be encoded are kept as-is and the
     * failure is logged at debug level.
     */
    private static void addCandidate(Set<String> result, String candidate) {
        result.add(candidate);
        if (isAce(candidate)) {
            return;
        }
        try {
            String ascii = IDN.toASCII(candidate, IDN.ALLOW_UNASSIGNED);
            if (!candidate.equals(ascii)) {
                result.add(ascii);
            }
        } catch (IllegalArgumentException iae) {
            // Trailing throwable is logged with its stack trace; the message is only built when
            // debug logging is enabled.
            LOG.debug("Unable to parse {}: {}", candidate, iae.getMessage(), iae);
        }
    }

    /**
     * Tells whether a domain contains an ACE (punycode) label, i.e. it starts with {@code xn--}
     * or contains {@code .xn--}, compared case-insensitively.
     *
     * @param domainRaw the domain to inspect; may be null
     * @return true if an ACE prefix is present, false otherwise (including for null)
     */
    public static boolean isAce(String domainRaw) {
        if (domainRaw == null) {
            return false;
        }
        // Locale.ROOT keeps the case folding independent of the JVM's default locale.
        String domain = domainRaw.toLowerCase(Locale.ROOT);
        return domain.startsWith("xn--") || domain.contains(".xn--");
    }

    /**
     * @return the display name of this strategy
     */
    @Override
    public String name() {
        return "Homoglyph";
    }
}