Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.metron.common.typosquat;

import com.google.common.collect.ImmutableList;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.invoke.MethodHandles;
import java.net.IDN;
import java.util.*;

/**
 * Typosquatting strategy that substitutes characters with ASCII or unicode analogues which are
 * visually similar (e.g. latlmes.com for latimes.com).
 *
 * <p>Candidates are produced by sliding windows of every size from 1 up to (but not including)
 * the domain length across the domain and, for each character of the window that has an entry
 * in {@link #glyphs}, replacing every occurrence of that character inside the window with each
 * of its listed substitutes.
 */
public class HomoglyphStrategy implements TyposquattingStrategy {

  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /**
   * Substitution table: maps a character to the strings that may stand in for it.
   * The map is a plain {@link HashMap}; nothing in this class modifies it after initialization.
   */
  public static final Map<Character, List<String>> glyphs = buildGlyphs();

  /**
   * Builds the substitution table exposed through {@link #glyphs}.
   *
   * @return a new mutable map from character to its substitutes
   */
  private static Map<Character, List<String>> buildGlyphs() {
    // NOTE(review): many entries below are empty strings or "?". That looks like non-ASCII
    // homoglyphs that were lost to an encoding problem somewhere upstream -- confirm against
    // the intended table. The literals are reproduced exactly as they appear in this file.
    Map<Character, List<String>> table = new HashMap<>();
    table.put('a', ImmutableList.of("", "", "", "", "", "", "", "", "", "", "", "", ""));
    table.put('b', ImmutableList.of("d", "lb", "ib", "", "", "b", "", ""));
    table.put('c', ImmutableList.of("", "?", "", "", "", ""));
    table.put('d', ImmutableList.of("b", "cl", "dl", "di", "?", "", "", ""));
    table.put('e', ImmutableList.of("", "", "", "", "", "", "", "", "", "", "", "", ""));
    table.put('f', ImmutableList.of("", "", ""));
    table.put('g', ImmutableList.of("q", "", "", "", "", "", "", "?", "", ""));
    table.put('h', ImmutableList.of("lh", "ih", "", "", "?", ""));
    table.put('i', ImmutableList.of("1", "l", "", "", "", "", "", "", "", "?", ""));
    table.put('j', ImmutableList.of("", "?", "", ""));
    table.put('k', ImmutableList.of("lk", "ik", "lc", "", "", ""));
    table.put('l', ImmutableList.of("1", "i", "", ""));
    table.put('m', ImmutableList.of("n", "nn", "rn", "rr", "", "?", "", ""));
    table.put('n', ImmutableList.of("m", "r", ""));
    table.put('o', ImmutableList.of("0", "", "", "", "", "", "", "?", "?", "", "", "", ""));
    table.put('p', ImmutableList.of("?", "", "", "", ""));
    table.put('q', ImmutableList.of("g", "", "", "", ""));
    table.put('r', ImmutableList.of("", "", "", "", ""));
    table.put('s', ImmutableList.of("", "?", "", "", ""));
    table.put('t', ImmutableList.of("", "", ""));
    table.put('u', ImmutableList.of("", "", "?", "", "", "", "", ""));
    table.put('v', ImmutableList.of("", "", "v"));
    table.put('w', ImmutableList.of("vv", "", "", "?"));
    table.put('x', ImmutableList.of("", "", ""));
    table.put('y', ImmutableList.of("?", "", "", "", ""));
    table.put('z', ImmutableList.of("?", "", "", "?", ""));
    return table;
  }

  /**
   * Generates homoglyph variants of a domain.
   *
   * <p>If the input is in ACE form (see {@link #isAce(String)}) it is first converted with
   * {@link IDN#toUnicode(String)}. Every variant is added to the result as-is; when the variant
   * is not itself in ACE form, its {@link IDN#toASCII(String, int)} encoding is added as well
   * whenever that encoding differs from the variant.
   *
   * @param originalString the domain to generate variants for; may be null or empty
   * @return a new mutable set of variants; empty when the input is null or empty
   */
  @Override
  public Set<String> generateCandidates(String originalString) {
    Set<String> result = new HashSet<>();
    if (StringUtils.isEmpty(originalString)) {
      return result;
    }
    String domain = isAce(originalString) ? IDN.toUnicode(originalString) : originalString;
    int length = domain.length();

    // Window sizes run from 1 to length - 1. A zero-width window contains no characters and
    // therefore contributes nothing, so it is skipped.
    for (int ws = 1; ws < length; ws++) {
      for (int start = 0; start <= length - ws; start++) {
        String prefix = domain.substring(0, start);
        String window = domain.substring(start, start + ws);
        String suffix = domain.substring(start + ws);
        for (int j = 0; j < ws; j++) {
          char c = window.charAt(j);
          List<String> substitutes = glyphs.get(c);
          if (substitutes == null) {
            continue;
          }
          String target = String.valueOf(c);
          for (String glyph : substitutes) {
            // Literal replacement: neither the character nor the glyph is meant to be
            // interpreted as a regular expression or a regex replacement template.
            addCandidate(result, prefix + window.replace(target, glyph) + suffix);
          }
        }
      }
    }
    return result;
  }

  /**
   * Adds a variant and, when applicable, its ASCII-compatible encoding to the result set.
   *
   * @param result the set being accumulated
   * @param candidate the variant to add
   */
  private static void addCandidate(Set<String> result, String candidate) {
    result.add(candidate);
    if (isAce(candidate)) {
      return;
    }
    try {
      String ascii = IDN.toASCII(candidate, IDN.ALLOW_UNASSIGNED);
      if (!candidate.equals(ascii)) {
        result.add(ascii);
      }
    } catch (IllegalArgumentException iae) {
      // Best effort: a variant that cannot be ASCII-encoded is still kept in its raw form.
      LOG.debug("Unable to parse {}: {}", candidate, iae.getMessage(), iae);
    }
  }

  /**
   * Tells whether a domain contains a label starting with the {@code xn--} prefix, i.e. whether
   * the domain itself starts with {@code xn--} or contains {@code .xn--}. The check is
   * case-insensitive and locale-independent.
   *
   * @param domainRaw the domain to inspect; may be null
   * @return true if such a label is present, false otherwise (including for null input)
   */
  public static boolean isAce(String domainRaw) {
    if (domainRaw == null) {
      return false;
    }
    String domain = domainRaw.toLowerCase(Locale.ROOT);
    return domain.startsWith("xn--") || domain.contains(".xn--");
  }

  /**
   * @return the name of this strategy, {@code "Homoglyph"}
   */
  @Override
  public String name() {
    return "Homoglyph";
  }
}