Java tutorial
/*
 * Copyright 2016 Fluo authors (see AUTHORS)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package io.fluo.webindex.core.models;

import java.io.Serializable;
import java.util.Locale;
import java.util.Objects;
import java.util.function.Function;
import java.util.regex.Pattern;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.validator.routines.InetAddressValidator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Immutable model of an {@code http://} or {@code https://} URL, split into domain, host, path,
 * port and scheme security.
 *
 * <p>
 * Instances are normally created with {@link #from(String, Function, Function)} (parsing a raw
 * URL) or {@link #fromPageID(String)} (parsing the page ID produced by {@link #toPageID()}).
 */
public class URL implements Serializable {

  private static final Logger log = LoggerFactory.getLogger(URL.class);

  private static final String URL_SEP_REGEX = "[/?#]";
  // Compiled once; the first '/', '?' or '#' separates host[:port] from the path.
  private static final Pattern URL_SEP_PATTERN = Pattern.compile(URL_SEP_REGEX);
  // Compiled once instead of on every isImage() call.
  private static final Pattern IMAGE_PATH_PATTERN =
      Pattern.compile("([^\\s]+(\\.(?i)(jpeg|jpg|png|gif|bmp))$)");
  private static final String HTTP_PROTO = "http://";
  private static final String HTTPS_PROTO = "https://";
  private static final String PAGE_ID_SEP = ">";
  private static final int HTTP_PORT = 80;
  private static final int HTTPS_PORT = 443;
  private static final int MAX_PORT = 65535;

  /** Shared validator used by {@link #isValidIP(String)}. */
  public static final InetAddressValidator validator = InetAddressValidator.getInstance();

  private static final long serialVersionUID = 1L;

  private final String domain;
  private final String host;
  private final String path;
  private final int port;
  private final boolean secure;
  private final boolean ipHost;

  /**
   * Creates a URL from already-parsed components. No validation other than null checks is done.
   *
   * @param domain domain of the host (the host itself when the host is an IP address)
   * @param host host name or IP address
   * @param path path portion, including its leading separator
   * @param port port number
   * @param secure {@code true} for https, {@code false} for http
   * @param ipHost {@code true} if {@code host} is an IP address
   * @throws NullPointerException if {@code domain}, {@code host} or {@code path} is null
   */
  public URL(String domain, String host, String path, int port, boolean secure, boolean ipHost) {
    this.domain = Objects.requireNonNull(domain, "domain");
    this.host = Objects.requireNonNull(host, "host");
    this.path = Objects.requireNonNull(path, "path");
    this.port = port;
    this.secure = secure;
    this.ipHost = ipHost;
  }

  /**
   * Logs the message and then always throws.
   *
   * @param logError if {@code true} the message is logged at error level, otherwise at debug level
   * @param msg message to log and to use as the exception message
   * @throws IllegalArgumentException always
   */
  public static void badUrl(boolean logError, String msg) {
    if (logError) {
      log.error(msg);
    } else {
      log.debug(msg);
    }
    throw new IllegalArgumentException(msg);
  }

  /**
   * Parses a raw URL string.
   *
   * <p>
   * The raw URL must not contain the page ID separator {@code '>'}, must start (after trimming,
   * case-insensitively) with {@code http://} or {@code https://}, and must have a non-empty host.
   * The host and port are lower-cased using {@link Locale#ROOT}; the path is kept as given and
   * defaults to {@code "/"} when the URL has none. The port defaults to 80 for http and 443 for
   * https. If the host is not an IP address it must be accepted by {@code isValidHost}, and its
   * domain is computed with {@code domainFromHost}; for IP hosts the domain is the host itself.
   *
   * <p>
   * NOTE(review): host and port are split at the first {@code ':'}, so bracketed IPv6 literals
   * are not parsed as hosts here.
   *
   * @param rawUrl URL text to parse
   * @param domainFromHost computes the domain for a (non-IP) host
   * @param isValidHost decides whether a (non-IP) host is acceptable
   * @return the parsed URL
   * @throws IllegalArgumentException if the URL is rejected for any of the reasons above, or if
   *         its port is not an integer in the range 0-65535
   * @throws NullPointerException if {@code rawUrl} is null
   */
  public static URL from(String rawUrl, Function<String, String> domainFromHost,
      Function<String, Boolean> isValidHost) {
    Objects.requireNonNull(rawUrl, "rawUrl");
    if (rawUrl.contains(PAGE_ID_SEP)) {
      badUrl(false, "Skipping raw URL as it contains '" + PAGE_ID_SEP + "':" + rawUrl);
    }

    String trimUrl = rawUrl.trim();
    if (trimUrl.length() < 8) {
      badUrl(false, "Raw URL is too short to start with valid protocol: " + rawUrl);
    }

    // badUrl() always throws, but the compiler cannot see that, hence the dummy initial value.
    String urlNoProto = "";
    boolean secure = false;
    int port = HTTP_PORT;
    if (trimUrl.regionMatches(true, 0, HTTP_PROTO, 0, HTTP_PROTO.length())) {
      urlNoProto = trimUrl.substring(HTTP_PROTO.length());
    } else if (trimUrl.regionMatches(true, 0, HTTPS_PROTO, 0, HTTPS_PROTO.length())) {
      urlNoProto = trimUrl.substring(HTTPS_PROTO.length());
      secure = true;
      port = HTTPS_PORT;
    } else {
      badUrl(false, "Raw URL does not start with valid protocol: " + rawUrl);
    }

    String hostPort;
    String path;
    String[] args = URL_SEP_PATTERN.split(urlNoProto, 2);
    if (args.length == 2) {
      // Locale.ROOT keeps lower-casing independent of the JVM default locale (e.g. Turkish 'I').
      hostPort = args[0].toLowerCase(Locale.ROOT);
      // Everything from the separator on, separator included, is the path.
      path = urlNoProto.substring(args[0].length());
    } else {
      hostPort = urlNoProto.toLowerCase(Locale.ROOT);
      path = "/";
    }

    String host;
    args = hostPort.split(":", 2);
    if (args.length == 2) {
      host = args[0];
      try {
        port = Integer.parseInt(args[1]);
      } catch (NumberFormatException e) {
        badUrl(false, "Raw URL (" + rawUrl + ") has invalid port: " + args[1]);
      }
      if (port < 0 || port > MAX_PORT) {
        badUrl(false, "Raw URL (" + rawUrl + ") has invalid port: " + args[1]);
      }
    } else {
      host = hostPort;
    }

    if (host.isEmpty()) {
      badUrl(false, "Raw URL cannot have empty host: " + rawUrl);
    }

    String domain = host;
    boolean ipHost = isValidIP(host);
    if (!ipHost) {
      if (!isValidHost.apply(host)) {
        badUrl(false, "Raw URL (" + rawUrl + ") has invalid host: " + host);
      }
      domain = domainFromHost.apply(host);
    }
    return new URL(domain, host, path, port, secure, ipHost);
  }

  /**
   * Reports whether {@link #from(String, Function, Function)} would succeed for the given input.
   *
   * @param rawUrl URL text to check
   * @param domainFromHost computes the domain for a (non-IP) host
   * @param isValidHost decides whether a (non-IP) host is acceptable
   * @return {@code true} if parsing completes, {@code false} if it throws any exception
   */
  public static boolean isValid(String rawUrl, Function<String, String> domainFromHost,
      Function<String, Boolean> isValidHost) {
    try {
      from(rawUrl, domainFromHost, isValidHost);
      return true;
    } catch (Exception e) {
      // Any failure, including one raised by the supplied functions, means "not valid".
      return false;
    }
  }

  /**
   * @param host host text to check
   * @return {@code true} if {@link #validator} considers {@code host} a valid IP address
   */
  public static boolean isValidIP(String host) {
    return validator.isValid(host);
  }

  /**
   * Reverses the dot-separated labels of a host, e.g. {@code a.b.com} becomes {@code com.b.a}. A
   * trailing dot on the input is kept as a trailing dot on the result. Input consisting only of
   * dots is returned unchanged.
   *
   * @param host host name to reverse
   * @return the host with its labels in reverse order
   */
  public static String reverseHost(String host) {
    String[] hostArgs = host.split("\\.");
    if (hostArgs.length == 0) {
      // split() drops trailing empty strings, so a dot-only host yields no labels at all.
      return host;
    }
    ArrayUtils.reverse(hostArgs);
    String reversed = String.join(".", hostArgs);
    return host.endsWith(".") ? reversed + "." : reversed;
  }

  /** @return {@code true} if this URL's host is an IP address */
  public boolean hasIPHost() {
    return ipHost;
  }

  /** @return the host */
  public String getHost() {
    return host;
  }

  /** @return the host as-is for IP hosts, otherwise the host with its labels reversed */
  public String getReverseHost() {
    if (hasIPHost()) {
      return host;
    }
    return reverseHost(host);
  }

  /** @return the path */
  public String getPath() {
    return path;
  }

  /** @return {@code true} for https, {@code false} for http */
  public boolean isSecure() {
    return secure;
  }

  /** @return the port */
  public int getPort() {
    return port;
  }

  /**
   * @return {@code true} if the whole path contains no whitespace and ends (case-insensitively)
   *         with {@code .jpeg}, {@code .jpg}, {@code .png}, {@code .gif} or {@code .bmp}
   */
  public boolean isImage() {
    return IMAGE_PATH_PATTERN.matcher(path).matches();
  }

  /**
   * @return the URL as {@code http[s]://host[:port]path}; the port is omitted when it is the
   *         default for the scheme (80 for http, 443 for https)
   */
  @Override
  public String toString() {
    StringBuilder url = new StringBuilder();
    url.append(secure ? "https" : "http");
    url.append("://");
    url.append(host);
    if (!hasDefaultPort()) {
      url.append(":");
      url.append(port);
    }
    url.append(path);
    return url.toString();
  }

  /**
   * Builds the page ID for this URL: reversed domain, the remainder of the reversed host, a
   * scheme marker ({@code "s"} for https, {@code "o"} for http) followed by the port when it is
   * not the scheme default, and the path, joined by {@code '>'}.
   *
   * @return the page ID
   */
  public String toPageID() {
    String reverseDomain = getReverseDomain();
    String nonDomain = getReverseHost().substring(reverseDomain.length());
    String portStr = hasDefaultPort() ? "" : Integer.toString(port);
    return reverseDomain + PAGE_ID_SEP + nonDomain + PAGE_ID_SEP + (secure ? "s" : "o") + portStr
        + PAGE_ID_SEP + path;
  }

  /**
   * Parses a page ID in the format produced by {@link #toPageID()}.
   *
   * @param pageID page ID to parse
   * @return the URL described by the page ID
   * @throws IllegalArgumentException if the page ID does not split into exactly four parts, its
   *         scheme marker is neither {@code 's'} nor {@code 'o'}, or its port is not an integer
   */
  public static URL fromPageID(String pageID) {
    String[] idArgs = pageID.split(PAGE_ID_SEP);
    if (idArgs.length != 4) {
      throw new IllegalArgumentException("Page ID has too few or many parts: " + pageID);
    }

    String domain = idArgs[0];
    String host = idArgs[0] + idArgs[1];
    boolean ipHost = isValidIP(host);
    if (!ipHost) {
      // Non-IP hosts are stored reversed in the page ID; restore the normal label order.
      domain = reverseHost(domain);
      host = reverseHost(host);
    }

    String portInfo = idArgs[2];
    boolean secure = false;
    int port = HTTP_PORT;
    if (portInfo.startsWith("s")) {
      secure = true;
      port = HTTPS_PORT;
    } else if (!portInfo.startsWith("o")) {
      throw new IllegalArgumentException(
          "Page ID does not have port info beg with 's' or 'o': " + pageID);
    }
    if (portInfo.length() > 1) {
      // NumberFormatException is an IllegalArgumentException, so callers see one exception type.
      port = Integer.parseInt(portInfo.substring(1));
    }

    String path = idArgs[3];
    return new URL(domain, host, path, port, secure, ipHost);
  }

  /** @return the domain */
  public String getDomain() {
    return domain;
  }

  /** @return the domain as-is for IP hosts, otherwise the domain with its labels reversed */
  public String getReverseDomain() {
    if (hasIPHost()) {
      return domain;
    }
    return reverseHost(domain);
  }

  /**
   * Two URLs are equal when domain, host, path, port and scheme security all match. The IP-host
   * flag is not compared.
   */
  @Override
  public boolean equals(Object o) {
    if (o instanceof URL) {
      URL other = (URL) o;
      return domain.equals(other.domain) && host.equals(other.host) && path.equals(other.path)
          && port == other.port && secure == other.secure;
    }
    return false;
  }

  /** Hash over the same fields that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    int result = domain.hashCode();
    result = 31 * result + host.hashCode();
    result = 31 * result + path.hashCode();
    result = 31 * result + port;
    result = 31 * result + (secure ? 1 : 0);
    return result;
  }

  /** Returns true when the port is the default for the scheme (80 for http, 443 for https). */
  private boolean hasDefaultPort() {
    return secure ? port == HTTPS_PORT : port == HTTP_PORT;
  }
}