List of usage examples for the java.net.URL.getHost() method
public String getHost()
From source file:jp.igapyon.selecrawler.SeleCrawlerWebContentAnalyzer.java
public static String adjustAnchorUrl(String href, String urlString) throws MalformedURLException { if (href.trim().length() == 0) { return null; }// w ww. j a va2 s .c om if (href.startsWith("#")) { return null; } if (href.startsWith("http") == false) { if (href.startsWith("//")) { final URL url = new URL(urlString); href = url.getProtocol() + ":" + href; } else if (href.startsWith("/")) { final URL url = new URL(urlString); final String hostpart = url.getProtocol() + "://" + url.getHost(); href = hostpart + href; } else if (href.startsWith(".")) { if (urlString.endsWith("/")) { href = urlString + href; } else { href = urlString.substring(0, urlString.lastIndexOf('/') + 1) + href; } } else { // ????? if (urlString.endsWith("/")) { // ???? href = urlString + href; } else { href = urlString.substring(0, urlString.lastIndexOf('/') + 1) + href; } } } return href; }
From source file:com.digitalpebble.storm.crawler.protocol.http.HttpRobotRulesParser.java
/** * Compose unique key to store and access robot rules in cache for given URL *//* ww w . java 2 s. c om*/ protected static String getCacheKey(URL url) { String protocol = url.getProtocol().toLowerCase(Locale.ROOT); // normalize // to // lower // case String host = url.getHost().toLowerCase(Locale.ROOT); // normalize to // lower case int port = url.getPort(); if (port == -1) { port = url.getDefaultPort(); } /* * Robot rules apply only to host, protocol, and port where robots.txt * is hosted (cf. NUTCH-1752). Consequently */ String cacheKey = protocol + ":" + host + ":" + port; return cacheKey; }
From source file:edu.kit.dama.staging.entities.AdalapiProtocolConfiguration.java
/**
 * Get the unique protocol identifier for the provided Url. The identifier is generated using
 * the schema:
 *
 * <pre>protocol[@host][:port]</pre>
 *
 * Valid identifiers according to this schema are e.g. {@code http@myHost},
 * {@code http@myHost:8080}, {@code ftp@anotherHost} and {@code file}.
 *
 * <p>As there is no host/port information for Urls accessed via file protocol, there is only
 * one valid identifier for file Urls.
 *
 * @param pUrl A sample URL (protocol and authority are sufficient, e.g.
 * http://remoteHost:8080) as it should be accessed by the provided protocol implementation.
 *
 * @return The identifier string.
 * @throws IllegalArgumentException if the URL reports no protocol
 */
public final static String getProtocolIdentifier(URL pUrl) {
    String protocol = pUrl.getProtocol();
    if (protocol == null) {
        throw new IllegalArgumentException("The provided Url " + pUrl + " has no protocol specified.");
    }

    String host = pUrl.getHost();
    // URL.getHost() yields an empty string (not null) for host-less URLs such as file:/path,
    // so both cases must map to the bare protocol identifier.
    if (host == null || host.isEmpty()) {
        return protocol;
    }

    int port = pUrl.getPort();
    return protocol + "@" + host + ((port > -1) ? ":" + Integer.toString(port) : "");
}
From source file:eu.fthevenet.binjr.sources.jrds.adapters.JrdsDataAdapter.java
/** * Builds a new instance of the {@link JrdsDataAdapter} class from the provided parameters. * * @param address the URL to the JRDS webapp. * @param zoneId the id of the time zone used to record dates. * @return a new instance of the {@link JrdsDataAdapter} class. *//* w w w .jav a 2 s . c o m*/ public static JrdsDataAdapter fromUrl(String address, ZoneId zoneId, JrdsTreeViewTab treeViewTab, String filter) throws DataAdapterException { try { // Detect if URL protocol is present. If not, assume http. if (!uriSchemePattern.matcher(address).find()) { address = "http://" + address; } URL url = new URL(address.replaceAll("/$", "")); if (url.getHost().trim().isEmpty()) { throw new CannotInitializeDataAdapterException("Malformed URL: no host"); } return new JrdsDataAdapter(url, zoneId, "utf-8", treeViewTab, filter); } catch (MalformedURLException e) { throw new CannotInitializeDataAdapterException("Malformed URL: " + e.getMessage(), e); } }
From source file:org.apache.kylin.engine.mr.common.HadoopStatusGetter.java
/**
 * Checks whether {@code value} parses as a URL that has both a protocol and a host.
 *
 * @param value the candidate URL string; may be {@code null} or empty
 * @return {@code true} only if the value is non-empty, parses without a
 *         {@link MalformedURLException}, and reports a non-empty protocol and host
 */
private static boolean isValidURL(String value) {
    if (!StringUtils.isNotEmpty(value)) {
        return false;
    }

    final java.net.URL parsed;
    try {
        parsed = new java.net.URL(value);
    } catch (MalformedURLException ignored) {
        // An unparsable string is simply "not valid"; there is nothing to report.
        return false;
    }

    final boolean hasProtocol = StringUtils.isNotEmpty(parsed.getProtocol());
    return hasProtocol && StringUtils.isNotEmpty(parsed.getHost());
}
From source file:HttpTransactionUtils.java
/**
 * Formats a base URL string of the shape {@code protocol://server[:port][/file-specification]}.
 *
 * <p>The port is emitted only when the URL carries an explicit one. The file part is taken from
 * {@link URL#getFile()} and is appended only on request.
 *
 * @param url
 *          URL to format
 * @param preserveFile
 *          Keep the /directory/filename portion of the URL?
 * @return URL string
 * @throws MalformedURLException declared for callers; not raised by the formatting itself
 */
public static String formatUrl(URL url, boolean preserveFile) throws MalformedURLException {
    final StringBuilder formatted = new StringBuilder(url.getProtocol());
    formatted.append("://").append(url.getHost());

    final int explicitPort = url.getPort();
    if (explicitPort != -1) {
        formatted.append(":").append(explicitPort);
    }

    final String filePart = preserveFile ? url.getFile() : null;
    if (filePart != null) {
        formatted.append(filePart);
    }
    return formatted.toString();
}
From source file:com.icloud.framework.http.URLUtil.java
/**
 * Returns the {@link DomainSuffix} corresponding to the last public part of the hostname.
 *
 * <p>The host is walked label by label from the left: after each {@code '.'} the remainder is
 * looked up in {@link DomainSuffixes}, and the first remainder that is a known suffix wins.
 *
 * @param url the URL whose host is inspected
 * @return the matching suffix, or {@code null} if the host matches {@code IP_PATTERN} or no
 *         remainder of the host is a known suffix
 */
public static DomainSuffix getDomainSuffix(URL url) {
    DomainSuffixes tlds = DomainSuffixes.getInstance();
    String host = url.getHost();
    // Hostnames may end with a '.'; strip it so the IP check and the suffix lookup
    // see the same host form as isIPPattern and getDomainName do.
    if (host.endsWith(".")) {
        host = host.substring(0, host.length() - 1);
    }
    if (IP_PATTERN.matcher(host).matches()) {
        return null;
    }

    String candidate = host;
    int index;
    do {
        // When no '.' is left, index is -1 and substring(0) re-checks the last label as-is.
        index = candidate.indexOf('.');
        candidate = candidate.substring(index + 1);
        DomainSuffix suffix = tlds.get(candidate);
        if (suffix != null) {
            return suffix;
        }
    } while (index >= 0);
    return null;
}
From source file:com.icloud.framework.http.URLUtil.java
public static boolean isIPPattern(URL url) { // DomainSuffixes tlds = DomainSuffixes.getInstance(); String host = url.getHost(); // it seems that java returns hostnames ending with . if (host.endsWith(".")) host = host.substring(0, host.length() - 1); if (IP_PATTERN.matcher(host).matches()) return true; return false; }
From source file:com.icloud.framework.http.URLUtil.java
/** * Returns the domain name of the url. The domain name of a url is the * substring of the url's hostname, w/o subdomain names. As an example <br> * <code>// w w w .j ava 2s . c o m * getDomainName(conf, new URL(http://lucene.apache.org/)) * </code><br> * will return <br> * <code> apache.org</code> * */ public static String getDomainName(URL url) { DomainSuffixes tlds = DomainSuffixes.getInstance(); String host = url.getHost(); // it seems that java returns hostnames ending with . if (host.endsWith(".")) host = host.substring(0, host.length() - 1); if (IP_PATTERN.matcher(host).matches()) return host; int index = 0; String candidate = host; for (; index >= 0;) { index = candidate.indexOf('.'); String subCandidate = candidate.substring(index + 1); if (tlds.isDomainSuffix(subCandidate)) { return candidate; } candidate = subCandidate; } return candidate; }
From source file:com.zimbra.cs.servlet.util.AuthUtil.java
public static String getRedirectURL(HttpServletRequest req, Server server, boolean isAdminRequest, boolean relative) throws ServiceException, MalformedURLException { String redirectUrl;/*from w w w .j a v a 2 s .c o m*/ if (isAdminRequest) { redirectUrl = getAdminURL(server, relative); } else { redirectUrl = getMailURL(server, relative); } if (!relative) { URL url = new URL(redirectUrl); // replace host of the URL to the host the request was sent to String reqHost = req.getServerName(); String host = url.getHost(); if (!reqHost.equalsIgnoreCase(host)) { URL destUrl = new URL(url.getProtocol(), reqHost, url.getPort(), url.getFile()); redirectUrl = destUrl.toString(); } } return redirectUrl; }