Java tutorial
/** * Copyright (C) 2013 Seajas, the Netherlands. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 3, as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.seajas.search.utilities.web; import java.net.URI; import java.net.URISyntaxException; import java.util.HashMap; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.util.StringUtils; /** * Internet location routines. * * @author Pascal S. de Kloe <pascal@quies.net> */ public final class WebResourceLocators { /** * Logger. */ private static final Logger logger = LoggerFactory.getLogger(WebResourceLocators.class); /** * The ASCII characters which should always be percent encoded. */ private static final String ASCII_ESCAPE_SET = ",'\" `"; /** * Percent escaping map. */ private static final Map<String, String> xmlEntityPercentEscapes = getXmlEntityPercentEscapes(); /** * Default constructor. */ private WebResourceLocators() { } /** * Gets a resource locator. * * @param uri * @throws URISyntaxException */ public static URI parseURI(String uri) throws URISyntaxException { if (!StringUtils.hasText(uri)) throw new URISyntaxException("", "No content"); uri = uri.trim(); // Escape illegal characters for (int i = 0; i < uri.length(); ++i) { char c = uri.charAt(i); if (ASCII_ESCAPE_SET.indexOf(c) >= 0) uri = String.format("%s%%%02X%s", uri.substring(0, i), c & 0xFF, uri.substring(i + 1)); } // Parse XML entities for (int start = uri.indexOf('&'); start >= 0; start = uri.indexOf('&', start + 1)) { int end = uri.indexOf(';', start); if (end < 0) break; String entity = uri.substring(start + 1, end); String replacement = null; if (entity.startsWith("#")) { try { int codepoint = entity.startsWith("#x") ? Integer.parseInt(entity.substring(2), 16) : Integer.parseInt(entity.substring(1)); if (codepoint <= 0xFF) replacement = String.format("%%%02X", codepoint); else replacement = new String(Character.toChars(codepoint)); } catch (Exception e) { logger.trace("Unparseable numeric entity.", e); } } else { replacement = xmlEntityPercentEscapes.get(entity); } if (replacement != null) { if (logger.isDebugEnabled()) logger.debug(String.format("Replaced entity %s with %s for %s", entity, replacement, uri)); uri = uri.substring(0, start) + replacement + uri.substring(end + 1); } else if (logger.isDebugEnabled()) { logger.debug(String.format("Skiped entity %s for %s", entity, uri)); } } // Multiple fragments while (uri.indexOf('#') != uri.lastIndexOf('#')) { int i = uri.lastIndexOf('#'); // Escape last hash mark uri = uri.substring(0, i) + "%23" + uri.substring(i + 1); } return new URI(uri).normalize(); } /** * Gets a resource locator. The optional base URIs are used to resolve relative paths in order of appearance. * * @param uri * the serialized form. * @param baseURIs * the serialized forms. * @throws URISyntaxException * when all interpretation attempts have failed. */ public static URI parseURI(final String uri, final String... baseURIs) throws URISyntaxException { URI result = parseURI(uri); if (!result.isAbsolute() && baseURIs != null) { for (String base : baseURIs) { try { result = parseURI(base).resolve(result); if (result.isAbsolute()) break; } catch (URISyntaxException e) { if (logger.isDebugEnabled()) logger.debug(String.format("Skipping unparseable base URI %s for %s", base, uri), e); } } } return result; } private static Map<String, String> getXmlEntityPercentEscapes() { Map<String, String> map = new HashMap<String, String>(); map.put("amp", "%26"); map.put("lt", "%3C"); map.put("gt", "%3E"); map.put("apos", "%27"); map.put("quot", "%22"); return map; } }