Example usage for java.net.URL.getRef()

List of usage examples for java.net URL getRef

Introduction

On this page you can find example usages of java.net.URL.getRef().

Prototype

public String getRef() 

Source Link

Document

Gets the anchor (also known as the "reference") of this URL.

Usage

From source file:org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer.java

/**
 * Normalizes a URL string.
 * <p>
 * The input is trimmed and parsed with {@link URL}. For the {@code http} and
 * {@code ftp} protocols the host is lowercased, a port equal to the protocol's
 * default port is omitted, an empty file part becomes {@code "/"}, and the file
 * part is passed through {@code substituteUnnecessaryRelativePaths}. Whenever
 * anything changed, the result is rebuilt from protocol, host, port and file
 * only, so a ref (anchor) is not carried over into the rebuilt URL.
 *
 * @param urlString the URL to normalize; the empty string is returned as is
 * @param scope not used by this implementation
 * @return the normalized URL, or the trimmed input when nothing had to change
 * @throws MalformedURLException if the trimmed input (or the rebuilt URL)
 *         cannot be parsed by {@link URL}
 */
public String normalize(String urlString, String scope) throws MalformedURLException {
    if ("".equals(urlString)) { // permit empty
        return urlString;
    }

    urlString = urlString.trim(); // remove extra spaces

    URL url = new URL(urlString);

    String protocol = url.getProtocol();
    String host = url.getHost();
    int port = url.getPort();
    String file = url.getFile();

    boolean changed = false;

    if (!urlString.startsWith(protocol)) { // protocol was lowercased
        changed = true;
    }

    if ("http".equals(protocol) || "ftp".equals(protocol)) {

        if (host != null) {
            // Lowercase with Locale.ROOT: the default-locale variant maps 'I' to a
            // dotless i under e.g. a Turkish locale, which would corrupt host names.
            String newHost = host.toLowerCase(java.util.Locale.ROOT);
            if (!host.equals(newHost)) {
                host = newHost;
                changed = true;
            }
        }

        if (port == url.getDefaultPort()) { // uses default port
            port = -1; // so don't specify it
            changed = true;
        }

        if (file == null || "".equals(file)) { // add a slash
            file = "/";
            changed = true;
        }

        if (url.getRef() != null) { // remove the ref
            // Marking the URL as changed forces the rebuild below, which omits the ref.
            changed = true;
        }

        // check for unnecessary use of "/../"
        String file2 = substituteUnnecessaryRelativePaths(file);

        if (!file.equals(file2)) {
            changed = true;
            file = file2;
        }

    }

    if (changed) {
        urlString = new URL(protocol, host, port, file).toString();
    }

    return urlString;
}

From source file:com.iflytek.spider.net.BasicURLNormalizer.java

/**
 * Normalizes a URL string.
 * <p>
 * The input is trimmed and parsed with {@link URL}. For {@code http} and
 * {@code ftp} URLs the host is lowercased, a port equal to the protocol's
 * default port is dropped, an empty file part is replaced by {@code "/"}, and
 * the file part is run through {@code substituteUnnecessaryRelativePaths}.
 * If any of these steps changed something, the URL is reassembled from
 * protocol, host, port and file; the ref (anchor) is not part of that
 * reassembly.
 *
 * @param urlString the URL to normalize; the empty string is returned as is
 * @return the normalized URL, or the trimmed input when no change was needed
 * @throws MalformedURLException if {@link URL} rejects the trimmed input or
 *         the reassembled URL
 */
public String normalize(String urlString) throws MalformedURLException {
    if ("".equals(urlString)) { // permit empty
        return urlString;
    }

    urlString = urlString.trim(); // remove extra spaces

    URL url = new URL(urlString);

    String protocol = url.getProtocol();
    String host = url.getHost();
    int port = url.getPort();
    String file = url.getFile();

    boolean changed = false;

    if (!urlString.startsWith(protocol)) { // protocol was lowercased
        changed = true;
    }

    if ("http".equals(protocol) || "ftp".equals(protocol)) {

        if (host != null) {
            // Locale.ROOT keeps the result independent of the JVM default locale
            // (a Turkish default locale would turn 'I' into a dotless i).
            String newHost = host.toLowerCase(java.util.Locale.ROOT);
            if (!host.equals(newHost)) {
                host = newHost;
                changed = true;
            }
        }

        if (port == url.getDefaultPort()) { // uses default port
            port = -1; // so don't specify it
            changed = true;
        }

        if (file == null || "".equals(file)) { // add a slash
            file = "/";
            changed = true;
        }

        if (url.getRef() != null) { // remove the ref
            // Forces the reassembly below, which leaves the ref out.
            changed = true;
        }

        // check for unnecessary use of "/../"
        String file2 = substituteUnnecessaryRelativePaths(file);

        if (!file.equals(file2)) {
            changed = true;
            file = file2;
        }

    }

    if (changed) {
        urlString = new URL(protocol, host, port, file).toString();
    }

    return urlString;
}

From source file:com.digitalpebble.storm.crawler.filtering.basic.BasicURLNormalizer.java

/**
 * Rebuilds a URL without the query parameters whose names appear in
 * {@code queryElementsToRemove}, so that parameters which do not change the
 * content of the page (for example an analytics parameter such as
 * "utm_campaign") no longer produce distinct URLs. The remaining parameters
 * are sorted with {@code comp}; path and ref are carried over.
 *
 * @param urlToFilter the URL to filter
 * @return {@code urlToFilter} itself when it has no query, the rebuilt URL
 *         otherwise, or {@code null} when the URL cannot be parsed
 */
private String filterQueryElements(String urlToFilter) {
    try {
        // Go through java.net.URL first; per the original author this also
        // takes care of illegal characters such as '|'.
        URL parsed = new URL(urlToFilter);
        String rawQuery = parsed.getQuery();

        if (StringUtils.isEmpty(rawQuery)) {
            return urlToFilter;
        }

        List<NameValuePair> kept = new ArrayList<NameValuePair>();
        URLEncodedUtils.parse(kept, new Scanner(rawQuery), "UTF-8");
        for (Iterator<NameValuePair> it = kept.iterator(); it.hasNext();) {
            if (queryElementsToRemove.contains(it.next().getName())) {
                it.remove();
            }
        }

        // Reassemble the "file" part: path, then sorted query, then ref.
        String path = parsed.getPath();
        StringBuilder rebuilt = new StringBuilder();
        if (path != null) {
            rebuilt.append(path);
        }
        if (!kept.isEmpty()) {
            Collections.sort(kept, comp);
            rebuilt.append('?');
            rebuilt.append(URLEncodedUtils.format(kept, StandardCharsets.UTF_8));
        }
        String ref = parsed.getRef();
        if (ref != null) {
            rebuilt.append('#');
            rebuilt.append(ref);
        }

        URL result = new URL(parsed.getProtocol(), parsed.getHost(), parsed.getPort(), rebuilt.toString());
        return result.toString();
    } catch (MalformedURLException e) {
        LOG.warn("Invalid urlToFilter {}. {}", urlToFilter, e);
        return null;
    }
}

From source file:org.codice.alliance.nsili.client.SampleNsiliClient.java

/**
 * Parses the given string as a {@link URL} and re-assembles its components
 * into a {@link URI} using the multi-argument URI constructor.
 *
 * @param urlString the URL text to convert
 * @return a URI built from the URL's protocol, user info, host, port, path,
 *         query and ref
 * @throws URISyntaxException if the components do not form a valid URI
 * @throws MalformedURLException if {@code urlString} cannot be parsed as a URL
 */
private URI getEncodedUriFromString(String urlString) throws URISyntaxException, MalformedURLException {
    final URL parsed = new URL(urlString);

    final String scheme = parsed.getProtocol();
    final String userInfo = parsed.getUserInfo();
    final String host = parsed.getHost();
    final int port = parsed.getPort();
    final String path = parsed.getPath();
    final String query = parsed.getQuery();
    final String fragment = parsed.getRef();

    return new URI(scheme, userInfo, host, port, path, query, fragment);
}

From source file:org.eclipse.smila.connectivity.framework.crawler.web.net.BasicUrlNormalizer.java

/**
 * {@inheritDoc}/*  w  w w .ja  v  a2  s  .c o  m*/
 */
public String normalize(String urlString) throws MalformedURLException {
    if ("".equals(urlString)) {
        return urlString;
    }

    // remove extra spaces
    urlString = urlString.trim();

    final URL url = new URL(urlString);

    final String protocol = url.getProtocol();
    String host = url.getHost();
    int port = url.getPort();
    String file = url.getFile();

    boolean changed = false;

    if (!urlString.startsWith(protocol)) {
        changed = true;
    }

    if ("http".equals(protocol) || "ftp".equals(protocol)) {

        if (host != null) {
            final String newHost = host.toLowerCase(); // lower case host
            if (!host.equals(newHost)) {
                host = newHost;
                changed = true;
            }
        }

        if (port == url.getDefaultPort()) { // uses default port
            port = -1; // so don't specify it
            changed = true;
        }

        if (file == null || "".equals(file)) { // add a slash
            file = "/";
            changed = true;
        }

        if (url.getRef() != null) { // remove the reference
            changed = true;
        }

        // check for unnecessary use of "/../"
        final String file2 = substituteUnnecessaryRelativePaths(file);

        if (!file.equals(file2)) {
            changed = true;
            file = file2;
        }

    }

    if (changed) {
        urlString = new URL(protocol, host, port, file).toString();
    }

    return urlString;
}

From source file:com.digitalpebble.stormcrawler.filtering.basic.BasicURLNormalizer.java

/**
 * Strips from a URL every query parameter whose name is contained in
 * {@code queryElementsToRemove}. This lets parameters that do not affect the
 * page content (for example an analytics parameter such as "utm_campaign")
 * be ignored. Surviving parameters are sorted with {@code comp}; the path and
 * the ref are kept.
 *
 * @param urlToFilter the URL to filter
 * @return {@code urlToFilter} itself when it has no query, the rebuilt URL
 *         otherwise, or {@code null} when the URL cannot be parsed
 */
private String filterQueryElements(String urlToFilter) {
    try {
        // Parse with java.net.URL first; per the original author this also
        // cleans up illegal characters such as '|'.
        URL source = new URL(urlToFilter);
        String queryText = source.getQuery();

        if (StringUtils.isEmpty(queryText)) {
            return urlToFilter;
        }

        List<NameValuePair> remaining = new ArrayList<>();
        URLEncodedUtils.parse(remaining, new Scanner(queryText), "UTF-8");
        for (Iterator<NameValuePair> cursor = remaining.iterator(); cursor.hasNext();) {
            NameValuePair candidate = cursor.next();
            if (queryElementsToRemove.contains(candidate.getName())) {
                cursor.remove();
            }
        }

        // Build the new "file" part in order: path, sorted query, ref.
        StringBuilder fileBuilder = new StringBuilder();
        String path = source.getPath();
        if (path != null) {
            fileBuilder.append(path);
        }
        if (!remaining.isEmpty()) {
            Collections.sort(remaining, comp);
            fileBuilder.append('?');
            fileBuilder.append(URLEncodedUtils.format(remaining, StandardCharsets.UTF_8));
        }
        String anchor = source.getRef();
        if (anchor != null) {
            fileBuilder.append('#');
            fileBuilder.append(anchor);
        }

        URL filtered = new URL(source.getProtocol(), source.getHost(), source.getPort(), fileBuilder.toString());
        return filtered.toString();
    } catch (MalformedURLException e) {
        LOG.warn("Invalid urlToFilter {}. {}", urlToFilter, e);
        return null;
    }
}

From source file:com.xpn.xwiki.web.XWikiServletURLFactory.java

/**
 * Converts a URL to a relative URL if it's a XWiki URL (keeping only the path + query string + anchor) and leave
 * the URL unchanged if it's an external URL.
 * <p>/*  www .j  a va 2 s .co  m*/
 * An URL is considered to be external if its server component doesn't match the server of the current request URL.
 * This means that URLs are made relative with respect to the current request URL rather than the current wiki set
 * on the XWiki context. Let's take an example:
 * 
 * <pre>
 * {@code
 * request URL: http://playground.xwiki.org/xwiki/bin/view/Sandbox/TestURL
 * current wiki: code (code.xwiki.org)
 * URL (1): http://code.xwiki.org/xwiki/bin/view/Main/WebHome
 * URL (2): http://playground.xwiki.org/xwiki/bin/view/Spage/Page
 * 
 * The result will be:
 * (1) http://code.xwiki.org/xwiki/bin/view/Main/WebHome
 * (2) /xwiki/bin/view/Spage/Page
 * }
 * </pre>
 * 
 * @param url the URL to convert
 * @return the converted URL as a string
 * @see com.xpn.xwiki.web.XWikiDefaultURLFactory#getURL(java.net.URL, com.xpn.xwiki.XWikiContext)
 */
@Override
public String getURL(URL url, XWikiContext context) {
    try {
        if (url == null) {
            return "";
        }

        String surl = url.toString();
        if (!surl.startsWith(serverURL.toString())) {
            // External URL: leave it as is.
            return surl;
        } else {
            // Internal XWiki URL: convert to relative.
            StringBuffer sbuf = new StringBuffer(url.getPath());
            String querystring = url.getQuery();
            if (!StringUtils.isEmpty(querystring)) {
                sbuf.append("?");
                sbuf.append(StringUtils.chomp(StringUtils.chomp(querystring, "&"), "&amp;"));
                // sbuf.append(querystring.replaceAll("&","&amp;"));
            }

            String anchor = url.getRef();
            if (!StringUtils.isEmpty(anchor)) {
                sbuf.append("#");
                sbuf.append(anchor);
            }
            return Util.escapeURL(sbuf.toString());
        }
    } catch (Exception e) {
        e.printStackTrace();
        return "";
    }
}

From source file:com.adito.core.CoreUtil.java

/**
 * @param redirect/*from  www.jav  a2s .c  o m*/
 * @return String
 */
static String processRefererString(String redirect) {
    try {
        URL u = new URL(redirect);
        String query = u.getQuery();
        if (query != null && !query.equals("")) {
            StringBuffer nq = new StringBuffer();
            StringTokenizer t = new StringTokenizer(query, "&");
            String parm = null;
            while (t.hasMoreTokens()) {
                parm = t.nextToken();
                if (!parm.startsWith("referer=") && !parm.startsWith("vpnMessage=")
                        && !parm.startsWith("vpnError=")) {
                    if (nq.length() > 0) {
                        nq.append("&");
                    }
                    nq.append(parm);
                }
            }
            query = nq.length() == 0 ? null : nq.toString();
        }
        StringBuffer file = new StringBuffer();
        if (u.getPath() != null) {
            file.append(u.getPath());
        }
        if (query != null) {
            file.append("?");
            file.append(query);
        }
        if (u.getRef() != null) {
            file.append("#");
            file.append(u.getRef());
        }
        u = new URL(u.getProtocol(), u.getHost(), u.getPort(), file.toString());
        return u.toExternalForm();
    } catch (MalformedURLException mrule) {
        int idx = redirect.indexOf("?");
        if (idx != -1) {
            String query = redirect.substring(idx + 1);
            redirect = redirect.substring(0, idx);
            if (query.length() > 0) {
                StringBuffer nq = new StringBuffer();
                StringTokenizer t = new StringTokenizer(query, "&");
                String parm = null;
                while (t.hasMoreTokens()) {
                    parm = t.nextToken();
                    if (!parm.startsWith("vpnMessage=") && !parm.startsWith("vpnError=")) {
                        if (nq.length() > 0) {
                            nq.append("&");
                        }
                        nq.append(parm);
                    }
                }
                query = nq.length() == 0 ? null : nq.toString();
                if (query != null) {
                    redirect = redirect + "?" + query;
                }
            }
        }
        return redirect;
    }
}

From source file:com.connectsdk.service.DLNAService.java

/**
 * Encodes a media URL unless it already looks encoded.
 * <p>
 * The URL is URL-decoded as UTF-8; if decoding changes the text, the input is
 * returned untouched. Otherwise the URL is split into its components and
 * re-assembled through the multi-argument {@link URI} constructor, and the
 * ASCII form of that URI is returned.
 *
 * @param mediaURL the URL to encode; may be {@code null} or empty
 * @return the empty string for a {@code null} or empty input, the input
 *         itself when decoding alters it, otherwise the ASCII-encoded URI
 * @throws MalformedURLException if {@code mediaURL} cannot be parsed as a URL
 * @throws URISyntaxException if the URL components do not form a valid URI
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
String encodeURL(String mediaURL)
        throws MalformedURLException, URISyntaxException, UnsupportedEncodingException {
    final boolean nothingToEncode = mediaURL == null || mediaURL.isEmpty();
    if (nothingToEncode) {
        return "";
    }

    final String decoded = URLDecoder.decode(mediaURL, "UTF-8");
    if (!decoded.equals(mediaURL)) {
        // Decoding changed the text, so the input is returned as it was given.
        return mediaURL;
    }

    final URL parsed = new URL(mediaURL);
    final URI encoded = new URI(
            parsed.getProtocol(),
            parsed.getUserInfo(),
            parsed.getHost(),
            parsed.getPort(),
            parsed.getPath(),
            parsed.getQuery(),
            parsed.getRef());
    return encoded.toASCIIString();
}