Example usage for java.net URL getDefaultPort

List of usage examples for java.net URL getDefaultPort

Introduction

On this page you can find example usages of java.net.URL.getDefaultPort().

Prototype

public int getDefaultPort() 

Source Link

Document

Gets the default port number of the protocol associated with this URL.

Usage

From source file:edu.uci.ics.crawler4j.robotstxt.RobotstxtServer.java

/**
 * Fetches and parses the robots.txt of the host serving {@code url} and stores the result in
 * {@code host2directivesCache} under that host.
 *
 * <p>The robots.txt location is built from the URL's own protocol, the host returned by
 * {@code getHost(url)} and the URL's port (omitted when it is absent or equal to the
 * protocol's default port).
 *
 * @param url a URL on the host whose robots.txt should be fetched
 * @return the directives parsed from robots.txt, or a newly constructed
 *         {@code HostDirectives} when nothing could be fetched or parsed
 */
private HostDirectives fetchDirectives(URL url) {
    WebURL robotsTxtUrl = new WebURL();
    String host = getHost(url);
    int explicitPort = url.getPort();
    String port = (explicitPort == -1 || explicitPort == url.getDefaultPort()) ? "" : ":" + explicitPort;
    // Use the URL's own scheme. The port is elided relative to that scheme's default, so a
    // hard-coded "http://" would turn https://host(:443)/ into http://host/robots.txt.
    robotsTxtUrl.setURL(url.getProtocol() + "://" + host + port + "/robots.txt");
    HostDirectives directives = null;
    PageFetchResult fetchResult = null;
    try {
        fetchResult = pageFetcher.fetchHeader(robotsTxtUrl);
        if (fetchResult.getStatusCode() == HttpStatus.SC_OK) {
            Page page = new Page(robotsTxtUrl);
            fetchResult.fetchContent(page);
            if (Util.hasPlainTextContent(page.getContentType())) {
                try {
                    String content;
                    if (page.getContentCharset() == null) {
                        content = new String(page.getContentData());
                    } else {
                        content = new String(page.getContentData(), page.getContentCharset());
                    }
                    directives = RobotstxtParser.parse(content, config.getUserAgentName());
                } catch (Exception e) {
                    // Best effort: a robots.txt that cannot be decoded or parsed falls back
                    // to the default directives created below.
                    e.printStackTrace();
                }
            }
        }
    } finally {
        if (fetchResult != null) {
            fetchResult.discardContentIfNotConsumed();
        }
    }
    if (directives == null) {
        // We still need to have this object to keep track of the time we
        // fetched it
        directives = new HostDirectives();
    }
    synchronized (host2directivesCache) {
        if (host2directivesCache.size() == config.getCacheSize()) {
            // Cache is full: evict the entry with the oldest last-access time.
            String minHost = null;
            long minAccessTime = Long.MAX_VALUE;
            for (Entry<String, HostDirectives> entry : host2directivesCache.entrySet()) {
                if (entry.getValue().getLastAccessTime() < minAccessTime) {
                    minAccessTime = entry.getValue().getLastAccessTime();
                    minHost = entry.getKey();
                }
            }
            if (minHost != null) {
                host2directivesCache.remove(minHost);
            }
        }
        host2directivesCache.put(host, directives);
    }
    return directives;
}

From source file:com.grendelscan.commons.http.CookieJar.java

/**
 * Returns the cookies matching the given URL by building a {@code CookieOrigin} from it and
 * delegating to {@code getMatchingCookies(CookieOrigin)}.
 *
 * @param url the URL to match cookies against
 * @return the cookies returned for the origin derived from {@code url}
 */
public List<Cookie> getMatchingCookies(final URL url) {
    final int explicitPort = url.getPort();
    // A negative port means the URL carries no explicit port; use the protocol default.
    final int effectivePort = (explicitPort < 0) ? url.getDefaultPort() : explicitPort;
    final boolean secure = url.getProtocol().equalsIgnoreCase("https");
    final CookieOrigin origin = new CookieOrigin(url.getHost(), effectivePort, url.getPath(), secure);
    return getMatchingCookies(origin);
}

From source file:com.kagilum.plugins.icescrum.IceScrumSession.java

/**
 * Registers the configured username and password on {@code client} for the host and port of
 * the configured server URL (with {@code "/version/"} appended).
 *
 * @throws MalformedURLException if the configured URL cannot be parsed
 */
private void setAuthentication() throws MalformedURLException {
    final URL versionUrl = new URL(settings.getUrl() + "/version/");
    final int explicitPort = versionUrl.getPort();
    // -1 means no explicit port in the URL; use the protocol's default instead.
    final int port = (explicitPort != -1) ? explicitPort : versionUrl.getDefaultPort();
    final AuthScope scope = new AuthScope(versionUrl.getHost(), port);
    final UsernamePasswordCredentials credentials =
            new UsernamePasswordCredentials(settings.getUsername(), settings.getPassword());
    client.getState().setCredentials(scope, credentials);
}

From source file:com.nanocrawler.robotstxt.RobotstxtServer.java

/**
 * Fetches and parses the robots.txt of the host serving {@code url} and stores the result in
 * {@code host2directivesCache} under that host.
 *
 * <p>The robots.txt location uses the URL's own protocol, the host returned by
 * {@code getHost(url)} and the URL's port (omitted when absent or equal to the protocol's
 * default port).
 *
 * @param url a URL on the host whose robots.txt should be fetched
 * @return the directives parsed from robots.txt, or a newly constructed
 *         {@code HostDirectives} when nothing could be fetched or parsed
 */
private HostDirectives fetchDirectives(URL url) {
    WebURL robotsTxtUrl = new WebURL();
    String host = getHost(url);
    int explicitPort = url.getPort();
    String port = (explicitPort == -1 || explicitPort == url.getDefaultPort()) ? "" : ":" + explicitPort;
    // Keep the scheme of the crawled URL: the port above is elided relative to that scheme's
    // default, so forcing "http://" would request robots.txt from a different origin for
    // https URLs.
    robotsTxtUrl.setURL(url.getProtocol() + "://" + host + port + "/robots.txt");
    HostDirectives directives = null;
    PageFetchResult fetchResult = null;

    try {
        fetchResult = pageFetcher.fetchHeader(robotsTxtUrl);
        // TO_DO: Does this work on redirects e.g. http://news.ycombinator.com/robots.txt -> https://news.ycombinator.com/robots.txt
        if (fetchResult.getStatusCode() == HttpStatus.SC_OK) {
            Page page = new Page(robotsTxtUrl);
            fetchResult.fetchContent(page);
            if (ContentTypeUtil.hasPlainTextContent(page.getContentType())) {
                try {
                    String content;
                    if (page.getContentCharset() == null) {
                        content = new String(page.getContentData());
                    } else {
                        content = new String(page.getContentData(), page.getContentCharset());
                    }
                    directives = RobotstxtParser.parse(content, config.getUserAgentName());
                } catch (Exception e) {
                    // Best effort: fall back to the default directives created below.
                    e.printStackTrace();
                }
            }
        }
    } finally {
        if (fetchResult != null) {
            fetchResult.discardContentIfNotConsumed();
        }
    }

    if (directives == null) {
        directives = new HostDirectives();
    }

    synchronized (host2directivesCache) {
        if (host2directivesCache.size() == config.getCacheSize()) {
            // Cache is full: evict the entry with the oldest last-access time.
            String minHost = null;
            long minAccessTime = Long.MAX_VALUE;
            for (Entry<String, HostDirectives> entry : host2directivesCache.entrySet()) {
                long accessTime = entry.getValue().getLastAccessTime();
                if (accessTime < minAccessTime) {
                    minAccessTime = accessTime;
                    minHost = entry.getKey();
                }
            }
            if (minHost != null) {
                host2directivesCache.remove(minHost);
            }
        }
        host2directivesCache.put(host, directives);
    }
    return directives;
}

From source file:frame.crawler4j.robotstxt.RobotstxtServer.java

/**
 * Fetches and parses the robots.txt of the host serving {@code url} and stores the result in
 * {@code host2directivesCache} under that host.
 *
 * <p>The robots.txt location uses the URL's own protocol, the host returned by
 * {@code getHost(url)} and the URL's port (omitted when absent or equal to the protocol's
 * default port).
 *
 * @param url a URL on the host whose robots.txt should be fetched
 * @return the directives parsed from robots.txt, or a newly constructed
 *         {@code HostDirectives} when nothing could be fetched or parsed
 */
private HostDirectives fetchDirectives(URL url) {
    WebURL robotsTxtUrl = new WebURL();
    String host = getHost(url);
    String port = (url.getPort() == url.getDefaultPort() || url.getPort() == -1) ? "" : ":" + url.getPort();
    // The port is dropped when it matches the default of the URL's own protocol, so the
    // scheme must come from the URL too; a fixed "http://" mis-targets https hosts.
    String scheme = url.getProtocol();
    robotsTxtUrl.setURL(scheme + "://" + host + port + "/robots.txt");
    HostDirectives directives = null;
    PageFetchResult fetchResult = null;
    try {
        fetchResult = pageFetcher.fetchHeader(robotsTxtUrl);
        if (fetchResult.getStatusCode() == HttpStatus.SC_OK) {
            Page page = new Page(robotsTxtUrl);
            fetchResult.fetchContent(page);
            if (Util.hasPlainTextContent(page.getContentType())) {
                try {
                    String content;
                    if (page.getContentCharset() == null) {
                        content = new String(page.getContentData());
                    } else {
                        content = new String(page.getContentData(), page.getContentCharset());
                    }
                    directives = RobotstxtParser.parse(content, config.getUserAgentName());
                } catch (Exception e) {
                    // Best effort: fall back to the default directives created below.
                    e.printStackTrace();
                }
            }
        }
    } finally {
        if (fetchResult != null) {
            fetchResult.discardContentIfNotConsumed();
        }
    }
    if (directives == null) {
        // We still need to have this object to keep track of the time we
        // fetched it
        directives = new HostDirectives();
    }
    synchronized (host2directivesCache) {
        if (host2directivesCache.size() == config.getCacheSize()) {
            // Cache is full: evict the entry with the oldest last-access time.
            String minHost = null;
            long minAccessTime = Long.MAX_VALUE;
            for (Entry<String, HostDirectives> entry : host2directivesCache.entrySet()) {
                if (entry.getValue().getLastAccessTime() < minAccessTime) {
                    minAccessTime = entry.getValue().getLastAccessTime();
                    minHost = entry.getKey();
                }
            }
            if (minHost != null) {
                host2directivesCache.remove(minHost);
            }
        }
        host2directivesCache.put(host, directives);
    }
    return directives;
}

From source file:de.comlineag.snc.webcrawler.robotstxt.RobotstxtServer.java

/**
 * Fetches and parses the robots.txt of the host serving {@code url} and stores the result in
 * {@code host2directivesCache} under that host.
 *
 * <p>The robots.txt location uses the URL's own protocol, the host returned by
 * {@code getHost(url)} and the URL's port (omitted when absent or equal to the protocol's
 * default port).
 *
 * @param url a URL on the host whose robots.txt should be fetched
 * @return the directives parsed from robots.txt, or a newly constructed
 *         {@code HostDirectives} when nothing could be fetched or parsed
 */
private HostDirectives fetchDirectives(URL url) {
    WebURL robotsTxtUrl = new WebURL();
    String host = getHost(url);
    int explicitPort = url.getPort();
    String port = (explicitPort == -1 || explicitPort == url.getDefaultPort()) ? "" : ":" + explicitPort;
    // Build the location with the URL's own scheme; the port suffix is computed against
    // that scheme's default, so a hard-coded "http://" would be wrong for https URLs.
    robotsTxtUrl.setURL(url.getProtocol() + "://" + host + port + "/robots.txt");
    HostDirectives directives = null;
    PageFetchResult fetchResult = null;
    try {
        fetchResult = pageFetcher.fetchHeader(robotsTxtUrl);
        if (fetchResult.getStatusCode() == HttpStatus.SC_OK) {
            Page page = new Page(robotsTxtUrl);
            fetchResult.fetchContent(page);
            if (WebCrawlerUtil.hasPlainTextContent(page.getContentType())) {
                try {
                    String content;
                    if (page.getContentCharset() == null) {
                        content = new String(page.getContentData());
                    } else {
                        content = new String(page.getContentData(), page.getContentCharset());
                    }
                    directives = RobotstxtParser.parse(content, config.getUserAgentName());
                } catch (Exception e) {
                    logger.error("Error occurred while fetching (robots) url: " + robotsTxtUrl.getURL(), e);
                }
            }
        }
    } finally {
        if (fetchResult != null) {
            fetchResult.discardContentIfNotConsumed();
        }
    }
    if (directives == null) {
        // We still need to have this object to keep track of the time we
        // fetched it
        directives = new HostDirectives();
    }
    synchronized (host2directivesCache) {
        if (host2directivesCache.size() == config.getCacheSize()) {
            // Cache is full: evict the entry with the oldest last-access time.
            String minHost = null;
            long minAccessTime = Long.MAX_VALUE;
            for (Entry<String, HostDirectives> entry : host2directivesCache.entrySet()) {
                if (entry.getValue().getLastAccessTime() < minAccessTime) {
                    minAccessTime = entry.getValue().getLastAccessTime();
                    minHost = entry.getKey();
                }
            }
            if (minHost != null) {
                host2directivesCache.remove(minHost);
            }
        }
        host2directivesCache.put(host, directives);
    }
    return directives;
}

From source file:org.nmdp.service.epitope.task.URLProcessor.java

/**
 * Asks the FTP server for the last-modified time of the file addressed by {@code url}.
 *
 * <p>Connects to the URL's host (explicit port, or the protocol default when none is given),
 * logs in as {@code anonymous}, switches to local passive mode and queries the modification
 * time of the URL's path. The client is disconnected again before returning.
 *
 * @param url the FTP URL of the file
 * @return the modification time in milliseconds since the epoch, or {@code 0} when it could
 *         not be obtained or parsed (the failure is logged)
 */
public long getFtpLastModifiedTime(URL url) {
    FTPClient ftpClient = new FTPClient();
    try {
        int port = url.getPort() == -1 ? url.getDefaultPort() : url.getPort();
        ftpClient.connect(url.getHost(), port);
        ftpClient.login("anonymous", "anonymous");
        ftpClient.enterLocalPassiveMode();
        String filePath = url.getPath();
        String time = ftpClient.getModificationTime(filePath);
        if (time == null) {
            logger.error("server returned no modification time for url: " + url);
            return 0;
        }
        // The reply may be the bare timestamp or a full reply line such as
        // "213 20240101120000" (NOTE(review): depends on the Commons Net version in use);
        // the timestamp is the last whitespace-separated token in both cases.
        String[] tokens = time.trim().split("\\s+");
        String timePart = tokens[tokens.length - 1];
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
        // MDTM timestamps are expressed in UTC (RFC 3659), not in the JVM's default zone.
        dateFormat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
        Date modificationTime = dateFormat.parse(timePart);
        return modificationTime.getTime();
    } catch (Exception e) {
        logger.error("failed to parse time for url: " + url, e);
        return 0;
    } finally {
        if (ftpClient.isConnected()) {
            try {
                ftpClient.disconnect();
            } catch (IOException ex) {
                logger.error("failed to disconnect from ftp server for url: " + url, ex);
            }
        }
    }
}

From source file:io.github.cidisk.indexcrawler.robotstxt.RobotstxtServer.java

/**
 * Fetches and parses the robots.txt of the host serving {@code url} and stores the result in
 * {@code host2directivesCache} under that host.
 *
 * <p>The robots.txt location uses the URL's own protocol, the host returned by
 * {@code getHost(url)} and the URL's port (omitted when absent or equal to the protocol's
 * default port). Fetch and parse failures are logged or ignored; they never propagate.
 *
 * @param url a URL on the host whose robots.txt should be fetched
 * @return the directives parsed from robots.txt, or a newly constructed
 *         {@code HostDirectives} when nothing could be fetched or parsed
 */
private HostDirectives fetchDirectives(URL url) {
    WebURL robotsTxtUrl = new WebURL();
    String host = getHost(url);
    int explicitPort = url.getPort();
    String port = (explicitPort == -1 || explicitPort == url.getDefaultPort()) ? "" : ":" + explicitPort;
    // Use the URL's own scheme: the port is elided relative to that scheme's default, so a
    // hard-coded "http://" would turn https://host(:443)/ into http://host/robots.txt.
    robotsTxtUrl.setURL(url.getProtocol() + "://" + host + port + "/robots.txt");
    HostDirectives directives = null;
    PageFetchResult fetchResult = null;
    try {
        fetchResult = pageFetcher.fetchPage(robotsTxtUrl);
        if (fetchResult.getStatusCode() == HttpStatus.SC_OK) {
            Page page = new Page(robotsTxtUrl);
            fetchResult.fetchContent(page);
            if (Util.hasPlainTextContent(page.getContentType())) {
                String content;
                if (page.getContentCharset() == null) {
                    // No charset declared: robots.txt is specified as UTF-8 (RFC 9309), so
                    // do not depend on the platform default encoding.
                    content = new String(page.getContentData(), java.nio.charset.StandardCharsets.UTF_8);
                } else {
                    content = new String(page.getContentData(), page.getContentCharset());
                }
                directives = RobotstxtParser.parse(content, config.getUserAgentName());
            }
        }
    } catch (SocketException | UnknownHostException | SocketTimeoutException ignored) {
        // No logging here, as it just means that robots.txt doesn't exist on this server which is perfectly ok
    } catch (PageBiggerThanMaxSizeException pbtms) {
        logger.error("Error occurred while fetching (robots) url: {}, {}", robotsTxtUrl.getURL(),
                pbtms.getMessage());
    } catch (Exception e) {
        logger.error("Error occurred while fetching (robots) url: " + robotsTxtUrl.getURL(), e);
    } finally {
        if (fetchResult != null) {
            fetchResult.discardContentIfNotConsumed();
        }
    }

    if (directives == null) {
        // We still need to have this object to keep track of the time we
        // fetched it
        directives = new HostDirectives();
    }
    synchronized (host2directivesCache) {
        if (host2directivesCache.size() == config.getCacheSize()) {
            // Cache is full: evict the entry with the oldest last-access time.
            String minHost = null;
            long minAccessTime = Long.MAX_VALUE;
            for (Entry<String, HostDirectives> entry : host2directivesCache.entrySet()) {
                if (entry.getValue().getLastAccessTime() < minAccessTime) {
                    minAccessTime = entry.getValue().getLastAccessTime();
                    minHost = entry.getKey();
                }
            }
            if (minHost != null) {
                host2directivesCache.remove(minHost);
            }
        }
        host2directivesCache.put(host, directives);
    }
    return directives;
}

From source file:com.gargoylesoftware.htmlunit.CookieManager.java

/**
 * Gets the port of the URL./*from   w  w w.  ja  v a  2  s . co m*/
 * This functionality is implemented here as protected method to allow subclass to change it
 * as workaround to <a href="http://code.google.com/p/googleappengine/issues/detail?id=4784">
 * Google App Engine bug 4784</a>.
 * @param url the URL
 * @return the port use to connect the server
 */
protected int getPort(final URL url) {
    if (url.getPort() != -1) {
        return url.getPort();
    }
    return url.getDefaultPort();
}

From source file:org.electrologic.convergence.server.NotaryBundleServlet.java

/**
 * Answers a GET request with a JSON notary bundle describing this server: its host, ports
 * and certificate, plus the bundle's own location.
 *
 * @param req the incoming request; its request URL supplies host, port and bundle location
 * @param resp the response the JSON document is written to
 * @throws ServletException if the JSON document cannot be constructed
 * @throws IOException if the request URL cannot be parsed or the response cannot be written
 */
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
    // Derive host and port from the request URL; fall back to the protocol's default port
    // when the URL does not name one explicitly.
    final URL requestUrl = new URL(req.getRequestURL().toString());
    final String host = requestUrl.getHost();
    final int explicitPort = requestUrl.getPort();
    final int port = (explicitPort != -1) ? explicitPort : requestUrl.getDefaultPort();

    final String resultStr;
    try {
        final JSONObject result = new JSONObject();
        result.put("version", 1);

        // Single-entry host list describing this server.
        final JSONObject hostElement = new JSONObject();
        hostElement.put("host", host);
        hostElement.put("http_port", 80); // the FF addon seems to have a problem when this is not present
        hostElement.put("ssl_port", port);
        hostElement.put("certificate", pemCert);
        final JSONArray hostList = new JSONArray();
        hostList.put(hostElement);
        result.put("hosts", hostList);

        result.put("name", "Convergence J2EE Server");
        result.put("bundle_location", requestUrl.toString());

        resultStr = result.toString();
    } catch (JSONException ex) {
        final String msg = "Failed to construct JSON result.";
        logger.error(msg, ex);
        throw new ServletException(msg, ex);
    }

    resp.setCharacterEncoding("UTF-8");
    resp.setContentType("application/json");
    resp.getOutputStream().print(resultStr);
}