Example usage for java.net URL toExternalForm

List of usage examples for java.net URL toExternalForm

Introduction

On this page you can find example usages of java.net.URL.toExternalForm().

Prototype

public String toExternalForm() 

Source Link

Document

Constructs a string representation of this URL.

Usage

From source file:bixo.robots.RobotUtils.java

/**
 * Externally visible, static method for use in tools and for testing.
 * Fetches the indicated robots.txt file, parses it, and generates rules.
 *
 * <p>Fetch failures never propagate to the caller; each one is converted
 * into the rules produced by {@code parser.failedFetch(...)} for a
 * matching HTTP status code.
 *
 * @param fetcher Fetcher for downloading robots.txt file
 * @param parser Parser that turns the fetched content (or a failure status) into rules
 * @param robotsUrl URL to robots.txt file
 * @return Robot rules
 */
public static BaseRobotRules getRobotRules(BaseFetcher fetcher, BaseRobotsParser parser, URL robotsUrl) {

    try {
        String urlToFetch = robotsUrl.toExternalForm();
        ScoredUrlDatum scoredUrl = new ScoredUrlDatum(urlToFetch);
        FetchedDatum result = fetcher.get(scoredUrl);

        // HACK! DANGER! Some sites will redirect the request to the top-level domain
        // page, without returning a 404. So look for a response which has a redirect,
        // and the fetched content is not plain text, and assume it's one of these...
        // which is the same as not having a robots.txt file.

        String contentType = result.getContentType();
        boolean isPlainText = (contentType != null) && (contentType.startsWith("text/plain"));
        if ((result.getNumRedirects() > 0) && !isPlainText) {
            return parser.failedFetch(HttpStatus.SC_GONE);
        }

        return parser.parseContent(urlToFetch, result.getContentBytes(), contentType,
                fetcher.getUserAgent().getAgentName());
    } catch (HttpFetchException e) {
        // The server answered with an error status; hand that status to the parser.
        return parser.failedFetch(e.getHttpStatus());
    } catch (IOFetchException e) {
        return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR);
    } catch (RedirectFetchException e) {
        // Other sites will have circular redirects, so treat this as a missing robots.txt
        return parser.failedFetch(HttpStatus.SC_GONE);
    } catch (Exception e) {
        LOGGER.error("Unexpected exception fetching robots.txt: " + robotsUrl, e);
        return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR);
    } catch (Throwable t) {
        // Deliberately broad: errors are also reported as a server-side failure.
        LOGGER.error("Unexpected throwable caught while fetching robots.txt: " + robotsUrl, t);
        return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR);
    }
}

From source file:crawlercommons.robots.RobotUtils.java

/**
 * Downloads a robots.txt file and converts it into robot rules. Public and
 * static so that tools and tests can call it directly.
 *
 * <p>Every fetch failure is translated into the result of
 * {@code parser.failedFetch(...)} with an HTTP status code; no exception
 * escapes this method.
 *
 * @param fetcher
 *            Fetcher for downloading robots.txt file
 * @param parser
 *            Parser used to build rules from the content or from a failure
 *            status
 * @param robotsUrl
 *            URL to robots.txt file
 * @return Robot rules
 */
public static BaseRobotRules getRobotRules(BaseHttpFetcher fetcher, BaseRobotsParser parser, URL robotsUrl) {

    try {
        final String robotsLocation = robotsUrl.toExternalForm();
        final FetchedResult fetched = fetcher.get(robotsLocation);

        // Workaround: some sites answer a robots.txt request by redirecting
        // to their top-level page instead of returning a 404. A redirected
        // response whose content is not plain text is taken to be one of
        // those, i.e. equivalent to having no robots.txt at all.
        final String mimeType = fetched.getContentType();
        final boolean notPlainText = (mimeType == null) || !mimeType.startsWith("text/plain");
        if ((fetched.getNumRedirects() > 0) && notPlainText) {
            return parser.failedFetch(HttpStatus.SC_GONE);
        }

        final String agentName = fetcher.getUserAgent().getAgentName();
        return parser.parseContent(robotsLocation, fetched.getContent(), fetched.getContentType(), agentName);
    } catch (HttpFetchException httpFailure) {
        return parser.failedFetch(httpFailure.getHttpStatus());
    } catch (IOFetchException ioFailure) {
        return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR);
    } catch (RedirectFetchException redirectFailure) {
        // Circular redirects are handled like a missing robots.txt.
        return parser.failedFetch(HttpStatus.SC_GONE);
    } catch (Exception unexpected) {
        LOGGER.error("Unexpected exception fetching robots.txt: " + robotsUrl, unexpected);
        return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR);
    }
}

From source file:Main.java

/**
 * Creates a new {@link URL} from the external form of the given one, so
 * that clients receive their own instance rather than an internal one.
 *
 * @param url the URL to copy; may be {@code null}
 * @return a new URL built from {@code url.toExternalForm()}, or
 *         {@code null} when {@code url} is {@code null}
 * @throws RuntimeException if the external form cannot be parsed back into a URL
 */
static URL copyUrl(final URL url) {
    if (url != null) {
        final String externalForm = url.toExternalForm();
        try {
            return new URL(externalForm);
        } catch (final MalformedURLException cause) {
            throw new RuntimeException("Error in copying URL", cause);
        }
    }

    // Nothing to copy.
    return null;
}

From source file:de.nava.informa.parsers.OPMLParser.java

/**
 * Parses the document located at the given URL by delegating to
 * {@code parse(InputSource, URL)}.
 *
 * @param aURL location of the document; its external form is used as the
 *             system identifier of the input source
 * @return whatever the delegated {@code parse} call returns
 * @throws IOException propagated from the delegated call
 * @throws ParseException propagated from the delegated call
 */
public static Collection parse(URL aURL) throws IOException, ParseException {
    final InputSource source = new InputSource(aURL.toExternalForm());
    return parse(source, aURL);
}

From source file:com.gargoylesoftware.htmlunit.protocol.data.DataUrlDecoder.java

/**
 * Decodes a data URL, giving simple access to the information it contains.
 * The URL is converted to its external string form and handed to
 * {@code decodeDataURL}.
 *
 * @param url the URL to decode
 * @return the {@link DataUrlDecoder} holding the decoded information
 * @throws UnsupportedEncodingException if the encoding specified by the data URL is invalid or not
 * available on the JVM
 * @throws DecoderException if decoding did not succeed
 */
public static DataUrlDecoder decode(final URL url) throws UnsupportedEncodingException, DecoderException {
    final String externalForm = url.toExternalForm();
    return decodeDataURL(externalForm);
}

From source file:com.trivago.mail.pigeon.configuration.Settings.java

/**
 * Returns the shared {@code Settings} instance, creating it on first use.
 *
 * <p>When no instance exists yet, the configuration is loaded from
 * {@code fileName} if given; otherwise from the path in the
 * {@code PIDGEON_CONFIG} environment variable; otherwise from
 * {@code configuration.properties} on the context class loader's classpath.
 * The shared instance is only replaced after its configuration has loaded
 * successfully, so a failed load does not leave an unconfigured instance
 * cached.
 *
 * <p>NOTE(review): {@code nocache} only suppresses the early cached return;
 * when an instance already exists nothing below reloads it. Confirm whether
 * a reload was intended.
 *
 * @param fileName path of the properties file, or {@code null} to use the
 *                 environment variable / classpath fallback
 * @param nocache  {@code true} to skip the early "return cached instance" branch
 * @return the shared settings instance
 * @throws ConfigurationRuntimeException if the configuration cannot be
 *         loaded or the fallback classpath resource is missing
 */
public static Settings create(String fileName, boolean nocache) {
    log.trace("Settings instance requested");
    if (fileName == null && instance != null && !nocache) {
        log.trace("Returning cached instance");
        return instance;
    } else if (fileName == null && instance == null) {
        log.trace("Requesting ENV PIDGEON_CONFIG as path to properties as fileName was null");

        String propertyFileName = System.getenv("PIDGEON_CONFIG");

        // Fall back to the bundled properties only when the environment
        // variable gives no path; previously the variable was always ignored.
        if (propertyFileName == null || propertyFileName.equals("")) {
            log.warn(
                    "ENV is empty and no filename was given -> no config properties found! Using configuration.properties");

            URL resource = Thread.currentThread().getContextClassLoader().getResource("configuration.properties");
            if (resource == null) {
                // getResource returns null for a missing resource; fail with a
                // clear message instead of a NullPointerException.
                throw new ConfigurationRuntimeException(
                        "configuration.properties was not found on the classpath");
            }
            propertyFileName = resource.toExternalForm();
        }

        Settings loaded = new Settings();
        try {
            loaded.setConfiguration(new PropertiesConfiguration(propertyFileName));
        } catch (ConfigurationException e) {
            log.error(e);
            throw new ConfigurationRuntimeException(e);
        }
        // Publish only after the configuration loaded successfully.
        instance = loaded;
    } else if (fileName != null && instance == null) {
        log.trace("Requesting file properties from " + fileName);

        Settings loaded = new Settings();
        try {
            loaded.setConfiguration(new PropertiesConfiguration(fileName));
        } catch (ConfigurationException e) {
            log.error(e);
            throw new ConfigurationRuntimeException(e);
        }
        // Publish only after the configuration loaded successfully.
        instance = loaded;
    }
    return instance;
}

From source file:com.asakusafw.shafu.core.net.ShafuNetwork.java

/**
 * Issues an HTTP GET for the given URL and passes the response body stream
 * to the processor. The response is always handed to {@code closeQuietly}
 * afterwards, whether processing succeeds or fails.
 *
 * @param url the location to fetch
 * @param processor consumer of the response content stream
 * @param <T> the processor's result type
 * @return the value produced by {@code processor}
 * @throws IOException if the request fails or the status code is not {@code SC_OK}
 */
private static <T> T processHttpContent(URL url, IContentProcessor<T> processor) throws IOException {
    HttpClient httpClient = Activator.getHttpClient();
    HttpGet httpGet = new HttpGet(url.toExternalForm());
    HttpResponse httpResponse = httpClient.execute(httpGet);
    try {
        int statusCode = httpResponse.getStatusLine().getStatusCode();
        if (statusCode != HttpStatus.SC_OK) {
            // Anything other than 200 is reported with the request URI and status line.
            String message = MessageFormat.format(Messages.ShafuNetwork_failedToOpenHttpContent,
                    httpGet.getURI(), httpResponse.getStatusLine());
            throw new IOException(message);
        }
        return processor.process(httpResponse.getEntity().getContent());
    } finally {
        closeQuietly(httpResponse);
    }
}

From source file:com.meltmedia.rodimus.RodimusCli.java

/**
 * Builds a {@link StreamSource} for the given URL: the system identifier is
 * the URL's external form and the input stream is opened from the URL.
 *
 * @param url the location to read from
 * @return a stream source with both system id and input stream set
 * @throws IOException if the URL's stream cannot be opened
 */
public static StreamSource createStreamSource(URL url) throws IOException {
    final String systemId = url.toExternalForm();
    return new StreamSource(url.openStream(), systemId);
}

From source file:com.autentia.tnt.util.JPivotUtils.java

/**
 * Crea una conexin con el datasource por defecto sobre el cubo OLAP y ejecuta la query devolviendo el resultado como
 * <b>ResultSet</b>/*from   www. j  av a2  s .  c o m*/
 * 
 * @param mdxQuery query a ejecutar
 * @param cubeSchema esquema situado en src/main/resources que representa el cubo OLAP
 * @return un objeto ResultSet con la consulta realizada
 * @throws ClassNotFoundException
 * @throws SQLException
 */
public static ResultSet getResultSet(String mdxQuery, String cubeSchema)
        throws ClassNotFoundException, SQLException {
    Class.forName("mondrian.olap4j.MondrianOlap4jDriver");
    final URL url = JPivotUtils.class.getResource(cubeSchema);
    final String catalog = url.toExternalForm();

    final Connection conn = DriverManager.getConnection(
            "jdbc:mondrian:DataSource=java:comp/" + DATA_SOURCE + "/galileoDS;Catalog=" + catalog + ";");
    final OlapWrapper wrapper = (OlapWrapper) conn;
    final OlapConnection olConn = wrapper.unwrap(OlapConnection.class);
    final OlapStatement statement = olConn.createStatement();

    return statement.executeOlapQuery(mdxQuery);
}

From source file:com.mothsoft.alexis.util.NetworkingUtil.java

/**
 * Sends the given parameters as a URL-encoded form via HTTP POST and returns
 * the response wrapped in an {@code HttpClientResponse}.
 *
 * <p>On success the response stream is handed to the returned object
 * together with the request, so the caller is expected to release it. On a
 * non-200 status the request is aborted before the exception is thrown,
 * because the caller never receives a handle it could use to release the
 * connection.
 *
 * @param url    the address to post to
 * @param params the form parameters to send
 * @return the response with status, content stream and charset
 * @throws IOException if the request fails or the status code is not 200
 */
public static HttpClientResponse post(final URL url, final List<NameValuePair> params) throws IOException {
    final HttpPost post = new HttpPost(url.toExternalForm());

    UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params);
    post.setEntity(formEntity);

    post.addHeader("Accept-Charset", "UTF-8");

    final HttpClient client = getClient();
    HttpResponse response = client.execute(post);
    int status = response.getStatusLine().getStatusCode();

    if (status != 200) {
        // Release the underlying connection; otherwise it stays checked out
        // because nobody consumes or closes the response entity.
        post.abort();
        throw new IOException("status: " + status);
    }

    final HttpEntity entity = response.getEntity();
    final InputStream is = entity.getContent();
    final Charset charset = getCharset(entity);

    return new HttpClientResponse(post, status, null, null, is, charset);
}