List of usage examples for java.net.URL.toExternalForm()
public String toExternalForm()
From source file:bixo.robots.RobotUtils.java
/** * Externally visible, static method for use in tools and for testing. * Fetch the indicated robots.txt file, parse it, and generate rules. * // w w w.j av a 2s . c o m * @param fetcher Fetcher for downloading robots.txt file * @param robotsUrl URL to robots.txt file * @return Robot rules */ public static BaseRobotRules getRobotRules(BaseFetcher fetcher, BaseRobotsParser parser, URL robotsUrl) { try { String urlToFetch = robotsUrl.toExternalForm(); ScoredUrlDatum scoredUrl = new ScoredUrlDatum(urlToFetch); FetchedDatum result = fetcher.get(scoredUrl); // HACK! DANGER! Some sites will redirect the request to the top-level domain // page, without returning a 404. So look for a response which has a redirect, // and the fetched content is not plain text, and assume it's one of these... // which is the same as not having a robots.txt file. String contentType = result.getContentType(); boolean isPlainText = (contentType != null) && (contentType.startsWith("text/plain")); if ((result.getNumRedirects() > 0) && !isPlainText) { return parser.failedFetch(HttpStatus.SC_GONE); } return parser.parseContent(urlToFetch, result.getContentBytes(), result.getContentType(), fetcher.getUserAgent().getAgentName()); } catch (HttpFetchException e) { return parser.failedFetch(e.getHttpStatus()); } catch (IOFetchException e) { return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR); } catch (RedirectFetchException e) { // Other sites will have circular redirects, so treat this as a missing robots.txt return parser.failedFetch(HttpStatus.SC_GONE); } catch (Exception e) { LOGGER.error("Unexpected exception fetching robots.txt: " + robotsUrl, e); return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR); } catch (Throwable t) { LOGGER.error("Unexpected throwable caught while fetching robots.tx: " + robotsUrl, t); return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR); } }
From source file:crawlercommons.robots.RobotUtils.java
/** * Externally visible, static method for use in tools and for testing. Fetch * the indicated robots.txt file, parse it, and generate rules. * // w ww .j a v a 2 s. c o m * @param fetcher * Fetcher for downloading robots.txt file * @param robotsUrl * URL to robots.txt file * @return Robot rules */ public static BaseRobotRules getRobotRules(BaseHttpFetcher fetcher, BaseRobotsParser parser, URL robotsUrl) { try { String urlToFetch = robotsUrl.toExternalForm(); FetchedResult result = fetcher.get(urlToFetch); // HACK! DANGER! Some sites will redirect the request to the // top-level domain // page, without returning a 404. So look for a response which has a // redirect, // and the fetched content is not plain text, and assume it's one of // these... // which is the same as not having a robots.txt file. String contentType = result.getContentType(); boolean isPlainText = (contentType != null) && (contentType.startsWith("text/plain")); if ((result.getNumRedirects() > 0) && !isPlainText) { return parser.failedFetch(HttpStatus.SC_GONE); } return parser.parseContent(urlToFetch, result.getContent(), result.getContentType(), fetcher.getUserAgent().getAgentName()); } catch (HttpFetchException e) { return parser.failedFetch(e.getHttpStatus()); } catch (IOFetchException e) { return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR); } catch (RedirectFetchException e) { // Other sites will have circular redirects, so treat this as a // missing robots.txt return parser.failedFetch(HttpStatus.SC_GONE); } catch (Exception e) { LOGGER.error("Unexpected exception fetching robots.txt: " + robotsUrl, e); return parser.failedFetch(HttpStatus.SC_INTERNAL_SERVER_ERROR); } }
From source file:Main.java
/** * Returns a copy of the specified URL; used to ensure that mutable * internal state is not leaked out to clients * @param url//from w w w. j a va 2s . com * @return */ static URL copyUrl(final URL url) { // If null, return if (url == null) { return url; } try { // Copy return new URL(url.toExternalForm()); } catch (final MalformedURLException e) { throw new RuntimeException("Error in copying URL", e); } }
From source file:de.nava.informa.parsers.OPMLParser.java
/**
 * Convenience overload that wraps the URL's external form in an {@code InputSource}
 * and delegates to {@code parse(InputSource, URL)}.
 *
 * @param aURL the URL to read from; also passed through to the delegate
 * @return whatever the delegate overload returns
 * @throws IOException    as declared; raised by the delegate
 * @throws ParseException as declared; raised by the delegate
 */
public static Collection parse(URL aURL) throws IOException, ParseException {
    final String systemId = aURL.toExternalForm();
    final InputSource source = new InputSource(systemId);
    return parse(source, aURL);
}
From source file:com.gargoylesoftware.htmlunit.protocol.data.DataUrlDecoder.java
/**
 * Decodes a data URL, giving simple access to the information it contains.
 * The URL is converted to its external string form and passed to
 * {@code decodeDataURL(String)}.
 *
 * @param url the URL to decode
 * @return the {@link DataUrlDecoder} holding the decoded information
 * @throws UnsupportedEncodingException if the encoding specified by the data URL is
 *         invalid or not available on the JVM
 * @throws DecoderException if decoding did not succeed
 */
public static DataUrlDecoder decode(final URL url) throws UnsupportedEncodingException, DecoderException {
    final String externalForm = url.toExternalForm();
    return decodeDataURL(externalForm);
}
From source file:com.trivago.mail.pigeon.configuration.Settings.java
public static Settings create(String fileName, boolean nocache) { log.trace("Settings instance requested"); if (fileName == null && instance != null && !nocache) { log.trace("Returning cached instance"); return instance; } else if (fileName == null && instance == null) { log.trace("Requesting ENV PIDGEON_CONFIG as path to properties as fileName was null"); String propertyFileName = System.getenv("PIDGEON_CONFIG"); if (propertyFileName == null || propertyFileName.equals("")) { log.warn(// www . j ava 2 s.c o m "ENV is empty and no filename was given -> no config properties found! Using configuration.properties"); } URL resource = Thread.currentThread().getContextClassLoader().getResource("configuration.properties"); propertyFileName = resource.toExternalForm(); instance = new Settings(); try { instance.setConfiguration(new PropertiesConfiguration(propertyFileName)); } catch (ConfigurationException e) { log.error(e); throw new ConfigurationRuntimeException(e); } } else if (fileName != null && instance == null) { log.trace("Requesting file properties from " + fileName); instance = new Settings(); try { instance.setConfiguration(new PropertiesConfiguration(fileName)); } catch (ConfigurationException e) { log.error(e); throw new ConfigurationRuntimeException(e); } } return instance; }
From source file:com.asakusafw.shafu.core.net.ShafuNetwork.java
private static <T> T processHttpContent(URL url, IContentProcessor<T> processor) throws IOException { HttpClient client = Activator.getHttpClient(); HttpGet request = new HttpGet(url.toExternalForm()); HttpResponse response = client.execute(request); try {//from w w w . j a v a 2 s. c om if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { return processor.process(response.getEntity().getContent()); } else { throw new IOException(MessageFormat.format(Messages.ShafuNetwork_failedToOpenHttpContent, request.getURI(), response.getStatusLine())); } } finally { closeQuietly(response); } }
From source file:com.meltmedia.rodimus.RodimusCli.java
/**
 * Builds a {@link StreamSource} for the given URL: the system id is the URL's external
 * form and the input stream is the one opened from the URL.
 *
 * @param url the location to read from
 * @return a stream source backed by an open stream; the caller is responsible for it
 * @throws IOException if the URL's stream cannot be opened
 */
public static StreamSource createStreamSource(URL url) throws IOException {
    final String systemId = url.toExternalForm();
    return new StreamSource(url.openStream(), systemId);
}
From source file:com.autentia.tnt.util.JPivotUtils.java
/** * Crea una conexin con el datasource por defecto sobre el cubo OLAP y ejecuta la query devolviendo el resultado como * <b>ResultSet</b>/*from www. j av a2 s . c o m*/ * * @param mdxQuery query a ejecutar * @param cubeSchema esquema situado en src/main/resources que representa el cubo OLAP * @return un objeto ResultSet con la consulta realizada * @throws ClassNotFoundException * @throws SQLException */ public static ResultSet getResultSet(String mdxQuery, String cubeSchema) throws ClassNotFoundException, SQLException { Class.forName("mondrian.olap4j.MondrianOlap4jDriver"); final URL url = JPivotUtils.class.getResource(cubeSchema); final String catalog = url.toExternalForm(); final Connection conn = DriverManager.getConnection( "jdbc:mondrian:DataSource=java:comp/" + DATA_SOURCE + "/galileoDS;Catalog=" + catalog + ";"); final OlapWrapper wrapper = (OlapWrapper) conn; final OlapConnection olConn = wrapper.unwrap(OlapConnection.class); final OlapStatement statement = olConn.createStatement(); return statement.executeOlapQuery(mdxQuery); }
From source file:com.mothsoft.alexis.util.NetworkingUtil.java
/**
 * Sends the given parameters as a URL-encoded form POST to the URL and wraps the
 * response stream.
 *
 * <p>When the request is rejected (any status other than 200, or a response without
 * an entity) the request is aborted before the exception is thrown, so the
 * underlying connection is not left checked out.
 *
 * @param url    the target of the POST
 * @param params the form fields to send
 * @return a response wrapper holding the request, the status, the content stream and
 *         the charset reported by {@code getCharset(entity)}
 * @throws IOException if the request fails, the status is not 200, or the response
 *         carries no entity
 */
public static HttpClientResponse post(final URL url, final List<NameValuePair> params) throws IOException {
    final HttpPost post = new HttpPost(url.toExternalForm());
    UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params);
    post.setEntity(formEntity);
    post.addHeader("Accept-Charset", "UTF-8");

    final HttpClient client = getClient();
    HttpResponse response = client.execute(post);
    int status = response.getStatusLine().getStatusCode();

    if (status != 200) {
        // Release the connection; nobody will read this response body.
        post.abort();
        throw new IOException("status: " + status);
    }

    final HttpEntity entity = response.getEntity();
    if (entity == null) {
        post.abort();
        throw new IOException("status: " + status + " but the response has no entity");
    }
    final InputStream is = entity.getContent();
    final Charset charset = getCharset(entity);
    return new HttpClientResponse(post, status, null, null, is, charset);
}