List of usage examples for java.net.URI#toASCIIString()
public String toASCIIString()
From source file:org.paxle.filter.robots.impl.RobotsTxtFilter.java
void checkRobotsTxt(final Map<URI, LinkInfo> linkMap, final Counter c) { if (linkMap == null || linkMap.size() == 0) return;/*from ww w . j a v a 2s . c o m*/ // check for blocking URIs final Collection<URI> uriToCheck = this.getOkURI(linkMap); if (uriToCheck.size() == 0) return; c.t += uriToCheck.size(); final Collection<URI> disallowedURI = this.robotsTxtManager.isDisallowed(uriToCheck); // mark disallowed URI as blocked if (disallowedURI != null && disallowedURI.size() > 0) { StringBuffer debugMsg = new StringBuffer(); for (URI location : disallowedURI) { // getting the metadata of the disallowed URI LinkInfo meta = linkMap.get(location); if (!meta.hasStatus(Status.OK)) continue; // mark URI as filtered meta.setStatus(Status.FILTERED, "Access disallowed by robots.txt"); c.c++; if (logger.isDebugEnabled()) { debugMsg.append(String.format("\t%s\r\n", location.toASCIIString())); } } if (logger.isDebugEnabled()) { this.logger.debug(String.format("%d URI blocked:\r\n%s", Integer.valueOf(disallowedURI.size()), debugMsg.toString())); } } }
From source file:org.apache.olingo.commons.core.serialization.JsonPropertyDeserializer.java
protected ResWrap<Property> doDeserialize(final JsonParser parser) throws IOException { final ObjectNode tree = (ObjectNode) parser.getCodec().readTree(parser); final String metadataETag; final URI contextURL; final PropertyImpl property = new PropertyImpl(); if (tree.hasNonNull(Constants.JSON_METADATA_ETAG)) { metadataETag = tree.get(Constants.JSON_METADATA_ETAG).textValue(); tree.remove(Constants.JSON_METADATA_ETAG); } else {//from w w w . j a v a 2 s .c o m metadataETag = null; } if (tree.hasNonNull(Constants.JSON_CONTEXT)) { contextURL = URI.create(tree.get(Constants.JSON_CONTEXT).textValue()); property.setName(StringUtils.substringAfterLast(contextURL.toASCIIString(), "/")); tree.remove(Constants.JSON_CONTEXT); } else if (tree.hasNonNull(Constants.JSON_METADATA)) { contextURL = URI.create(tree.get(Constants.JSON_METADATA).textValue()); property.setType(new EdmTypeInfo.Builder() .setTypeExpression(StringUtils.substringAfterLast(contextURL.toASCIIString(), "#")).build() .internal()); tree.remove(Constants.JSON_METADATA); } else { contextURL = null; } if (tree.has(jsonType)) { property.setType( new EdmTypeInfo.Builder().setTypeExpression(tree.get(jsonType).textValue()).build().internal()); tree.remove(jsonType); } if (tree.has(Constants.JSON_NULL) && tree.get(Constants.JSON_NULL).asBoolean()) { property.setValue(ValueType.PRIMITIVE, null); tree.remove(Constants.JSON_NULL); } if (property.getValue() == null) { try { value(property, tree.has(Constants.VALUE) ? 
tree.get(Constants.VALUE) : tree, parser.getCodec()); } catch (final EdmPrimitiveTypeException e) { throw new IOException(e); } tree.remove(Constants.VALUE); } // any remaining entry is supposed to be an annotation or is ignored for (final Iterator<Map.Entry<String, JsonNode>> itor = tree.fields(); itor.hasNext();) { final Map.Entry<String, JsonNode> field = itor.next(); if (field.getKey().charAt(0) == '@') { final Annotation annotation = new AnnotationImpl(); annotation.setTerm(field.getKey().substring(1)); try { value(annotation, field.getValue(), parser.getCodec()); } catch (final EdmPrimitiveTypeException e) { throw new IOException(e); } property.getAnnotations().add(annotation); } } return new ResWrap<Property>(contextURL, metadataETag, property); }
From source file:org.paxle.crawler.impl.CrawlerTools.java
/** * Generates a file-listing in a standard format understood by Paxle. Currently this format * consists of a rudimentary HTML-page linking to the files in the list given by * <code>fileListIt</code>. The resulting format of this list not yet finalized and subject * to change./*from w ww . j a v a 2 s.c o m*/ * * @param cdoc the {@link ICrawlerDocument} to save the dir-listing to * @param tfm if <code>cdoc</code> does not already contain a * {@link ICrawlerDocument#getContent() content-file}, the {@link ITempFileManager} is * used to create one. * @param fileListIt the file-listing providing the required information to include in the result * @param compress determines whether the content should be compressed transparently (via GZip) * to save space. Compression reduces the size of the representation of large directories * up to a sixth. */ public void saveListing(final ICrawlerDocument cdoc, final Iterator<DirlistEntry> fileListIt, boolean inclParent, boolean compress) throws IOException { if (cdoc == null) throw new NullPointerException("The crawler-document is null."); File content = cdoc.getContent(); if (content == null) { content = this.tfm.createTempFile(); cdoc.setContent(content); } final String charset = "UTF-8"; if (compress) { cdoc.setMimeType("application/x-gzip"); } else { cdoc.setMimeType("text/html"); cdoc.setCharset(charset); } OutputStream writerOut = null; Formatter writer = null; try { // no need to buffer here, the Formatter uses a buffer internally writerOut = new FileOutputStream(content); /* Since the generated format is plain text, contains much redundant information and can potentially * become quite large (several hundred KB), a parameter offering compression can be specified. 
*/ if (compress) writerOut = new GZIPOutputStream(writerOut); writer = new Formatter(writerOut, charset); // getting the base dir String baseURL = cdoc.getLocation().toASCIIString(); if (!baseURL.endsWith("/")) baseURL += "/"; writer.format("<html><head><title>Index of %s</title></head><hr><table><tbody>\r\n", cdoc.getLocation()); if (inclParent) { // getting the parent dir String parentDir = "/"; if (baseURL.length() > 1) { parentDir = baseURL.substring(0, baseURL.length() - 1); int idx = parentDir.lastIndexOf("/"); parentDir = parentDir.substring(0, idx + 1); } writer.format("<tr><td colspan=\"3\"><a href=\"%s\">Up to higher level directory</a></td></tr>\r\n", parentDir); } // generate directory listing // FIXME: we need to escape the urls properly here. while (fileListIt.hasNext()) { final DirlistEntry entry = fileListIt.next(); final String nexturi; final URI entryuri = entry.getFileURI(); if (entryuri == null) { nexturi = baseURL + entry.getFileName(); } else { nexturi = entryuri.toASCIIString(); } writer.format( "<tr>" + "<td><a href=\"%1$s\">%2$s</a></td>" + "<td>%3$d Bytes</td>" + "<td>%4$tY-%4$tm-%4$td %4$tT</td>" + "</tr>\r\n", nexturi, entry.getFileName(), Long.valueOf(entry.getSize()), Long.valueOf(entry.getLastModified())); } writer.format("</tbody></table><hr></body></html>"); cdoc.setStatus(ICrawlerDocument.Status.OK); } catch (UnsupportedEncodingException e) { // XXX: shouldn't this be an Error? throw new RuntimeException(charset + " not supported", e); } finally { if (writer != null) writer.close(); else if (writerOut != null) writerOut.close(); } }
From source file:org.apache.taverna.robundle.TestBundles.java
/**
 * Stores a reference on a path inside a fresh bundle and verifies that reading it back
 * yields the same URI.
 */
@Test
public void getReference() throws Exception {
    try (Bundle bundle = Bundles.createBundle()) {
        final Path target = bundle.getRoot().resolve("hello");
        final URI stored = URI.create("http://example.org/test");
        Bundles.setReference(target, stored);

        final URI readBack = Bundles.getReference(target);
        final String actual = readBack.toASCIIString();
        assertEquals("http://example.org/test", actual);
    }
}
From source file:org.apache.olingo.client.core.serialization.JsonPropertyDeserializer.java
protected ResWrap<Property> doDeserialize(final JsonParser parser) throws IOException { final ObjectNode tree = parser.getCodec().readTree(parser); final String metadataETag; final URI contextURL; final Property property = new Property(); if (tree.hasNonNull(Constants.JSON_METADATA_ETAG)) { metadataETag = tree.get(Constants.JSON_METADATA_ETAG).textValue(); tree.remove(Constants.JSON_METADATA_ETAG); } else {/* www . j a va 2 s . c o m*/ metadataETag = null; } if (tree.hasNonNull(Constants.JSON_CONTEXT)) { contextURL = URI.create(tree.get(Constants.JSON_CONTEXT).textValue()); property.setName(StringUtils.substringAfterLast(contextURL.toASCIIString(), "/")); tree.remove(Constants.JSON_CONTEXT); } else if (tree.hasNonNull(Constants.JSON_METADATA)) { contextURL = URI.create(tree.get(Constants.JSON_METADATA).textValue()); property.setType(new EdmTypeInfo.Builder() .setTypeExpression(StringUtils.substringAfterLast(contextURL.toASCIIString(), "#")).build() .internal()); tree.remove(Constants.JSON_METADATA); } else { contextURL = null; } if (tree.has(Constants.JSON_TYPE)) { property.setType(new EdmTypeInfo.Builder().setTypeExpression(tree.get(Constants.JSON_TYPE).textValue()) .build().internal()); tree.remove(Constants.JSON_TYPE); } if (tree.has(Constants.JSON_NULL) && tree.get(Constants.JSON_NULL).asBoolean()) { property.setValue(ValueType.PRIMITIVE, null); tree.remove(Constants.JSON_NULL); } if (property.getValue() == null) { try { value(property, tree.has(Constants.VALUE) ? 
tree.get(Constants.VALUE) : tree, parser.getCodec()); } catch (final EdmPrimitiveTypeException e) { throw new IOException(e); } tree.remove(Constants.VALUE); } Set<String> toRemove = new HashSet<String>(); // any remaining entry is supposed to be an annotation or is ignored for (final Iterator<Map.Entry<String, JsonNode>> itor = tree.fields(); itor.hasNext();) { final Map.Entry<String, JsonNode> field = itor.next(); if (field.getKey().charAt(0) == '@') { final Annotation annotation = new Annotation(); annotation.setTerm(field.getKey().substring(1)); try { value(annotation, field.getValue(), parser.getCodec()); } catch (final EdmPrimitiveTypeException e) { throw new IOException(e); } property.getAnnotations().add(annotation); } else if (field.getKey().charAt(0) == '#') { final Operation operation = new Operation(); operation.setMetadataAnchor(field.getKey()); final ObjectNode opNode = (ObjectNode) tree.get(field.getKey()); operation.setTitle(opNode.get(Constants.ATTR_TITLE).asText()); operation.setTarget(URI.create(opNode.get(Constants.ATTR_TARGET).asText())); property.getOperations().add(operation); toRemove.add(field.getKey()); } } tree.remove(toRemove); return new ResWrap<Property>(contextURL, metadataETag, property); }
From source file:org.mitre.mpf.mvc.controller.MediaController.java
@RequestMapping(value = "/saveURL", method = RequestMethod.POST) @ResponseBody/*from w ww .j av a 2s.c o m*/ public Map<String, String> saveMedia(@RequestParam(value = "urls", required = true) String[] urls, @RequestParam(value = "desiredpath", required = true) String desiredpath, HttpServletResponse response) throws WfmProcessingException, MpfServiceException { log.debug("URL Upload to Directory:" + desiredpath + " urls:" + urls.length); String err = "Illegal or missing desiredpath"; if (desiredpath == null) { log.error(err); throw new HttpServerErrorException(HttpStatus.INTERNAL_SERVER_ERROR, err); } ; String webTmpDirectory = propertiesUtil.getRemoteMediaCacheDirectory().getAbsolutePath(); //verify the desired path File desiredPath = new File(desiredpath); if (!desiredPath.exists() || !desiredPath.getAbsolutePath().startsWith(webTmpDirectory)) {//make sure it is valid and within the remote-media directory log.error(err); throw new HttpServerErrorException(HttpStatus.INTERNAL_SERVER_ERROR, err); } //passing in urls as a list of Strings //download the media to the server //build a map of success or failure for each file with a custom response object Map<String, String> urlResultMap = new HashMap<String, String>(); for (String enteredURL : urls) { enteredURL = enteredURL.trim(); URI uri; try { uri = new URI(enteredURL); //the check for absolute URI determines if any scheme is present, regardless of validity //(which is checked in both this and the next try block) if (!uri.isAbsolute()) { uri = new URI("http://" + uri.toASCIIString()); } } catch (URISyntaxException incorrectUriTranslation) { log.error("The string {} did not translate cleanly to a URI.", enteredURL, incorrectUriTranslation); urlResultMap.put(enteredURL, "String did not cleanly convert to URI"); continue; } File newFile = null; String localName = null; try { URL url = uri.toURL(); //caught by MalformedURLException //will throw an IOException,which is already caught HttpURLConnection connection = 
(HttpURLConnection) url.openConnection(); connection.setRequestMethod("HEAD"); connection.connect(); connection.disconnect(); String filename = url.getFile(); if (filename.isEmpty()) { String err2 = "The filename does not exist when uploading from the url '" + url + "'"; log.error(err2); urlResultMap.put(enteredURL, err2); continue; } if (!ioUtils.isApprovedFile(url)) { String contentType = ioUtils.getMimeType(url); String msg = "The media is not a supported type. Please add a whitelist." + contentType + " entry to the mediaType.properties file."; log.error(msg + " URL:" + url); urlResultMap.put(enteredURL, msg); continue; } localName = uri.getPath(); //we consider no path to be malformed for our purposes if (localName.isEmpty()) { throw new MalformedURLException(String.format("%s does not have valid path", uri)); } //use the full path name for the filename to allow for more unique filenames localName = localName.substring(1);//remove the leading '/' localName = localName.replace("/", "-");//replace the rest of the path with - //get a new unique filename incase the name currently exists newFile = ioUtils.getNewFileName(desiredpath, localName); //save the file FileUtils.copyURLToFile(url, newFile); log.info("Completed write of {} to {}", uri.getPath(), newFile.getAbsolutePath()); urlResultMap.put(enteredURL, "successful write to: " + newFile.getAbsolutePath()); } catch (MalformedURLException badUrl) { log.error("URI {} could not be converted. ", uri, badUrl); urlResultMap.put(enteredURL, "Unable to locate media at the provided address."); } catch (IOException badWrite) { log.error("Error writing media to temp file from {}.", enteredURL, badWrite); urlResultMap.put(enteredURL, "Unable to save media from this url. 
Please view the server logs for more information."); if (newFile != null && newFile.exists()) { newFile.delete(); } } catch (Exception failure) { //catch the remaining exceptions //this is most likely a failed connection log.error("Exception thrown while saving media from the url {}.", enteredURL, failure); urlResultMap.put(enteredURL, "Error while saving media from this url. Please view the server logs for more information."); } } return urlResultMap; }
From source file:org.apache.taverna.robundle.TestBundles.java
@Test public void getReferenceFromWin8() throws Exception { try (Bundle bundle = Bundles.createBundle()) { Path win8 = bundle.getRoot().resolve("win8"); Path win8Url = bundle.getRoot().resolve("win8.url"); Files.copy(getClass().getResourceAsStream("/win8.url"), win8Url); URI uri = Bundles.getReference(win8); assertEquals("http://example.com/made-in-windows-8", uri.toASCIIString()); }// w w w .j av a 2s . c o m }
From source file:com.fatwire.dta.sscrawler.App.java
/**
 * Configures a {@link Crawler} from the parsed command line and runs it.
 * <p>
 * The second positional argument is the start URI and must contain
 * <code>/ContentServer</code>; the part before it is used to create the host configuration,
 * path, query and fragment become the crawler's start URI.
 *
 * @param cmd the parsed command line
 * @throws IllegalArgumentException if the start URI argument is missing or does not contain
 *         <code>/ContentServer</code>
 * @throws Exception if configuring or running the crawler fails
 */
protected void doWork(final CommandLine cmd) throws Exception {
    final Crawler crawler = new Crawler();

    // URI.create never returns null, so a missing start URI can only be detected by
    // looking at the number of positional arguments
    final String[] args = cmd.getArgs();
    if (args.length < 2) {
        throw new IllegalArgumentException("startUri is not set");
    }
    final URI startUri = URI.create(args[1]);

    if (cmd.hasOption('m')) {
        crawler.setMaxPages(Integer.parseInt(cmd.getOptionValue('m')));
    }

    final int threads = Integer.parseInt(cmd.getOptionValue('t', "5"));

    final String startUriText = startUri.toASCIIString();
    final int t = startUriText.indexOf("/ContentServer");
    if (t == -1) {
        throw new IllegalArgumentException("/ContentServer is not found on the startUri.");
    }

    // the crawler only gets the host-relative part of the start URI ...
    crawler.setStartUri(new URI(null, null, null, -1, startUri.getRawPath(), startUri.getRawQuery(),
            startUri.getFragment()));

    // ... the part in front of "/ContentServer" describes the host
    final HostConfig hc = createHostConfig(URI.create(startUriText.substring(0, t)));

    final String proxyUsername = cmd.getOptionValue("pu");
    final String proxyPassword = cmd.getOptionValue("pw");
    final String proxyHost = cmd.getOptionValue("ph");
    // NOTE(review): the option name is empty, so this presumably always yields the default
    // 8080; it looks like a dedicated proxy-port option was intended - confirm against the
    // option definitions before changing it.
    final int proxyPort = Integer.parseInt(cmd.getOptionValue("", "8080"));

    // credentials are only usable when both user name and password are given
    if (StringUtils.isNotBlank(proxyUsername) && StringUtils.isNotBlank(proxyPassword)) {
        hc.setProxyCredentials(new UsernamePasswordCredentials(proxyUsername, proxyPassword));
    }

    if (StringUtils.isNotBlank(proxyHost)) {
        hc.setProxyHost(new ProxyHost(proxyHost, proxyPort));
    } else if (StringUtils.isNotBlank(System.getProperty("http.proxyhost"))
            && StringUtils.isNotBlank(System.getProperty("http.proxyport"))) {
        hc.setProxyHost(new ProxyHost(System.getProperty("http.proxyhost"),
                Integer.parseInt(System.getProperty("http.proxyport"))));
    }
    crawler.setHostConfig(hc);

    SSUriHelper helper = null;
    if (cmd.hasOption('f')) {
        // a custom helper factory can be given by class name
        final UriHelperFactory f = (UriHelperFactory) Class.forName(cmd.getOptionValue('f')).newInstance();
        helper = f.create(crawler.getStartUri().getPath());
    } else {
        helper = new SSUriHelper(crawler.getStartUri().getPath());
    }

    final ThreadPoolExecutor readerPool = new RenderingThreadPool(threads);
    final MBeanServer platform = java.lang.management.ManagementFactory.getPlatformMBeanServer();
    try {
        platform.registerMBean(readerPool, new ObjectName("com.fatwire.crawler:name=readerpool"));
    } catch (final Throwable x) {
        // a failed MBean registration is logged only, the crawl still runs
        LogFactory.getLog(App.class).error(x.getMessage(), x);
    }

    crawler.setExecutor(readerPool);

    File path = null;
    if (cmd.hasOption('d')) {
        path = new File(cmd.getOptionValue("d"));
    } else {
        path = getOutputDir();
    }
    if (path != null) {
        // every run reports into its own time-stamped sub directory
        final SimpleDateFormat df = new SimpleDateFormat("yyyyMMdd_HHmm");
        path = new File(path, df.format(new Date()));
        path.mkdirs();
    }

    crawler.setReporters(createReporters(path, helper));
    crawler.setUriHelper(helper);

    try {
        crawler.work();
    } finally {
        readerPool.shutdown();
        try {
            platform.unregisterMBean(new ObjectName("com.fatwire.crawler:name=readerpool"));
        } catch (final Throwable x) {
            LogFactory.getLog(App.class).error(x.getMessage(), x);
        }
    }
}
From source file:org.apache.olingo.commons.core.serialization.JsonEntitySetDeserializer.java
protected ResWrap<EntitySet> doDeserialize(final JsonParser parser) throws IOException { final ObjectNode tree = (ObjectNode) parser.getCodec().readTree(parser); if (!tree.has(Constants.VALUE)) { return null; }// ww w . ja v a2 s . co m final EntitySetImpl entitySet = new EntitySetImpl(); URI contextURL; if (tree.hasNonNull(Constants.JSON_CONTEXT)) { contextURL = URI.create(tree.get(Constants.JSON_CONTEXT).textValue()); tree.remove(Constants.JSON_CONTEXT); } else if (tree.hasNonNull(Constants.JSON_METADATA)) { contextURL = URI.create(tree.get(Constants.JSON_METADATA).textValue()); tree.remove(Constants.JSON_METADATA); } else { contextURL = null; } if (contextURL != null) { entitySet.setBaseURI(StringUtils.substringBefore(contextURL.toASCIIString(), Constants.METADATA)); } final String metadataETag; if (tree.hasNonNull(Constants.JSON_METADATA_ETAG)) { metadataETag = tree.get(Constants.JSON_METADATA_ETAG).textValue(); tree.remove(Constants.JSON_METADATA_ETAG); } else { metadataETag = null; } if (tree.hasNonNull(jsonCount)) { entitySet.setCount(tree.get(jsonCount).asInt()); tree.remove(jsonCount); } if (tree.hasNonNull(jsonNextLink)) { entitySet.setNext(URI.create(tree.get(jsonNextLink).textValue())); tree.remove(jsonNextLink); } if (tree.hasNonNull(jsonDeltaLink)) { entitySet.setDeltaLink(URI.create(tree.get(jsonDeltaLink).textValue())); tree.remove(jsonDeltaLink); } if (tree.hasNonNull(Constants.VALUE)) { final JsonEntityDeserializer entityDeserializer = new JsonEntityDeserializer(version, serverMode); for (JsonNode jsonNode : tree.get(Constants.VALUE)) { entitySet.getEntities() .add(entityDeserializer.doDeserialize(jsonNode.traverse(parser.getCodec())).getPayload()); } tree.remove(Constants.VALUE); } // any remaining entry is supposed to be an annotation or is ignored for (final Iterator<Map.Entry<String, JsonNode>> itor = tree.fields(); itor.hasNext();) { final Map.Entry<String, JsonNode> field = itor.next(); if (field.getKey().charAt(0) == '@') { final Annotation 
annotation = new AnnotationImpl(); annotation.setTerm(field.getKey().substring(1)); try { value(annotation, field.getValue(), parser.getCodec()); } catch (final EdmPrimitiveTypeException e) { throw new IOException(e); } entitySet.getAnnotations().add(annotation); } } return new ResWrap<EntitySet>(contextURL, metadataETag, entitySet); }
From source file:org.apache.olingo.ext.proxy.commons.InvokerInvocationHandler.java
/**
 * Dispatches a call made on the invoker proxy.
 * <ul>
 * <li>The query-option methods (<code>filter</code>, <code>orderBy</code>, <code>top</code>,
 * <code>skip</code>, <code>expand</code>, <code>select</code>) are executed on this handler and
 * return the proxy itself.</li>
 * <li><code>operations</code> without arguments returns a new proxy implementing
 * <code>operationRef</code>, backed by a handler chosen from the operation's return type.</li>
 * <li>Any other method recognised by <code>isSelfMethod</code> is executed on this handler.</li>
 * </ul>
 *
 * @throws NoSuchMethodException if the method is none of the above
 */
@Override
@SuppressWarnings({ "unchecked", "rawtypes" })
public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
    final String methodName = method.getName();

    final boolean queryOption = "filter".equals(methodName) || "orderBy".equals(methodName)
            || "top".equals(methodName) || "skip".equals(methodName) || "expand".equals(methodName)
            || "select".equals(methodName);
    if (queryOption) {
        invokeSelfMethod(method, args);
        return proxy;
    }

    if ("operations".equals(methodName) && ArrayUtils.isEmpty(args)) {
        final EdmTypeInfo returnType = new EdmTypeInfo.Builder().setEdm(service.getClient().getCachedEdm())
                .setTypeExpression(operation.returnType()).build();

        final URI prefixURI = URIUtils.buildFunctionInvokeURI(this.baseURI, parameters);

        // pick the handler matching the kind of value the operation returns
        final OperationInvocationHandler handler;
        if (returnType.isComplexType()) {
            handler = returnType.isCollection()
                    ? OperationInvocationHandler.getInstance(new ComplexCollectionInvocationHandler(targetRef,
                            service, getClient().newURIBuilder(prefixURI.toASCIIString())))
                    : OperationInvocationHandler.getInstance(ComplexInvocationHandler.getInstance(targetRef,
                            service, getClient().newURIBuilder(prefixURI.toASCIIString())));
        } else if (returnType.isCollection()) {
            handler = OperationInvocationHandler.getInstance(new EntityCollectionInvocationHandler(service, null,
                    targetRef, null, getClient().newURIBuilder(prefixURI.toASCIIString())));
        } else {
            handler = OperationInvocationHandler
                    .getInstance(EntityInvocationHandler.getInstance(prefixURI, targetRef, service));
        }

        final ClassLoader loader = Thread.currentThread().getContextClassLoader();
        return Proxy.newProxyInstance(loader, new Class<?>[] { operationRef }, handler);
    }

    if (isSelfMethod(method)) {
        return invokeSelfMethod(method, args);
    }

    throw new NoSuchMethodException(methodName);
}