List of usage examples for java.net.URLConnection.getContentType()
public String getContentType()
From source file:org.ednovo.gooru.application.converter.GooruImageUtil.java
/**
 * Downloads the resource at {@code srcUrl} into {@code outputFolderPath}.
 *
 * <p>The destination path is built by plain concatenation:
 * {@code outputFolderPath + fileNamePrefix + "." + fileExtension}. No path
 * separator is inserted, so the folder path is used exactly as given.
 *
 * @param srcUrl           URL of the resource to download
 * @param outputFolderPath folder to write into; created if it does not exist
 * @param fileNamePrefix   file name without extension
 * @param fileExtension    extension to use; when {@code null} it is obtained from
 *                         {@code getWebFileExtenstion} using the response content type
 * @return the destination file path, or {@code null} when {@code srcUrl},
 *         {@code outputFolderPath} or {@code fileNamePrefix} is {@code null} or the
 *         download fails (failures are logged at warn level)
 */
public static String downloadWebResourceToFile(String srcUrl, String outputFolderPath, String fileNamePrefix, String fileExtension) {
    if (srcUrl == null || outputFolderPath == null || fileNamePrefix == null) {
        return null;
    }
    InputStream inputStream = null;
    OutputStream out = null;
    try {
        File outputFolder = new File(outputFolderPath);
        URL url = new URL(srcUrl);
        URLConnection urlCon = url.openConnection();
        inputStream = urlCon.getInputStream();
        if (!outputFolder.exists()) {
            outputFolder.mkdirs();
        }
        if (fileExtension == null) {
            fileExtension = getWebFileExtenstion(urlCon.getContentType());
        }
        String destFilePath = outputFolderPath + fileNamePrefix + "." + fileExtension;
        out = new FileOutputStream(new File(destFilePath));
        byte[] buf = new byte[1024];
        int len;
        while ((len = inputStream.read(buf)) > 0) {
            out.write(buf, 0, len);
        }
        // Close explicitly on the success path so a failing close is reported as a
        // failed download instead of being silently ignored in the finally block.
        out.close();
        return destFilePath;
    } catch (Exception e) {
        logger.warn("DownloadImage failed:exception:", e);
        return null;
    } finally {
        // Release both streams on every path; previously they leaked whenever an
        // exception was thrown after they had been opened.
        if (out != null) {
            try {
                out.close();
            } catch (Exception ignored) {
                // best effort: the outcome has already been decided above
            }
        }
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (Exception ignored) {
                // best effort: the outcome has already been decided above
            }
        }
    }
}
From source file:org.ednovo.gooru.application.util.GooruImageUtil.java
public static String downloadWebResourceToFile(String srcUrl, String outputFolderPath, String fileNamePrefix, String fileExtension) {//from w w w . jav a2s .c o m try { File outputFolder = new File(outputFolderPath); URL url = new URL(srcUrl); URLConnection urlCon = url.openConnection(); InputStream inputStream = urlCon.getInputStream(); if (!outputFolder.exists()) { outputFolder.mkdirs(); } if (fileExtension == null) { fileExtension = getWebFileExtenstion(urlCon.getContentType()); } String destFilePath = outputFolderPath + fileNamePrefix + "_" + UUID.randomUUID().toString() + "." + fileExtension; File outputFile = new File(destFilePath); if (outputFile.exists()) { outputFile.delete(); } OutputStream out = new FileOutputStream(outputFile); byte buf[] = new byte[1024]; int len; while ((len = inputStream.read(buf)) > 0) out.write(buf, 0, len); out.close(); inputStream.close(); return destFilePath; } catch (Exception e) { LOGGER.error("DownloadImage failed:exception:", e); return null; } }
From source file:com.waku.mmdataextract.ComprehensiveSearch.java
public static void saveImage(String imgSrc, String toFileName) { String toFile = "output/images/" + toFileName; if (new File(toFile).exists()) { logger.info("File already saved ->" + toFile); return;// w w w . j a v a2 s .c om } URL u = null; URLConnection uc = null; InputStream raw = null; InputStream in = null; FileOutputStream out = null; try { int endIndex = imgSrc.lastIndexOf("/") + 1; String encodeFileName = URLEncoder.encode(imgSrc.substring(endIndex), "UTF-8").replaceAll("[+]", "%20"); u = new URL("http://shouji.gd.chinamobile.com" + imgSrc.substring(0, endIndex) + encodeFileName); uc = u.openConnection(); String contentType = uc.getContentType(); int contentLength = uc.getContentLength(); if (contentType.startsWith("text/") || contentLength == -1) { logger.error("This is not a binary file. -> " + imgSrc); } raw = uc.getInputStream(); in = new BufferedInputStream(raw); byte[] data = new byte[contentLength]; int bytesRead = 0; int offset = 0; while (offset < contentLength) { bytesRead = in.read(data, offset, data.length - offset); if (bytesRead == -1) break; offset += bytesRead; } if (offset != contentLength) { logger.error("Only read " + offset + " bytes; Expected " + contentLength + " bytes"); } out = new FileOutputStream(toFile); out.write(data); out.flush(); logger.info("Saved file " + u.toString() + " to " + toFile); } catch (Exception e) { e.printStackTrace(); } finally { try { in.close(); } catch (Exception e) { } try { out.close(); } catch (Exception e) { } } }
From source file:de.l3s.boilerpipe.sax.HTMLFetcher.java
public static HTMLDocument fetchHelper(final URL url) throws IOException { final URLConnection conn = url.openConnection(); //conn.setRequestProperty("User-Agent", //"Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19"); conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36"); //conn.setRequestProperty("Cookie","wapparam=web2wap; vt=4"); final String ct = conn.getContentType(); if (ct == null || !(ct.equals("text/html") || ct.startsWith("text/html;"))) { //throw new IOException("Unsupported content type: "+ct+ url); System.err.println("WARN: unsupported Content-type: " + ct + url); }/* ww w. j a v a2 s .c om*/ Charset cs = Charset.forName("UTF8"); if (ct != null) { Matcher m = PAT_CHARSET_REX.matcher(ct); if (m.find()) { final String charset = m.group(1); try { cs = Charset.forName(charset); } catch (UnsupportedCharsetException e) { // keep default } } } InputStream in = conn.getInputStream(); final String encoding = conn.getContentEncoding(); if (encoding != null) { if ("gzip".equalsIgnoreCase(encoding)) { in = new GZIPInputStream(in); } else { System.err.println("WARN: unsupported Content-Encoding: " + encoding); } } ByteArrayOutputStream bos = new ByteArrayOutputStream(); byte[] buf = new byte[4096]; int r; while ((r = in.read(buf)) != -1) { bos.write(buf, 0, r); } in.close(); final byte[] data = bos.toByteArray(); return new HTMLDocument(data, cs); }
From source file:gate.DocumentFormat.java
/** * Returns a MymeType having as input a URL object. If the MimeType wasn't * recognized it returns <b>null</b>. * @param url The URL object from which the MimeType will be extracted * @return A MimeType object for that URL, or <b>null</b> if the Mime Type is * unknown./*from w ww. java 2 s . com*/ */ static private MimeType getMimeType(URL url) { String mimeTypeString = null; String charsetFromWebServer = null; String contentType = null; InputStream is = null; MimeType mimeTypeFromWebServer = null; MimeType mimeTypeFromFileSuffix = null; MimeType mimeTypeFromMagicNumbers = null; if (url == null) return null; // Ask the web server for the content type // We expect to get contentType something like this: // "text/html; charset=iso-8859-1" // Charset is optional try { try { URLConnection urlconn = url.openConnection(); is = urlconn.getInputStream(); contentType = urlconn.getContentType(); } catch (IOException e) { // Failed to get the content type with te Web server. // Let's try some other methods like FileSuffix or magic numbers. } // If a content Type was returned by the server, try to get the mime Type // string // If contentType is something like this:"text/html; charset=iso-8859-1" // try to get content Type string (text/html) if (contentType != null) { StringTokenizer st = new StringTokenizer(contentType, ";"); // We assume that the first token is the mime type string... // If this doesn't happen then BAD LUCK :(( ... if (st.hasMoreTokens()) mimeTypeString = st.nextToken().toLowerCase(); // The next token it should be the CharSet if (st.hasMoreTokens()) charsetFromWebServer = st.nextToken().toLowerCase(); if (charsetFromWebServer != null) { //We have something like : "charset=iso-8859-1" and let's extract the // encoding. 
st = new StringTokenizer(charsetFromWebServer, "="); // Don't need this anymore charsetFromWebServer = null; // Discarding the first token which is : "charset" if (st.hasMoreTokens()) st.nextToken(); // Get the encoding : "ISO-8859-1" if (st.hasMoreTokens()) charsetFromWebServer = st.nextToken().toUpperCase(); } // End if } // end if // Return the corresponding MimeType with WebServer from the associated MAP mimeTypeFromWebServer = mimeString2mimeTypeMap.get(mimeTypeString); // Let's try a file suffix detection // mimeTypeFromFileSuffix = getMimeType(getFileSuffix(url)); for (String suffix : getFileSuffixes(url)) { mimeTypeFromFileSuffix = getMimeType(suffix); if (mimeTypeFromFileSuffix != null) break; } // Let's perform a magic numbers guess.. mimeTypeFromMagicNumbers = guessTypeUsingMagicNumbers(is, charsetFromWebServer); } finally { IOUtils.closeQuietly(is); //null safe } //All those types enter into a deciding system return decideBetweenThreeMimeTypes(mimeTypeFromWebServer, mimeTypeFromFileSuffix, mimeTypeFromMagicNumbers); }
From source file:org.wso2.carbon.cloud.gateway.agent.CGAgentUtils.java
private static OMNode readNonXML(URL url) throws CGException { try {/*w w w. ja v a 2s . co m*/ // Open a new connection URLConnection newConnection = getURLConnection(url); if (newConnection == null) { if (log.isDebugEnabled()) { log.debug("Cannot create a URLConnection for given URL : " + url); } return null; } BufferedInputStream newInputStream = new BufferedInputStream(newConnection.getInputStream()); OMFactory omFactory = OMAbstractFactory.getOMFactory(); return omFactory.createOMText( new DataHandler(new SynapseBinaryDataSource(newInputStream, newConnection.getContentType())), true); } catch (IOException e) { throw new CGException("Error when getting a stream from resource's content", e); } }
From source file:jfix.util.Urls.java
/** * Returns content from given url as string. The url can contain * username:password after the protocol, so that basic authorization is * possible./*from www.j a v a 2 s . c o m*/ * * Example for url with basic authorization: * * http://username:password@www.domain.org/index.html */ public static String readString(String url, int timeout) { Reader reader = null; try { URLConnection uc = new URL(url).openConnection(); if (uc instanceof HttpURLConnection) { HttpURLConnection httpConnection = (HttpURLConnection) uc; httpConnection.setConnectTimeout(timeout * 1000); httpConnection.setReadTimeout(timeout * 1000); } Matcher matcher = Pattern.compile("://(\\w+:\\w+)@").matcher(url); if (matcher.find()) { String auth = matcher.group(1); String encoding = Base64.getEncoder().encodeToString(auth.getBytes()); uc.setRequestProperty("Authorization", "Basic " + encoding); } String charset = (uc.getContentType() != null && uc.getContentType().contains("charset=")) ? uc.getContentType().split("charset=")[1] : "utf-8"; reader = new BufferedReader(new InputStreamReader(uc.getInputStream(), charset)); StringBuilder sb = new StringBuilder(); for (int chr; (chr = reader.read()) != -1;) { sb.append((char) chr); } return sb.toString(); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { throw new RuntimeException(e.getMessage(), e); } } } }
From source file:com.sxit.crawler.utils.ArchiveUtils.java
/**
 * Get a BufferedReader on the crawler journal given.
 *
 * <p>The content is treated as gzipped, and decompressed on the fly, when the
 * connection reports content type {@code application/x-gzip} or content
 * encoding {@code gzip} (both compared case-insensitively).
 *
 * @param source URL journal
 * @return journal buffered reader.
 * @throws IOException if the connection cannot be opened or the gzip stream
 *         cannot be initialised
 */
public static BufferedReader getBufferedReader(URL source) throws IOException {
    URLConnection conn = source.openConnection();
    boolean isGzipped = "application/x-gzip".equalsIgnoreCase(conn.getContentType())
            || "gzip".equalsIgnoreCase(conn.getContentEncoding());
    InputStream uis = conn.getInputStream();
    try {
        InputStream content = isGzipped ? new GZIPInputStream(uis) : uis;
        return new BufferedReader(new InputStreamReader(content));
    } catch (IOException e) {
        // The gzip header could not be read: release the connection stream,
        // which the caller never gets a handle on, before reporting the failure.
        try {
            uis.close();
        } catch (IOException ignored) {
            // the original failure is the one worth reporting
        }
        throw e;
    }
}
From source file:com.krawler.esp.servlets.importICSServlet.java
private static boolean getICalFileFromURL(File file, String url, boolean deleteOlderAndCreateNew) throws ServiceException { boolean success = false; InputStream is = null;/*from w ww. ja va2 s. com*/ try { URL u = new URL(url); URLConnection uc = u.openConnection(); is = uc.getInputStream(); if (uc.getContentType().contains("text/calendar")) { if (deleteOlderAndCreateNew) { file.delete(); // delete the file in store as it is an older one } file.createNewFile(); FileOutputStream fop = new FileOutputStream(file); byte[] b = new byte[4096]; int count = 0; while ((count = is.read(b)) >= 0) { fop.write(b, 0, count); } fop.close(); closeInputStream(is); success = true; } else { closeInputStream(is); throw ServiceException.FAILURE("Given calendar URL is not a valid internet calendar.", new Throwable(url)); } } catch (MalformedURLException ex) { throw ServiceException.FAILURE(KWLErrorMsgs.calURLEx, ex); } catch (FileNotFoundException ex) { throw ServiceException.FAILURE(KWLErrorMsgs.calFileEx, ex); } catch (IOException ex) { closeInputStream(is); throw ServiceException.FAILURE(KWLErrorMsgs.calIOEx, ex); } catch (Exception ex) { closeInputStream(is); throw ServiceException.FAILURE(KWLErrorMsgs.calIOEx, ex); } return success; }
From source file:com.servoy.extensions.plugins.http.HttpProvider.java
public static Pair<String, String> getPageDataOldImpl(URL url, int timeout) { StringBuffer sb = new StringBuffer(); String charset = null;//from ww w . ja v a2 s . com try { URLConnection connection = url.openConnection(); if (timeout >= 0) connection.setConnectTimeout(timeout); InputStream is = connection.getInputStream(); final String type = connection.getContentType(); if (type != null) { final String[] parts = type.split(";"); for (int i = 1; i < parts.length && charset == null; i++) { final String t = parts[i].trim(); final int index = t.toLowerCase().indexOf("charset="); if (index != -1) charset = t.substring(index + 8); } } InputStreamReader isr = null; if (charset != null) isr = new InputStreamReader(is, charset); else isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); int read = 0; while ((read = br.read()) != -1) { sb.append((char) read); } br.close(); isr.close(); is.close(); } catch (Exception e) { Debug.error(e); } return new Pair<String, String>(sb.toString(), charset); }