Example usage for java.net.URLConnection.getContentType()

Introduction

On this page you can find example usages of the method java.net.URLConnection.getContentType(), taken from open-source projects.

Prototype

public String getContentType()

Source Link

Document

Returns the value of the content-type header field.

Usage

From source file:org.ednovo.gooru.application.converter.GooruImageUtil.java

/**
 * Downloads the resource at {@code srcUrl} into a file and returns that file's path.
 *
 * <p>The destination path is the plain concatenation
 * {@code outputFolderPath + fileNamePrefix + "." + fileExtension}; no path separator is
 * inserted, so {@code outputFolderPath} has to end with one.
 *
 * @param srcUrl URL of the resource to download
 * @param outputFolderPath destination folder; created if it does not exist yet
 * @param fileNamePrefix file name without extension
 * @param fileExtension extension to use, or {@code null} to derive it from the response
 *        content type via {@code getWebFileExtenstion}
 * @return the destination path, or {@code null} when a required argument is {@code null} or
 *         the download failed (the failure is logged as a warning)
 */
public static String downloadWebResourceToFile(String srcUrl, String outputFolderPath, String fileNamePrefix,
        String fileExtension) {

    if (srcUrl == null || outputFolderPath == null || fileNamePrefix == null) {
        return null;
    }

    try {
        File outputFolder = new File(outputFolderPath);
        URLConnection urlCon = new URL(srcUrl).openConnection();

        // try-with-resources guarantees both streams are released even when the copy fails.
        try (InputStream inputStream = urlCon.getInputStream()) {
            if (!outputFolder.exists()) {
                outputFolder.mkdirs();
            }

            if (fileExtension == null) {
                fileExtension = getWebFileExtenstion(urlCon.getContentType());
            }
            String destFilePath = outputFolderPath + fileNamePrefix + "." + fileExtension;

            try (OutputStream out = new FileOutputStream(new File(destFilePath))) {
                byte[] buf = new byte[1024];
                int len;
                // read() signals end of stream with -1.
                while ((len = inputStream.read(buf)) != -1) {
                    out.write(buf, 0, len);
                }
            }
            return destFilePath;
        }
    } catch (Exception e) {
        logger.warn("DownloadImage failed:exception:", e);
        return null;
    }
}

From source file:org.ednovo.gooru.application.util.GooruImageUtil.java

/**
 * Downloads the resource at {@code srcUrl} into a uniquely named file and returns its path.
 *
 * <p>The destination path is
 * {@code outputFolderPath + fileNamePrefix + "_" + <random UUID> + "." + fileExtension}; no
 * path separator is inserted, so {@code outputFolderPath} has to end with one.
 *
 * @param srcUrl URL of the resource to download
 * @param outputFolderPath destination folder; created if it does not exist yet
 * @param fileNamePrefix leading part of the generated file name
 * @param fileExtension extension to use, or {@code null} to derive it from the response
 *        content type via {@code getWebFileExtenstion}
 * @return the destination path, or {@code null} when the download failed for any reason
 *         (including {@code null} arguments); the failure is logged as an error
 */
public static String downloadWebResourceToFile(String srcUrl, String outputFolderPath, String fileNamePrefix,
        String fileExtension) {

    try {
        File outputFolder = new File(outputFolderPath);
        URLConnection urlCon = new URL(srcUrl).openConnection();

        // try-with-resources guarantees both streams are released even when the copy fails.
        try (InputStream inputStream = urlCon.getInputStream()) {
            if (!outputFolder.exists()) {
                outputFolder.mkdirs();
            }

            if (fileExtension == null) {
                fileExtension = getWebFileExtenstion(urlCon.getContentType());
            }

            String destFilePath = outputFolderPath + fileNamePrefix + "_" + UUID.randomUUID().toString() + "."
                    + fileExtension;
            File outputFile = new File(destFilePath);
            if (outputFile.exists()) {
                outputFile.delete();
            }

            try (OutputStream out = new FileOutputStream(outputFile)) {
                byte[] buf = new byte[1024];
                int len;
                // read() signals end of stream with -1.
                while ((len = inputStream.read(buf)) != -1) {
                    out.write(buf, 0, len);
                }
            }
            return destFilePath;
        }
    } catch (Exception e) {
        LOGGER.error("DownloadImage failed:exception:", e);
        return null;
    }
}

From source file:com.waku.mmdataextract.ComprehensiveSearch.java

/**
 * Downloads an image from the hard-coded host {@code http://shouji.gd.chinamobile.com} and
 * stores it as {@code output/images/<toFileName>}.
 *
 * <p>Nothing is downloaded when the target file already exists. The last path segment of
 * {@code imgSrc} is URL-encoded (spaces become {@code %20}) before the request is made.
 * Responses without a content type, with a {@code text/*} content type, with an unknown
 * content length, or that end before the announced length are logged and NOT written to
 * disk, so a broken download is not mistaken for a saved image on the next call.
 *
 * @param imgSrc server-relative path of the image
 * @param toFileName file name to store the image under inside {@code output/images/}
 */
public static void saveImage(String imgSrc, String toFileName) {
    String toFile = "output/images/" + toFileName;
    if (new File(toFile).exists()) {
        logger.info("File already saved ->" + toFile);
        return;
    }
    try {
        int endIndex = imgSrc.lastIndexOf("/") + 1;
        String encodeFileName = URLEncoder.encode(imgSrc.substring(endIndex), "UTF-8").replaceAll("[+]", "%20");
        URL u = new URL("http://shouji.gd.chinamobile.com" + imgSrc.substring(0, endIndex) + encodeFileName);
        URLConnection uc = u.openConnection();
        String contentType = uc.getContentType();
        int contentLength = uc.getContentLength();
        // A missing type or length cannot be handled below (the buffer is sized from the
        // length), and a text response is most likely an error page rather than an image.
        if (contentType == null || contentType.startsWith("text/") || contentLength == -1) {
            logger.error("This is not a binary file. -> " + imgSrc);
            return;
        }

        byte[] data = new byte[contentLength];
        int offset = 0;
        try (InputStream in = new BufferedInputStream(uc.getInputStream())) {
            while (offset < contentLength) {
                int bytesRead = in.read(data, offset, data.length - offset);
                if (bytesRead == -1) {
                    break;
                }
                offset += bytesRead;
            }
        }
        if (offset != contentLength) {
            logger.error("Only read " + offset + " bytes; Expected " + contentLength + " bytes");
            return;
        }

        try (FileOutputStream out = new FileOutputStream(toFile)) {
            out.write(data);
            out.flush();
        }
        logger.info("Saved file " + u.toString() + " to " + toFile);
    } catch (Exception e) {
        logger.error("Failed to save image " + imgSrc, e);
    }
}

From source file:de.l3s.boilerpipe.sax.HTMLFetcher.java

/**
 * Fetches the resource behind {@code url} and wraps its raw bytes in an {@link HTMLDocument}.
 *
 * <p>The request is sent with a desktop-browser {@code User-Agent}. A content type other than
 * {@code text/html} only produces a warning on {@code System.err}; the body is fetched anyway.
 * The charset is taken from the content type via {@code PAT_CHARSET_REX} (group 1) and falls
 * back to UTF-8 when it is absent, malformed or unsupported. A {@code gzip} content encoding
 * is decompressed; any other encoding is reported on {@code System.err} and the body is used
 * as received.
 *
 * @param url the URL to fetch
 * @return the fetched bytes together with the charset to decode them with
 * @throws IOException if connecting or reading fails
 */
public static HTMLDocument fetchHelper(final URL url) throws IOException {
    final URLConnection conn = url.openConnection();
    conn.setRequestProperty("User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36");
    final String ct = conn.getContentType();

    if (ct == null || !(ct.equals("text/html") || ct.startsWith("text/html;"))) {
        // Deliberately a warning rather than an IOException: the body is still fetched.
        System.err.println("WARN: unsupported Content-type: " + ct + url);
    }

    Charset cs = Charset.forName("UTF8");
    if (ct != null) {
        Matcher m = PAT_CHARSET_REX.matcher(ct);
        if (m.find()) {
            final String charset = m.group(1);
            try {
                cs = Charset.forName(charset);
            } catch (IllegalArgumentException e) {
                // Covers both UnsupportedCharsetException and IllegalCharsetNameException
                // (e.g. a garbled header value): keep the UTF-8 default.
            }
        }
    }

    final ByteArrayOutputStream bos = new ByteArrayOutputStream();
    InputStream in = conn.getInputStream();
    try {
        final String encoding = conn.getContentEncoding();
        if (encoding != null) {
            if ("gzip".equalsIgnoreCase(encoding)) {
                // If this constructor throws, 'in' still refers to the raw stream and is
                // closed by the finally block.
                in = new GZIPInputStream(in);
            } else {
                System.err.println("WARN: unsupported Content-Encoding: " + encoding);
            }
        }

        final byte[] buf = new byte[4096];
        int r;
        while ((r = in.read(buf)) != -1) {
            bos.write(buf, 0, r);
        }
    } finally {
        in.close();
    }

    return new HTMLDocument(bos.toByteArray(), cs);
}

From source file:gate.DocumentFormat.java

/**
  * Returns a MymeType having as input a URL object. If the MimeType wasn't
  * recognized it returns <b>null</b>.
  * @param url The URL object from which the MimeType will be extracted
  * @return A MimeType object for that URL, or <b>null</b> if the Mime Type is
  * unknown./*from  w  ww.  java  2 s . com*/
  */
static private MimeType getMimeType(URL url) {
    String mimeTypeString = null;
    String charsetFromWebServer = null;
    String contentType = null;
    InputStream is = null;
    MimeType mimeTypeFromWebServer = null;
    MimeType mimeTypeFromFileSuffix = null;
    MimeType mimeTypeFromMagicNumbers = null;

    if (url == null)
        return null;
    // Ask the web server for the content type
    // We expect to get contentType something like this:
    // "text/html; charset=iso-8859-1"
    // Charset is optional

    try {
        try {
            URLConnection urlconn = url.openConnection();
            is = urlconn.getInputStream();
            contentType = urlconn.getContentType();
        } catch (IOException e) {
            // Failed to get the content type with te Web server.
            // Let's try some other methods like FileSuffix or magic numbers.
        }
        // If a content Type was returned by the server, try to get the mime Type
        // string
        // If contentType is something like this:"text/html; charset=iso-8859-1"
        // try to get content Type string (text/html)
        if (contentType != null) {
            StringTokenizer st = new StringTokenizer(contentType, ";");
            // We assume that the first token is the mime type string...
            // If this doesn't happen then BAD LUCK :(( ...
            if (st.hasMoreTokens())
                mimeTypeString = st.nextToken().toLowerCase();
            // The next token it should be the CharSet
            if (st.hasMoreTokens())
                charsetFromWebServer = st.nextToken().toLowerCase();
            if (charsetFromWebServer != null) {
                //We have something like : "charset=iso-8859-1" and let's extract the
                // encoding.
                st = new StringTokenizer(charsetFromWebServer, "=");
                // Don't need this anymore
                charsetFromWebServer = null;
                // Discarding the first token which is : "charset"
                if (st.hasMoreTokens())
                    st.nextToken();
                // Get the encoding : "ISO-8859-1"
                if (st.hasMoreTokens())
                    charsetFromWebServer = st.nextToken().toUpperCase();
            } // End if
        } // end if
          // Return the corresponding MimeType with WebServer from the associated MAP
        mimeTypeFromWebServer = mimeString2mimeTypeMap.get(mimeTypeString);
        // Let's try a file suffix detection
        // mimeTypeFromFileSuffix = getMimeType(getFileSuffix(url));    
        for (String suffix : getFileSuffixes(url)) {
            mimeTypeFromFileSuffix = getMimeType(suffix);
            if (mimeTypeFromFileSuffix != null)
                break;
        }

        // Let's perform a magic numbers guess..
        mimeTypeFromMagicNumbers = guessTypeUsingMagicNumbers(is, charsetFromWebServer);
    } finally {
        IOUtils.closeQuietly(is); //null safe
    }
    //All those types enter into a deciding system
    return decideBetweenThreeMimeTypes(mimeTypeFromWebServer, mimeTypeFromFileSuffix, mimeTypeFromMagicNumbers);
}

From source file:org.wso2.carbon.cloud.gateway.agent.CGAgentUtils.java

/**
 * Wraps the content behind {@code url} in an optimized binary OM text node.
 *
 * <p>The connection's input stream is buffered and handed, together with the reported content
 * type, to a {@code SynapseBinaryDataSource}; the stream is not closed here.
 *
 * @param url location of the resource to read
 * @return the OM text node, or {@code null} when no connection could be created for the URL
 * @throws CGException if an I/O error occurs while obtaining the content
 */
private static OMNode readNonXML(URL url) throws CGException {
    try {
        final URLConnection connection = getURLConnection(url);
        if (connection == null) {
            if (log.isDebugEnabled()) {
                log.debug("Cannot create a URLConnection for given URL : " + url);
            }
            return null;
        }

        final BufferedInputStream content = new BufferedInputStream(connection.getInputStream());
        final OMFactory factory = OMAbstractFactory.getOMFactory();
        final SynapseBinaryDataSource dataSource =
                new SynapseBinaryDataSource(content, connection.getContentType());
        final DataHandler handler = new DataHandler(dataSource);
        return factory.createOMText(handler, true);
    } catch (IOException e) {
        throw new CGException("Error when getting a stream from resource's content", e);
    }
}

From source file:jfix.util.Urls.java

/**
 * Returns content from given url as string. The url can contain
 * username:password after the protocol, so that basic authorization is
 * possible./*from  www.j  a v  a  2 s . c  o  m*/
 * 
 * Example for url with basic authorization:
 * 
 * http://username:password@www.domain.org/index.html
 */
public static String readString(String url, int timeout) {
    Reader reader = null;
    try {
        URLConnection uc = new URL(url).openConnection();
        if (uc instanceof HttpURLConnection) {
            HttpURLConnection httpConnection = (HttpURLConnection) uc;
            httpConnection.setConnectTimeout(timeout * 1000);
            httpConnection.setReadTimeout(timeout * 1000);
        }
        Matcher matcher = Pattern.compile("://(\\w+:\\w+)@").matcher(url);
        if (matcher.find()) {
            String auth = matcher.group(1);
            String encoding = Base64.getEncoder().encodeToString(auth.getBytes());
            uc.setRequestProperty("Authorization", "Basic " + encoding);
        }
        String charset = (uc.getContentType() != null && uc.getContentType().contains("charset="))
                ? uc.getContentType().split("charset=")[1]
                : "utf-8";
        reader = new BufferedReader(new InputStreamReader(uc.getInputStream(), charset));
        StringBuilder sb = new StringBuilder();
        for (int chr; (chr = reader.read()) != -1;) {
            sb.append((char) chr);
        }
        return sb.toString();
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }
}

From source file:com.sxit.crawler.utils.ArchiveUtils.java

/**
 * Get a BufferedReader on the crawler journal given.
 * <p>
 * The content is transparently gunzipped when the connection reports the
 * content type <code>application/x-gzip</code> or the content encoding
 * <code>gzip</code>. Characters are decoded with the platform default
 * charset. The caller is responsible for closing the returned reader.
 * 
 * @param source URL journal
 * @return journal buffered reader.
 * @throws IOException if the connection cannot be opened or the gzip header
 *         cannot be read
 */
public static BufferedReader getBufferedReader(URL source) throws IOException {
    URLConnection conn = source.openConnection();
    // equalsIgnoreCase on the literal is null-safe, so no explicit null checks are needed.
    boolean isGzipped = "application/x-gzip".equalsIgnoreCase(conn.getContentType())
            || "gzip".equalsIgnoreCase(conn.getContentEncoding());
    InputStream uis = conn.getInputStream();
    if (!isGzipped) {
        return new BufferedReader(new InputStreamReader(uis));
    }
    try {
        return new BufferedReader(new InputStreamReader(new GZIPInputStream(uis)));
    } catch (IOException | RuntimeException e) {
        // GZIPInputStream reads the header in its constructor; if that fails nobody
        // else holds the connection stream, so release it here before propagating.
        try {
            uis.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}

From source file:com.krawler.esp.servlets.importICSServlet.java

/**
 * Downloads the internet calendar behind {@code url} into {@code file}.
 *
 * <p>The response is only accepted when its content type contains {@code text/calendar}.
 * The connection stream is closed on every path, and the file stream is closed even when
 * copying fails.
 *
 * @param file destination file
 * @param url address of the calendar
 * @param deleteOlderAndCreateNew whether an existing {@code file} is deleted before writing
 * @return {@code true} when the calendar was written
 * @throws ServiceException if the URL is malformed, the resource or file cannot be opened,
 *         the response is not a calendar, or any other error occurs while downloading
 */
private static boolean getICalFileFromURL(File file, String url, boolean deleteOlderAndCreateNew)
        throws ServiceException {
    InputStream is = null;
    try {
        URL u = new URL(url);
        URLConnection uc = u.openConnection();
        is = uc.getInputStream();
        String contentType = uc.getContentType();
        // A missing content type is treated like a wrong one instead of causing an NPE.
        if (contentType == null || !contentType.contains("text/calendar")) {
            // NOTE(review): if ServiceException is an Exception this is re-wrapped by the
            // generic catch below, as it was before this change - confirm that is intended.
            throw ServiceException.FAILURE("Given calendar URL is not a valid internet calendar.",
                    new Throwable(url));
        }
        if (deleteOlderAndCreateNew) {
            file.delete(); // delete the file in store as it is an older one
        }
        file.createNewFile();
        try (FileOutputStream fop = new FileOutputStream(file)) {
            byte[] b = new byte[4096];
            int count;
            while ((count = is.read(b)) >= 0) {
                fop.write(b, 0, count);
            }
        }
        return true;
    } catch (MalformedURLException ex) {
        throw ServiceException.FAILURE(KWLErrorMsgs.calURLEx, ex);
    } catch (FileNotFoundException ex) {
        throw ServiceException.FAILURE(KWLErrorMsgs.calFileEx, ex);
    } catch (IOException ex) {
        throw ServiceException.FAILURE(KWLErrorMsgs.calIOEx, ex);
    } catch (Exception ex) {
        throw ServiceException.FAILURE(KWLErrorMsgs.calIOEx, ex);
    } finally {
        // Single release point; 'is' is still null when the connection could not be opened.
        closeInputStream(is);
    }
}

From source file:com.servoy.extensions.plugins.http.HttpProvider.java

/**
 * Reads the page behind {@code url} and returns its text together with the charset that the
 * response content type announced.
 *
 * <p>Errors are not propagated: they are reported through {@code Debug.error} and whatever was
 * read up to that point is returned.
 *
 * @param url the page to read
 * @param timeout connect timeout passed to {@code URLConnection.setConnectTimeout}; ignored
 *        when negative
 * @return pair of (page content, charset name); the charset is {@code null} when the content
 *         type does not name one, in which case the platform default charset was used
 */
public static Pair<String, String> getPageDataOldImpl(URL url, int timeout) {
    StringBuilder sb = new StringBuilder();
    String charset = null;
    try {
        URLConnection connection = url.openConnection();
        if (timeout >= 0)
            connection.setConnectTimeout(timeout);
        // Closing the raw stream also covers the readers layered on top of it, including
        // the case where creating the reader fails because the charset is unsupported.
        try (InputStream is = connection.getInputStream()) {
            final String type = connection.getContentType();
            if (type != null) {
                final String[] parts = type.split(";");
                for (int i = 1; i < parts.length && charset == null; i++) {
                    final String t = parts[i].trim();
                    final int index = t.toLowerCase().indexOf("charset=");
                    if (index != -1) {
                        String value = t.substring(index + 8).trim();
                        // The value may be quoted: charset="utf-8"
                        if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\""))
                            value = value.substring(1, value.length() - 1).trim();
                        charset = value;
                    }
                }
            }
            InputStreamReader isr = null;
            if (charset != null)
                isr = new InputStreamReader(is, charset);
            else
                isr = new InputStreamReader(is);
            BufferedReader br = new BufferedReader(isr);
            int read = 0;
            while ((read = br.read()) != -1) {
                sb.append((char) read);
            }
        }
    } catch (Exception e) {
        Debug.error(e);
    }
    return new Pair<String, String>(sb.toString(), charset);
}