Main.java Source code

Introduction

Here is the source code for Main.java
Source

//package com.java2s;
/**
 * Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

import java.net.HttpURLConnection;
import java.net.Proxy;
import java.net.URL;

import java.util.Locale;
import java.util.zip.GZIPInputStream;

/**
 * Best-effort helpers that download the first few kilobytes of a web page and
 * extract the {@code <title>} text and the {@code <meta name="description">}
 * content from it using plain string searches (no HTML parser is involved).
 */
public class Main {
    /** Charset assumed for an HTTP body when the Content-Type header names none. */
    public static final String ISO = "ISO-8859-1";
    /**
     * Number of leading body bytes that {@link #getUrlInfos(String, int)} inspects.
     * Kept non-final because it is a public field that existing callers may assign.
     */
    public static int K4 = 4096;
    /** Start of a description meta tag; matched case-insensitively. */
    final static String DESCRIPTION = "<meta name=\"description\" content=\"";
    /**
     * Capitalised variant of {@link #DESCRIPTION}. No longer needed inside this
     * class because matching ignores case; retained for code that references it.
     */
    final static String DESCRIPTION2 = "<meta name=\"Description\" content=\"";

    private static final String CHARSET_KEY = "charset=";

    /**
     * Fetches the page title of the given URL.
     *
     * @param urlAsString the URL to fetch
     * @param timeout connect and read timeout in milliseconds
     * @return the title, or an empty string if it could not be determined
     */
    public static String getUrlTitle(String urlAsString, int timeout) {
        return getUrlInfos(urlAsString, timeout)[0];
    }

    /**
     * Fetches up to {@link #K4} bytes of the given URL and extracts title and
     * description from them.
     *
     * @param urlAsString the URL to fetch
     * @param timeout connect and read timeout in milliseconds
     * @return a two-element array {@code {title, description}}; both elements are
     *         empty strings when the page could not be fetched or has no title
     */
    public static String[] getUrlInfos(String urlAsString, int timeout) {
        HttpURLConnection hConn = null;
        InputStream in = null;
        try {
            URL url = new URL(urlAsString);
            //using proxy may increase latency
            hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
            hConn.setRequestProperty("User-Agent", "Mozilla/5.0 Gecko/20100915 Firefox/3.6.10");

            // on android we got problems because of this
            // so disable that for now
            //            hConn.setRequestProperty("Accept-Encoding", "gzip, deflate");
            hConn.setConnectTimeout(timeout);
            hConn.setReadTimeout(timeout);

            // read the limit once so buffer and stream sizes cannot disagree
            int limit = K4;
            in = hConn.getInputStream();
            if ("gzip".equals(hConn.getContentEncoding())) {
                in = new GZIPInputStream(in);
            }
            // default length of bufferedinputstream is 8k
            in = new BufferedInputStream(in, limit);

            // A single read() may return fewer bytes than requested, so keep
            // reading until the buffer is full or the stream ends.
            byte[] buffer = new byte[limit];
            int filled = readUpTo(in, buffer);

            // hand over only the bytes actually received, not the zero padding
            byte[] head = new byte[filled];
            System.arraycopy(buffer, 0, head, 0, filled);

            return getUrlInfosFromText(head, hConn.getContentType());
        } catch (Exception ignored) {
            // Deliberately best-effort: any failure (malformed URL, non-HTTP
            // protocol, network error, timeout) is reported as "no information".
        } finally {
            closeQuietly(in);
            if (hConn != null) {
                hConn.disconnect();
            }
        }
        return emptyInfos();
    }

    /**
     * Reads the whole stream as UTF-8 text and closes it afterwards.
     *
     * @param is the stream to read, must not be null
     * @return the content with every line terminated by a single {@code '\n'}
     * @throws IllegalArgumentException if {@code is} is null
     * @throws IOException if reading fails
     */
    public static String getInputStream(InputStream is) throws IOException {
        if (is == null) {
            throw new IllegalArgumentException("stream mustn't be null!");
        }

        BufferedReader bufReader = createBuffReader(is);
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = bufReader.readLine()) != null) {
                sb.append(line);
                sb.append('\n');
            }
            return sb.toString();
        } finally {
            // close even when readLine() fails so the stream is not leaked
            bufReader.close();
        }
    }

    /**
     * Returns title and description of a specified string (as byte array).
     *
     * <p>The bytes are first decoded with the charset named in {@code contentType}
     * (platform default if that is null or unusable). If the decoded text itself
     * declares a charset ({@code charset=...}, quoted or not) the bytes are decoded
     * again with that charset. Tag names are matched case-insensitively.
     *
     * @param arr the leading bytes of an HTML document, may be null
     * @param contentType value of the HTTP Content-Type header, may be null
     * @return a two-element array {@code {title, description}}; {@code {"", ""}}
     *         when no non-empty title is found, and an empty description when no
     *         description meta tag is found
     */
    public static String[] getUrlInfosFromText(byte[] arr, String contentType) {
        if (arr == null) {
            return emptyInfos();
        }

        String res = contentType == null ? null : decodeOrNull(arr, extractEncoding(contentType));
        if (res == null) {
            // no usable charset in the header: fall back to the platform default
            res = new String(arr);
        }

        int index = getStartTitleEndPos(res);
        if (index < 0) {
            return emptyInfos();
        }

        // re-read byte array with the encoding the document declares for itself
        String declared = findDeclaredCharset(res);
        if (declared != null) {
            String redecoded = decodeOrNull(arr, declared);
            if (redecoded != null) {
                res = redecoded;
                index = getStartTitleEndPos(res);
                if (index < 0) {
                    return emptyInfos();
                }
            }
        }

        // search from the title start so an earlier stray "</title>" is ignored
        int lastIndex = indexOfIgnoreCase(res, "</title>", index);
        if (lastIndex <= index) {
            return emptyInfos();
        }
        String title = res.substring(index, lastIndex);

        int descIndex = indexOfIgnoreCase(res, DESCRIPTION, 0);
        if (descIndex < 0) {
            return new String[] { title, "" };
        }
        descIndex += DESCRIPTION.length();

        int descEnd = res.indexOf('"', descIndex);
        if (descEnd < 0) {
            // attribute value is cut off (e.g. by the byte limit)
            return new String[] { title, "" };
        }
        return new String[] { title, res.substring(descIndex, descEnd) };
    }

    /**
     * Opens the given file as a UTF-8 reader. The caller must close the reader.
     */
    public static BufferedReader createBuffReader(File file)
            throws FileNotFoundException, UnsupportedEncodingException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
    }

    /**
     * Wraps the given stream in a UTF-8 reader. The caller must close the reader.
     */
    public static BufferedReader createBuffReader(InputStream is)
            throws FileNotFoundException, UnsupportedEncodingException {
        return new BufferedReader(new InputStreamReader(is, "UTF-8"));
    }

    /**
     * Extracts the charset parameter from an HTTP Content-Type header value.
     *
     * @param contentType header value such as {@code text/html; charset=UTF-8},
     *        may be null
     * @return the charset name in lower case with surrounding quotes removed, or
     *         {@link #ISO} when the header is null or names no charset
     */
    public static String extractEncoding(String contentType) {
        if (contentType == null) {
            return ISO;
        }

        String charset = "";
        for (String value : contentType.split(";")) {
            value = value.trim();

            if (value.regionMatches(true, 0, CHARSET_KEY, 0, CHARSET_KEY.length())) {
                String name = stripQuotes(value.substring(CHARSET_KEY.length()).trim());
                // Locale.ROOT: the default locale may map 'I' to a dotless i
                // (Turkish), which would turn "ISO-8859-1" into an unknown name
                charset = name.toLowerCase(Locale.ROOT);
            }
        }

        // http1.1 says ISO-8859-1 is the default charset
        if (charset.length() == 0) {
            charset = ISO;
        }

        return charset;
    }

    /**
     * Finds the position right after the opening title tag.
     *
     * @param res the HTML text to search, may be null
     * @return the index of the first character after {@code <title>} (or after the
     *         closing {@code >} of a {@code <title ...>} tag with attributes), or
     *         -1 if there is no complete opening title tag
     */
    public static int getStartTitleEndPos(String res) {
        if (res == null) {
            return -1;
        }

        int index = indexOfIgnoreCase(res, "<title>", 0);
        if (index >= 0) {
            return index + "<title>".length();
        }

        // title tag carrying attributes, e.g. <title lang="en">
        index = indexOfIgnoreCase(res, "<title ", 0);
        if (index < 0) {
            return -1;
        }

        int close = res.indexOf('>', index);
        return close < 0 ? -1 : close + 1;
    }

    /** Creates the "nothing found" result; a fresh array so callers may modify it. */
    private static String[] emptyInfos() {
        return new String[] { "", "" };
    }

    /** Fills buf from in until it is full or the stream ends; returns the byte count. */
    private static int readUpTo(InputStream in, byte[] buf) throws IOException {
        int total = 0;
        while (total < buf.length) {
            int count = in.read(buf, total, buf.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }

    /** Closes the stream if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // nothing useful can be done when closing fails
        }
    }

    /** Decodes bytes with the named charset; returns null if the name is unusable. */
    private static String decodeOrNull(byte[] bytes, String charsetName) {
        if (charsetName == null || charsetName.length() == 0) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (UnsupportedEncodingException unknownCharset) {
            return null;
        } catch (RuntimeException malformedName) {
            // defensive: covers a syntactically illegal name being reported
            // through an unchecked exception
            return null;
        }
    }

    /** Returns the charset name following the first "charset=" in html, or null. */
    private static String findDeclaredCharset(String html) {
        int at = indexOfIgnoreCase(html, CHARSET_KEY, 0);
        if (at < 0) {
            return null;
        }

        int start = at + CHARSET_KEY.length();
        // skip an opening quote, as in <meta charset="utf-8">
        if (start < html.length() && (html.charAt(start) == '"' || html.charAt(start) == '\'')) {
            start++;
        }

        int end = start;
        while (end < html.length() && isCharsetNameChar(html.charAt(end))) {
            end++;
        }
        return end > start ? html.substring(start, end) : null;
    }

    /** Tells whether c may appear in a charset name (ASCII letters, digits, "-+.:_"). */
    private static boolean isCharsetNameChar(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
                || c == '-' || c == '+' || c == '.' || c == ':' || c == '_';
    }

    /** Removes one pair of matching single or double quotes surrounding s, if present. */
    private static String stripQuotes(String s) {
        if (s.length() >= 2) {
            char first = s.charAt(0);
            char last = s.charAt(s.length() - 1);
            if ((first == '"' || first == '\'') && first == last) {
                return s.substring(1, s.length() - 1).trim();
            }
        }
        return s;
    }

    /** Case-insensitive indexOf that does not depend on the default locale; -1 if absent. */
    private static int indexOfIgnoreCase(String text, String token, int from) {
        int lastStart = text.length() - token.length();
        for (int i = Math.max(from, 0); i <= lastStart; i++) {
            if (text.regionMatches(true, i, token, 0, token.length())) {
                return i;
            }
        }
        return -1;
    }
}