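Here you can find the source of getUrlInfos(String urlAsString, int timeout), which downloads the first 4 KB of the page at the given URL and returns its title and meta description.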
public static String[] getUrlInfos(String urlAsString, int timeout)
//package com.java2s;
/**
 * Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.Proxy;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * Extracts the title and meta description from the leading bytes of an HTML page.
 */
public class Main {
    final static String DESCRIPTION = "<meta name=\"description\" content=\"";
    final static String DESCRIPTION2 = "<meta name=\"Description\" content=\"";

    /** Marker that precedes a charset name in a meta tag or Content-Type value. */
    private static final String CHARSET_MARKER = "charset=";

    /** Number of leading bytes of the response that are inspected. */
    private static final int MAX_BYTES = 4096;

    /**
     * Downloads the first {@value #MAX_BYTES} bytes of the given URL and extracts
     * the page title and meta description from them.
     *
     * <p>This is a best-effort lookup: any failure (malformed URL, non-HTTP URL,
     * network error, timeout, invalid timeout value) yields two empty strings
     * instead of an exception.
     *
     * @param urlAsString the URL to fetch
     * @param timeout connect and read timeout in milliseconds
     * @return a two-element array {@code {title, description}}; missing parts are
     *         empty strings, never {@code null}
     */
    public static String[] getUrlInfos(String urlAsString, int timeout) {
        HttpURLConnection hConn = null;
        try {
            URL url = new URL(urlAsString);
            // using proxy may increase latency
            hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
            hConn.setRequestProperty("User-Agent",
                    "Mozilla/5.0 Gecko/20100915 Firefox/3.6.10");
            hConn.setConnectTimeout(timeout);
            hConn.setReadTimeout(timeout);

            // default length of bufferedinputstream is 8k
            try (InputStream in = new BufferedInputStream(hConn.getInputStream(), MAX_BYTES)) {
                return getUrlInfosFromText(readPrefix(in, MAX_BYTES));
            }
        } catch (IOException | RuntimeException ignored) {
            // Deliberately ignored: callers expect empty strings when the page
            // cannot be fetched or parsed, not an exception.
        } finally {
            if (hConn != null) {
                // Only a prefix of the body is consumed, so release the socket
                // instead of leaving a half-read connection around.
                hConn.disconnect();
            }
        }
        return emptyResult();
    }

    /**
     * Returns title and description of a specified string (as byte array).
     *
     * <p>The bytes are first decoded as UTF-8. If the text declares a different,
     * supported charset via {@code charset=...} (unquoted, double-quoted or
     * single-quoted), the bytes are decoded again with that charset.
     *
     * @param arr raw bytes of (the beginning of) an HTML document; may be
     *        {@code null} or empty
     * @return a two-element array {@code {title, description}}; both entries are
     *         empty when no non-empty title is found, and the description is
     *         empty when no description meta tag is found
     */
    public static String[] getUrlInfosFromText(byte[] arr) {
        if (arr == null || arr.length == 0) {
            return emptyResult();
        }

        // Decode with a fixed charset so results do not depend on the platform default.
        String res = new String(arr, StandardCharsets.UTF_8);
        Charset declared = findDeclaredCharset(res);
        if (declared != null && !declared.equals(StandardCharsets.UTF_8)) {
            res = new String(arr, declared);
        }

        int index = getStartTitleEndPos(res);
        if (index < 0) {
            return emptyResult();
        }
        int lastIndex = res.indexOf("</title>");
        if (lastIndex <= index) {
            return emptyResult();
        }
        String title = res.substring(index, lastIndex);

        int descStart = res.indexOf(DESCRIPTION);
        if (descStart < 0) {
            descStart = res.indexOf(DESCRIPTION2);
        }
        if (descStart < 0) {
            return new String[] { title, "" };
        }
        // DESCRIPTION and DESCRIPTION2 differ only in case, so one length fits both.
        descStart += DESCRIPTION.length();
        int descEnd = res.indexOf('"', descStart);
        if (descEnd < 0) {
            return new String[] { title, "" };
        }
        return new String[] { title, res.substring(descStart, descEnd) };
    }

    /**
     * Finds the position directly after the opening title tag.
     *
     * @param res the text to search
     * @return the index of the first character after {@code <title>} or after the
     *         closing {@code >} of a {@code <title ...>} tag with attributes;
     *         {@code -1} if no opening title tag is found
     */
    public static int getStartTitleEndPos(String res) {
        int index = res.indexOf("<title>");
        if (index >= 0) {
            return index + "<title>".length();
        }

        index = res.indexOf("<title ");
        if (index < 0) {
            return -1;
        }
        index = res.indexOf(">", index);
        return index < 0 ? -1 : index + 1;
    }

    /** Reads up to {@code limit} bytes, looping because one read call may return fewer bytes than requested. */
    private static byte[] readPrefix(InputStream in, int limit) throws IOException {
        byte[] buf = new byte[limit];
        int total = 0;
        while (total < limit) {
            int n = in.read(buf, total, limit - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total == limit ? buf : Arrays.copyOf(buf, total);
    }

    /** Returns the supported charset named after the first {@code charset=} marker, or {@code null} if there is none. */
    private static Charset findDeclaredCharset(String html) {
        int marker = html.indexOf(CHARSET_MARKER);
        if (marker < 0) {
            return null;
        }
        int start = marker + CHARSET_MARKER.length();
        if (start < html.length()) {
            char quote = html.charAt(start);
            if (quote == '"' || quote == '\'') {
                start++;
            }
        }
        int end = start;
        while (end < html.length() && isCharsetNameChar(html.charAt(end))) {
            end++;
        }
        // A name running to the very end of the text may have been cut off by the
        // byte limit, so it is not trusted.
        if (end == start || end == html.length()) {
            return null;
        }
        try {
            return Charset.forName(html.substring(start, end));
        } catch (IllegalArgumentException ex) {
            // Illegal or unsupported charset name: keep the default decoding.
            return null;
        }
    }

    /** Tells whether {@code c} is one of the characters permitted in a charset name. */
    private static boolean isCharsetNameChar(char c) {
        return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9')
                || c == '-' || c == '_' || c == '.' || c == ':' || c == '+';
    }

    /** Creates the "nothing found" result: an empty title and an empty description. */
    private static String[] emptyResult() {
        return new String[] { "", "" };
    }
}