// Java tutorial
/* * This is the MIT license, see also http://www.opensource.org/licenses/mit-license.html * * Copyright (c) 2001 Brian Pitcher * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
// $Header: /cvs/jso-common/src/easyshop/downloadhelper/HttpPageGetter.java,v 1.2 2007/12/16 03:16:37 Administrator Exp $
package easyshop.downloadhelper;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.log4j.Logger;

import es.Constants;
import es.util.http.PageGetter;
import es.util.lang.CharTools;
import es.webref.model.PageRef;
import es.webref.model.WebLink;

/**
 * Downloads pages over HTTP, either through Commons HttpClient
 * ({@code getHttpPage}, {@code getPost}) or through
 * {@link java.net.HttpURLConnection} ({@code getDHttpPage}, {@code getURLOnly}).
 *
 * <p>Download failures are not thrown to the caller: the methods log the
 * problem, increment an internal failure counter and return a page object
 * that carries no content and a {@code ConnResponse} with status code 0.
 *
 * <p>{@code userAgent}, {@code HTTP_USER_AGENT} and {@code defaultHttpClient()}
 * are not declared in this class; they are presumably inherited from
 * {@code PageGetter} (not visible here).
 */
public class HttpPageGetter extends PageGetter {

    static Logger log = Logger.getLogger(HttpPageGetter.class.getName());

    /** {@code getHttpPage} refuses responses whose declared length reaches this many bytes. */
    private static final long MAX_CONTENT_LENGTH = 2024000L;

    /** Initial capacity of the in-memory buffer that collects a response body. */
    private static final int INITIAL_BUFFER_SIZE = 10240;

    /** Size of the chunk used when copying a response body. */
    private static final int READ_CHUNK_SIZE = 1024;

    /** Number of downloads that ended in an exception. */
    private int failureCount = 0;

    /** Number of {@code getDHttpPage} calls made so far; only used in log messages. */
    private int count = 0;

    static CharTools tool = new CharTools();

    /** Creates a getter that relies on the superclass defaults. */
    public HttpPageGetter() {
        super();
    }

    /**
     * Manual smoke test: performs the hard-coded login POST and then fetches a
     * member page with the same client, printing both bodies to stdout.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        HttpPageGetter getter = new HttpPageGetter();
        HttpClient client = getter.defaultHttpClient();

        OriHttpPage page = getter.getPost(client, "3745");
        String context = page.getStringContent();
        System.out.println(context);

        HttpPage page2 = getter.getWebPage("http://www.netsun.com/member/Action.cgi?t=mjk&id=4234123", client);
        String c2 = page2.getStringContent();
        System.out.println(c2);
    }

    /**
     * Creates a getter that sends the given User-Agent on
     * {@code HttpURLConnection}-based requests.
     *
     * @param userAgent value for the User-Agent request header; may be {@code null}
     */
    public HttpPageGetter(String userAgent) {
        this.userAgent = userAgent;
    }

    /**
     * Posts a two-field login form with a default client.
     *
     * @param urlStr    form action URL
     * @param userName  name of the user form field
     * @param userValue value of the user form field
     * @param pwdName   name of the password form field
     * @param pwdValue  value of the password form field
     * @return the response page, or a content-less failure page on I/O error
     */
    public OriHttpPage getAuthPost(String urlStr, String userName, String userValue, String pwdName,
            String pwdValue) {
        HttpClient client = this.defaultHttpClient();
        // The user field must carry userValue; sending the field name as its own
        // value made every login attempt use the wrong user.
        NameValuePair[] data = {
            new NameValuePair(userName, userValue),
            new NameValuePair(pwdName, pwdValue),
        };
        return getPost(client, urlStr, data);
    }

    /**
     * Posts the hard-coded netsun.com login form, supplying {@code code} as the
     * {@code v_secret} field.
     *
     * <p>NOTE(review): this method embeds a user name and password in source;
     * they should be moved to configuration. The values are left unchanged
     * here so existing callers keep working.
     *
     * @param client client used to execute the request
     * @param code   value of the {@code v_secret} form field
     * @return the response page, or a content-less failure page on I/O error
     */
    public OriHttpPage getPost(HttpClient client, String code) {
        NameValuePair[] data = {
            new NameValuePair("username", "jan"),
            new NameValuePair("password", "197675"),
            new NameValuePair("returl", "http://www.netsun.com/member/Action.cgi?t=mjk&id=4234123"),
            new NameValuePair("f", "login"),
            new NameValuePair("v_id", "161073"),
            new NameValuePair("v_secret", code),
            new NameValuePair("v_digest", "6bfd3517e5bd86ed1d24520600f62ff4")
        };
        String urlstr = "http://www.netsun.com/member/index.cgi";
        return getPost(client, urlstr, data);
    }

    /**
     * Posts {@code data} to {@code action} with a default client.
     *
     * @param action form action URL
     * @param data   form fields
     * @return the response page, or a content-less failure page on I/O error
     */
    public OriHttpPage getPost(String action, NameValuePair[] data) {
        return getPost(this.defaultHttpClient(), action, data);
    }

    /**
     * Posts {@code data} to {@code action} and buffers the whole response body.
     *
     * <p>The connection is always released back to the client. A response
     * without a Content-type header yields a {@code ConnResponse} with a
     * {@code null} content type instead of a {@code NullPointerException}.
     *
     * @param client client used to execute the request
     * @param action form action URL
     * @param data   form fields
     * @return the response page; on {@code IOException} a page with id -1, no
     *         content and a {@code ConnResponse} with status 0 (the same
     *         failure shape {@code getHttpPage} returns)
     */
    public OriHttpPage getPost(HttpClient client, String action, NameValuePair[] data) {
        PostMethod post = new PostMethod(action);
        post.setRequestBody(data);
        try {
            client.executeMethod(post);
            byte[] content = readFully(post.getResponseBodyAsStream());
            ConnResponse conRes = new ConnResponse(headerValue(post.getResponseHeader("Content-type")),
                    null, 0, 0, post.getStatusCode());
            return new OriHttpPage(action, content, conRes, Constants.CHARTSET_DEFAULT);
        } catch (IOException ioe) {
            log.warn("Caught IO Exception: " + ioe.getMessage(), ioe);
            failureCount++;
            return failedOriPage(action);
        } finally {
            post.releaseConnection();
        }
    }

    /**
     * Downloads {@code thisRef} with {@code client} and wraps the result in an
     * {@code OriHttpPage} carrying the reference id.
     *
     * @param thisRef page to download
     * @param client  client used to execute the request
     * @return the downloaded page wrapped with the reference id
     */
    public OriHttpPage getOriHttpPage(PageRef thisRef, HttpClient client) {
        return getOriHttpPage(thisRef, client, null);
    }

    /**
     * Downloads {@code thisRef} with {@code client} and wraps the result in an
     * {@code OriHttpPage} carrying the reference id.
     *
     * @param thisRef page to download
     * @param client  client used to execute the request
     * @param charSet charset to assign to the page, or {@code null} to detect it
     * @return the downloaded page wrapped with the reference id
     */
    public OriHttpPage getOriHttpPage(PageRef thisRef, HttpClient client, String charSet) {
        HttpPage hpage = getHttpPage(thisRef, client, charSet);
        return new OriHttpPage(thisRef.getRefId(), thisRef.getUrlStr(), hpage.getContent(), null,
                hpage.getResponse(), hpage.getCharSet());
    }

    /**
     * Downloads {@code thisRef} with a default client.
     *
     * @param thisRef page to download
     * @return the downloaded page, or a failure page
     */
    public HttpPage getHttpPageWithDefaultHttpClient(PageRef thisRef) {
        return getHttpPage(thisRef, this.defaultHttpClient(), null);
    }

    /**
     * Downloads {@code urStr} with a default client configured for preemptive
     * authentication; see {@link #getAuthWebPage(String, HttpClient, String, String)}.
     *
     * @param urStr    URL to download
     * @param userName credential user name
     * @param password credential password
     * @return the downloaded page, or a failure page
     */
    public HttpPage getAuthHttpPage(String urStr, String userName, String password) {
        return getAuthWebPage(urStr, this.defaultHttpClient(), userName, password);
    }

    /**
     * Downloads {@code thisRef} with {@code client}, detecting the charset.
     *
     * @param thisRef page to download
     * @param client  client used to execute the request
     * @return the downloaded page, or a failure page
     */
    public HttpPage getHttpPage(PageRef thisRef, HttpClient client) {
        return getHttpPage(thisRef, client, null);
    }

    /**
     * Downloads {@code thisRef} with a GET request that follows redirects.
     *
     * <p>The page charset is, in order: {@code charSet} when non-null (matching
     * {@code getDHttpPage}), the charset reported by the {@code ConnResponse},
     * or one of gb2312/utf-8/gbk when the body contains the corresponding HTML
     * meta declaration.
     *
     * @param thisRef page to download
     * @param client  client used to execute the request; its User-Agent
     *                parameter is overwritten with {@code HTTP_USER_AGENT}
     * @param charSet charset to assign to the page, or {@code null} to detect it
     * @return the downloaded page; a page with id -1, no content and status 0
     *         when the declared length is too large or any exception occurs
     */
    public HttpPage getHttpPage(PageRef thisRef, HttpClient client, String charSet) {
        client.getParams().setParameter(HttpMethodParams.USER_AGENT, HTTP_USER_AGENT);
        String urlStr = thisRef.getUrlStr();
        GetMethod get = null;
        try {
            get = new GetMethod(urlStr);
            get.setFollowRedirects(true);
            client.executeMethod(get);

            if (get.getResponseContentLength() >= MAX_CONTENT_LENGTH) {
                log.info("content is too large, can't download!");
                return failedOriPage(urlStr);
            }

            byte[] content = readFully(get.getResponseBodyAsStream());
            ConnResponse conRes = new ConnResponse(headerValue(get.getResponseHeader("Content-type")),
                    null, 0, 0, get.getStatusCode());

            String charset = charSet;
            if (charset == null) {
                charset = conRes.getCharSet();
            }
            if (charset == null) {
                charset = detectMetaCharset(content);
            }
            return new HttpPage(urlStr, content, conRes, charset);
        } catch (IOException ioe) {
            log.warn("Caught IO Exception: " + ioe.getMessage(), ioe);
            failureCount++;
            return failedOriPage(urlStr);
        } catch (Exception e) {
            log.warn("Caught Exception: " + e.getMessage(), e);
            failureCount++;
            return failedOriPage(urlStr);
        } finally {
            // get stays null when the GetMethod constructor rejects the URL.
            if (get != null) {
                get.releaseConnection();
            }
        }
    }

    /**
     * Downloads {@code ref} through {@code HttpURLConnection}, labelling the
     * page as utf-8, and wraps it with the reference id.
     *
     * @param ref page to download
     * @return the downloaded page wrapped with the reference id
     */
    public OriHttpPage getOriHttpPage(PageRef ref) {
        HttpPage page = getDHttpPage(ref, "utf-8");
        return new OriHttpPage(ref.getRefId(), page.getUrlStr(), page.getContent(), null, page.getResponse(),
                page.getCharSet());
    }

    /**
     * Downloads {@code ref} through {@code HttpURLConnection} and wraps it
     * with the reference id.
     *
     * @param ref     page to download
     * @param charSet charset to assign to the page, or {@code null} to take it
     *                from the response
     * @return the downloaded page wrapped with the reference id
     */
    public OriHttpPage getOriHttpPage(PageRef ref, String charSet) {
        HttpPage page = getDHttpPage(ref, charSet);
        return new OriHttpPage(ref.getRefId(), page.getUrlStr(), page.getContent(), null, page.getResponse(),
                page.getCharSet());
    }

    /**
     * Requests {@code url} and reports only the response metadata (status
     * code, content type, charset); the body is not read.
     *
     * @param url link to probe
     * @return a content-less page describing the response; on a null URL or
     *         any exception, a content-less page with status 0
     */
    public HttpPage getURLOnly(WebLink url) {
        log.debug("getURL(" + url + ")");
        String urlStr = url.getUrlStr();
        if (urlStr == null) {
            return failedPage(null);
        }

        URL requestedURL = null;
        HttpURLConnection conn = null;
        try {
            requestedURL = new URL(urlStr);
            log.debug("Creating HTTP connection to " + requestedURL);
            conn = (HttpURLConnection) requestedURL.openConnection();
            if (userAgent != null) {
                log.debug("Setting User-Agent to " + userAgent);
                conn.setRequestProperty("User-Agent", userAgent);
            }
            conn.setUseCaches(false);

            log.debug("Opening URL");
            conn.connect();
            String resp = conn.getResponseMessage();
            log.debug("Remote server response: " + resp);

            int code = conn.getResponseCode();
            if (code != 200) {
                log.error("Could not get connection for code=" + code);
            }
            ConnResponse conRes = new ConnResponse(conn.getContentType(), null, 0, 0, code);
            return new HttpPage(requestedURL.toExternalForm(), null, conRes, conRes.getCharSet());
        } catch (IOException ioe) {
            log.warn("Caught IO Exception: " + ioe.getMessage(), ioe);
            failureCount++;
            return failedPage(describe(requestedURL, urlStr));
        } catch (Exception e) {
            log.warn("Caught Exception: " + e.getMessage(), e);
            failureCount++;
            return failedPage(describe(requestedURL, urlStr));
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Downloads {@code urlStr} through {@code HttpURLConnection}, taking the
     * charset from the response.
     *
     * @param urlStr URL to download
     * @return the downloaded page, or a failure page
     */
    public HttpPage getWebPage(String urlStr) {
        PageRef ref = new PageRef(urlStr);
        return getDHttpPage(ref, null);
    }

    /**
     * Downloads {@code urlStr} with a default HttpClient.
     *
     * @param urlStr URL to download
     * @return the downloaded page, or a failure page
     */
    public HttpPage getWebPageWithHttpClient(String urlStr) {
        return getWebPage(urlStr, defaultHttpClient());
    }

    /**
     * Downloads {@code urlStr} with the given HttpClient.
     *
     * @param urlStr URL to download
     * @param client client used to execute the request
     * @return the downloaded page, or a failure page
     */
    public HttpPage getWebPage(String urlStr, HttpClient client) {
        PageRef ref = new PageRef(urlStr);
        return getHttpPage(ref, client);
    }

    /**
     * Downloads {@code urlStr} after registering preemptive credentials on the
     * client.
     *
     * <p>NOTE(review): the credentials are scoped to host "taobao.com", port
     * 80, so they are presumably not offered to any other host. Confirm
     * whether the scope should be derived from {@code urlStr}.
     *
     * @param urlStr   URL to download
     * @param client   client to configure and use; when {@code null} a default
     *                 client is created instead of failing
     * @param userName credential user name
     * @param password credential password
     * @return the downloaded page, or a failure page
     */
    public HttpPage getAuthWebPage(String urlStr, HttpClient client, String userName, String password) {
        PageRef ref = new PageRef(urlStr);
        HttpClient effectiveClient = (client != null) ? client : defaultHttpClient();

        effectiveClient.getParams().setAuthenticationPreemptive(true);
        Credentials defaultcreds = new UsernamePasswordCredentials(userName, password);
        effectiveClient.getState().setCredentials(new AuthScope("taobao.com", 80, AuthScope.ANY_REALM),
                defaultcreds);

        return getHttpPage(ref, effectiveClient);
    }

    /**
     * Downloads {@code url} through {@code HttpURLConnection}, labelling the
     * page as gbk.
     *
     * @param url page to download
     * @return the downloaded page, or a failure page
     */
    public HttpPage getDHttpPage(PageRef url) {
        return getDHttpPage(url, "gbk");
    }

    /**
     * Downloads {@code url} through {@code HttpURLConnection} and buffers the
     * whole body.
     *
     * @param url     page to download
     * @param charSet charset to assign to the page, or {@code null} to take it
     *                from the response
     * @return the downloaded page. Failure shapes:
     *         <ul>
     *         <li>null or malformed URL: page with id -1, null URL, status 0;</li>
     *         <li>status other than 200: content-less page carrying that status;</li>
     *         <li>fewer bytes received than the declared Content-Length:
     *             content-less page whose response records the received length;</li>
     *         <li>any exception: content-less page with status 0.</li>
     *         </ul>
     */
    public HttpPage getDHttpPage(PageRef url, String charSet) {
        count++;
        log.debug("getURL(" + count + ")");

        String urlStr = url.getUrlStr();
        if (urlStr == null) {
            return failedOriPage(null);
        }

        URL requestedURL;
        try {
            requestedURL = new URL(urlStr);
        } catch (MalformedURLException e1) {
            log.error("wrong urlstr" + urlStr);
            return failedOriPage(null);
        }
        String externalForm = requestedURL.toExternalForm();

        try {
            log.debug("Creating HTTP connection to " + requestedURL);
            HttpURLConnection conn = (HttpURLConnection) requestedURL.openConnection();
            if (userAgent != null) {
                log.debug("Setting User-Agent to " + userAgent);
                conn.setRequestProperty("User-Agent", userAgent);
            }
            conn.setUseCaches(false);

            log.debug("Opening URL");
            conn.connect();
            String resp = conn.getResponseMessage();
            log.debug("Remote server response: " + resp);

            int code = conn.getResponseCode();
            if (code != 200) {
                log.error("Could not get connection for code=" + code);
                ConnResponse conRes = new ConnResponse(null, null, 0, 0, code);
                return new HttpPage(externalForm, null, conRes, null);
            }

            long serverDate = conn.getDate();
            log.debug("start download(" + count + ")");
            // readFully closes the stream even when the copy fails part-way.
            byte[] content = readFully(conn.getInputStream());
            int declaredLength = conn.getContentLength();

            if (content.length < declaredLength) {
                log.warn("Didn't download full content for URL: " + url);
                ConnResponse conRes = new ConnResponse(conn.getContentType(), null, content.length,
                        serverDate, code);
                // Report a charset here, not the raw Content-Type header value.
                return new HttpPage(externalForm, null, conRes, chooseCharset(charSet, conRes));
            }

            log.debug("download(" + count + ")");
            ConnResponse conRes = new ConnResponse(conn.getContentType(), null, declaredLength, serverDate,
                    code);
            return new HttpPage(externalForm, content, conRes, chooseCharset(charSet, conRes));
        } catch (IOException ioe) {
            log.warn("Caught IO Exception: " + ioe.getMessage(), ioe);
            failureCount++;
            return failedPage(externalForm);
        } catch (Exception e) {
            log.warn("Caught Exception: " + e.getMessage(), e);
            failureCount++;
            return failedPage(externalForm);
        }
    }

    /** Reads {@code in} to the end and closes it; a {@code null} stream yields an empty array. */
    private static byte[] readFully(InputStream in) throws IOException {
        if (in == null) {
            return new byte[0];
        }
        BufferedInputStream buffered = new BufferedInputStream(in);
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE);
            byte[] chunk = new byte[READ_CHUNK_SIZE];
            int n;
            while ((n = buffered.read(chunk)) >= 0) {
                collected.write(chunk, 0, n);
            }
            return collected.toByteArray();
        } finally {
            try {
                buffered.close();
            } catch (IOException ignored) {
                // The body has already been read or the read failure is propagating;
                // a close failure adds nothing the caller can act on.
                log.debug("Ignoring failure while closing response stream: " + ignored.getMessage());
            }
        }
    }

    /** Returns the header's value, or {@code null} when the header is absent. */
    private static String headerValue(Header header) {
        return (header == null) ? null : header.getValue();
    }

    /** Returns {@code requested} when non-null, otherwise the charset reported by {@code conRes}. */
    private static String chooseCharset(String requested, ConnResponse conRes) {
        return (requested != null) ? requested : conRes.getCharSet();
    }

    /**
     * Looks for an HTML meta content-type declaration of gb2312, utf-8 or gbk
     * in {@code content}; returns {@code null} when none is present.
     */
    private static String detectMetaCharset(byte[] content) throws IOException {
        // ISO-8859-1 maps every byte to one char, so the ASCII markers are found
        // regardless of the platform default charset.
        String text = new String(content, "ISO-8859-1");
        if (text.indexOf("content=\"text/html; charset=gb2312") >= 0) {
            return "gb2312";
        }
        if (text.indexOf("content=\"text/html; charset=utf-8") >= 0) {
            return "utf-8";
        }
        if (text.indexOf("content=\"text/html; charset=gbk") >= 0) {
            return "gbk";
        }
        return null;
    }

    /** Returns the URL's external form, or {@code fallback} when the URL was never parsed. */
    private static String describe(URL parsed, String fallback) {
        return (parsed != null) ? parsed.toExternalForm() : fallback;
    }

    /** Builds the content-less {@code OriHttpPage} (id -1, status 0) used to report a failure. */
    private static OriHttpPage failedOriPage(String urlStr) {
        ConnResponse conRes = new ConnResponse(null, null, 0, 0, 0);
        return new OriHttpPage(-1, urlStr, null, null, conRes, null);
    }

    /** Builds the content-less {@code HttpPage} (status 0) used to report a failure. */
    private static HttpPage failedPage(String urlStr) {
        ConnResponse conRes = new ConnResponse(null, null, 0, 0, 0);
        return new HttpPage(urlStr, null, conRes, null);
    }
}