Java tutorial
/* * Copyright 2013 Future TV, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package tv.icntv.log.crawl2.commons; import com.alibaba.fastjson.JSON; import com.google.common.base.Strings; import org.apache.http.*; import org.apache.http.client.HttpResponseException; import org.apache.http.client.ResponseHandler; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.message.BasicHeader; import org.apache.http.message.BasicNameValuePair; import org.apache.http.protocol.HTTP; import org.apache.http.util.EntityUtils; import java.io.IOException; import java.nio.charset.Charset; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Created by Johnson.Liu * <p/> * Author: Johnson.Liu * Date: 2013/10/18 * Time: 15:02 */ public class HttpClientUtil { private static final int CONNECTION_REQUEST_TIMEOUT = 5 * 60 * 1000; private static final int SOCKET_TIMEOUT = 10 * 60 * 1000; private static final String DEFAULT_ENCODING = "utf8"; public static String getContent(String url, Charset charset) throws IOException { HttpGet request = new HttpGet(url); request.setConfig(RequestConfig.custom().setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT) .setSocketTimeout(SOCKET_TIMEOUT).build()); CloseableHttpClient client = HttpClientHolder.getClient(); CloseableHttpResponse response = null; response = client.execute(request); return EntityUtils.toString(response.getEntity(), charset); } /** * Get content by url as string * * @param url original url * @return page content * @throws java.io.IOException */ public static String getContent(String url) throws IOException { // construct request HttpGet request = new HttpGet(url); request.setHeader(new BasicHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")); request.setConfig(RequestConfig.custom().setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT) .setSocketTimeout(SOCKET_TIMEOUT).build()); // construct response handler ResponseHandler<String> handler = new ResponseHandler<String>() { @Override public String handleResponse(final HttpResponse response) throws IOException { StatusLine status = response.getStatusLine(); // status if (status.getStatusCode() != HttpStatus.SC_OK) { throw new HttpResponseException(status.getStatusCode(), status.getReasonPhrase()); } // get encoding in header String encoding = getPageEncoding(response); boolean encodingFounded = true; if (Strings.isNullOrEmpty(encoding)) { encodingFounded = false; encoding = "iso-8859-1"; } // get content and find real encoding HttpEntity entity = response.getEntity(); if (entity == null) { return null; } // get content byte[] contentBytes = EntityUtils.toByteArray(entity); if (contentBytes == null) { return null; } // found encoding if (encodingFounded) { return new String(contentBytes, encoding); } // attempt to discover encoding String rawContent = new String(contentBytes, DEFAULT_ENCODING); Matcher matcher = PATTERN_HTML_CHARSET.matcher(rawContent); if (matcher.find()) { String realEncoding = matcher.group(1); if (!encoding.equalsIgnoreCase(realEncoding)) { // bad luck :( return new String(rawContent.getBytes(encoding), realEncoding); } } // not found right encoding :) return rawContent; } }; // execute CloseableHttpClient client = HttpClientHolder.getClient(); return client.execute(request, handler); } public static String getContentPost(String url, Map<String, String> params) throws IOException { HttpPost httpPost = new HttpPost(url); List<NameValuePair> formParams = new ArrayList<NameValuePair>(); if (params != null) { Set set = params.keySet(); Iterator iterator = set.iterator(); while (iterator.hasNext()) { Object key = iterator.next(); Object value = params.get(key); formParams.add(new BasicNameValuePair(key.toString(), value.toString())); } } httpPost.setHeader(new BasicHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")); httpPost.setEntity(new UrlEncodedFormEntity(formParams, HTTP.UTF_8)); // execute CloseableHttpClient client = HttpClientHolder.getClient(); return EntityUtils.toString(client.execute(httpPost).getEntity(), "utf-8"); } private static final Pattern PATTERN_HTML_CHARSET = Pattern .compile("(?i)\\w+/\\w+;\\s*charset=([a-zA-Z0-9\\-]+)"); private static String getPageEncoding(HttpResponse response) { Header contentType = response.getEntity().getContentType(); if (contentType == null || Strings.isNullOrEmpty(contentType.getValue())) { return null; } Matcher matcher = PATTERN_HTML_CHARSET.matcher(contentType.getValue()); if (matcher.find()) { return matcher.group(1); } return null; } public static void main(String[] args) throws IOException { String url = "http://58.214.17.70:8081/tsop-api/api/getChannelDetails.json?channelId=9"; } }