// Java tutorial
/** /** * villemos solutions [space^] (http://www.villemos.com) * Probe. Send. Act. Emergent solution. * Copyright 2011 Gert Villemos * All Rights Reserved. * * Released under the Apache license, version 2.0 (do what ever * you want, just dont claim ownership). * * NOTICE: All information contained herein is, and remains * the property of villemos solutions, and its suppliers * if any. The intellectual and technical concepts contained * herein are proprietary to villemos solutions * and its suppliers and may be covered by European and Foreign Patents, * patents in process, and are protected by trade secret or copyright law. * * Dissemination of this information or reproduction of this material * is strictly forbidden unless prior written permission is obtained * from villemos solutions. * * And it wouldn't be nice either. * */ package com.villemos.ispace.webster; import java.io.InputStream; import java.net.ProxySelector; import java.net.URI; import java.security.SecureRandom; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.net.ssl.SSLContext; import javax.net.ssl.TrustManager; import org.apache.camel.Exchange; import org.apache.camel.impl.DefaultExchange; import org.apache.camel.impl.DefaultProducer; import org.apache.http.HttpEntity; import org.apache.http.HttpHost; import org.apache.http.HttpResponse; import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.client.CookieStore; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.params.ClientPNames; import org.apache.http.client.params.CookiePolicy; import org.apache.http.client.protocol.ClientContext; import org.apache.http.conn.ClientConnectionManager; import org.apache.http.conn.params.ConnRoutePNames; import org.apache.http.conn.scheme.PlainSocketFactory; import org.apache.http.conn.scheme.Scheme; import 
org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.scheme.SocketFactory;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.ProxySelectorRoutePlanner;
import org.apache.http.impl.conn.SingleClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.villemos.ispace.api.InformationObject;
import com.villemos.ispace.api.ResultSet;
import com.villemos.ispace.api.SolrOptions;
import com.villemos.ispace.api.Suggestion;
import com.villemos.ispace.httpcrawler.EasyX509TrustManager;
import com.villemos.ispace.httpcrawler.HttpClientConfigurer;

/**
 * Camel producer that looks up every word of a query at the HTTP location
 * configured on the {@link WebsterEndpoint}.
 *
 * <p>The query is read from the {@code SolrOptions.query} header of the
 * incoming message and split on whitespace. For each word one HTTP GET is
 * issued; from a 200 response the producer extracts
 * <ul>
 *   <li>a definition (text between the {@code INFOLINKS_ON}/{@code INFOLINKS_OFF}
 *       markers), wrapped in an {@link InformationObject}, and</li>
 *   <li>spelling suggestions (entries of the {@code franklin_spelling_help}
 *       list), wrapped in {@link Suggestion} objects.</li>
 * </ul>
 *
 * <p>If the {@code SolrOptions.stream} header is set, every extracted object is
 * sent as its own exchange to the endpoint URI named by that header. Otherwise
 * a single {@link ResultSet} holding the results of all words is set as the
 * body of the OUT message.
 */
public class WebsterProducer extends DefaultProducer {

    private static final Logger LOG = LoggerFactory.getLogger(WebsterProducer.class);

    /** Matches any markup tag; used to strip HTML from extracted fragments. */
    private static final Pattern MARKUP = Pattern.compile("<.*?>");

    /** Matches a run of whitespace; used to split the query and to normalise extracted text. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Matches the opening of one entry in the spelling-help list. */
    private static final Pattern SUGGESTION_SEPARATOR = Pattern.compile("<li><a href=.*?>");

    private final WebsterEndpoint endpoint;

    /** HTTP client of the current / most recent {@link #process(Exchange)} call. It is shut down when that call ends. */
    protected DefaultHttpClient client = null;

    /** Cookie store shared by all requests issued by this producer. */
    protected CookieStore cookieStore = new BasicCookieStore();

    /** Host the requests are sent to; rebuilt from the endpoint on each {@link #process(Exchange)} call. */
    protected HttpHost target = null;

    /** Per-request HTTP context; replaced for every word that is looked up. */
    protected HttpContext localContext = null;

    /**
     * When {@code true} the HTTPS scheme is registered with an SSL context backed by
     * {@link EasyX509TrustManager} instead of the default trust configuration.
     */
    protected boolean ignoreAuthenticationFailure = true;

    /** Captures the definition fragment of a result page. */
    protected Pattern pattern = Pattern.compile("<!--INFOLINKS_ON-->(.*?)<!--INFOLINKS_OFF-->");

    /** Captures the spelling-help list of a result page. */
    protected Pattern spellPattern = Pattern.compile(
            "<ol id=\"franklin_spelling_help\" class=\"franklin-spelling-help\">(.*?)<div class=\"franklin-promo\"><br />");

    /**
     * Creates a producer bound to the given endpoint.
     *
     * @param endpoint endpoint supplying protocol, domain, port, path, proxy and credentials
     */
    public WebsterProducer(WebsterEndpoint endpoint) {
        super(endpoint);
        this.endpoint = endpoint;
    }

    /**
     * Looks up each whitespace-separated word of the {@code SolrOptions.query} header.
     *
     * <p>Results are either streamed to the endpoint named by the
     * {@code SolrOptions.stream} header (one exchange per object) or, when that
     * header is absent, collected into one {@link ResultSet} that becomes the
     * OUT body. A missing query header is logged and treated as an empty query.
     *
     * @param exchange the exchange carrying the query (and optional stream) header
     * @throws Exception if the HTTP client cannot be set up, a request fails,
     *         or a lookup URI cannot be built
     */
    @Override
    public void process(Exchange exchange) throws Exception {
        WebsterEndpoint config = getWebsterEndpoint();

        String destination = (String) exchange.getIn().getHeader(SolrOptions.stream);
        boolean streaming = destination != null;

        String query = (String) exchange.getIn().getHeader(SolrOptions.query);
        if (query == null) {
            LOG.warn("No '{}' header on the exchange; nothing to look up.", SolrOptions.query);
            query = "";
        }

        client = createHttpClient();
        // Fully qualified to avoid touching the file's import list.
        org.apache.camel.ProducerTemplate template = null;
        try {
            configureProxy(config);
            configureCredentials(config);

            /* Default cookie policy and store. Can be overridden for a specific method using for example
             * method.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BROWSER_COMPATIBILITY); */
            client.setCookieStore(cookieStore);
            client.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH);

            target = buildTarget(config);

            if (streaming) {
                // One template for the whole call; it is stopped in the finally block.
                template = endpoint.getCamelContext().createProducerTemplate();
            }

            ResultSet collected = new ResultSet();
            for (String word : WHITESPACE.split(query)) {
                if (word.length() == 0) {
                    // Leading whitespace or an empty query yields an empty token.
                    continue;
                }
                if (streaming) {
                    ResultSet found = new ResultSet();
                    lookup(config, word, found);
                    streamResults(template, destination, found);
                } else {
                    lookup(config, word, collected);
                }
            }

            if (!streaming) {
                // Set once, after all words, so earlier words are not overwritten by later ones.
                exchange.getOut().setBody(collected);
            }
        } finally {
            try {
                if (template != null) {
                    template.stop();
                }
            } finally {
                // Release the sockets held by the per-call client.
                client.getConnectionManager().shutdown();
            }
        }
    }

    /**
     * Returns the endpoint this producer was created for.
     *
     * @return the owning endpoint
     */
    protected WebsterEndpoint getWebsterEndpoint() {
        return endpoint;
    }

    /** Builds the HTTP client, optionally with the relaxed SSL trust configuration. */
    private DefaultHttpClient createHttpClient() throws Exception {
        if (!ignoreAuthenticationFailure) {
            return new DefaultHttpClient();
        }

        // SECURITY NOTE(review): the original author describes EasyX509TrustManager as
        // "a TrustManager that trusts everything". With this branch active (the default)
        // server certificates are presumably not validated - confirm this is intended.
        SSLContext sslContext = SSLContext.getInstance("SSL");
        sslContext.init(null, new TrustManager[] { new EasyX509TrustManager() }, new SecureRandom());

        SchemeRegistry schemeRegistry = new SchemeRegistry();
        SSLSocketFactory secureSockets = new SSLSocketFactory(sslContext);
        schemeRegistry.register(new Scheme("https", secureSockets, 443));
        SocketFactory plainSockets = new PlainSocketFactory();
        schemeRegistry.register(new Scheme("http", plainSockets, 80));

        HttpParams params = new BasicHttpParams();
        ClientConnectionManager connections = new SingleClientConnManager(params, schemeRegistry);
        return new DefaultHttpClient(connections, params);
    }

    /** Routes through the endpoint's explicit proxy if one is configured, else through the JVM default proxy selector. */
    private void configureProxy(WebsterEndpoint config) {
        String proxyHost = config.getProxyHost();
        Integer proxyPort = config.getProxyPort();

        if (proxyHost != null && proxyPort != null) {
            HttpHost proxy = new HttpHost(proxyHost, proxyPort);
            client.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
        } else {
            ProxySelectorRoutePlanner routePlanner = new ProxySelectorRoutePlanner(
                    client.getConnectionManager().getSchemeRegistry(), ProxySelector.getDefault());
            client.setRoutePlanner(routePlanner);
        }
    }

    /**
     * Registers the endpoint's user/password for the target domain and port, if both are set.
     * This only registers the credentials with the client's credentials provider; it does not
     * by itself configure preemptive authentication.
     */
    private void configureCredentials(WebsterEndpoint config) {
        if (config.getAuthenticationUser() != null && config.getAuthenticationPassword() != null) {
            client.getCredentialsProvider().setCredentials(
                    new AuthScope(config.getDomain(), config.getPort()),
                    new UsernamePasswordCredentials(config.getAuthenticationUser(),
                            config.getAuthenticationPassword()));
        }
    }

    /** Builds the target host; port and protocol are only made explicit when the port is not 80. */
    private static HttpHost buildTarget(WebsterEndpoint config) {
        if (config.getPort() != 80) {
            return new HttpHost(config.getDomain(), config.getPort(), config.getProtocol());
        }
        return new HttpHost(config.getDomain());
    }

    /**
     * Builds {@code protocol://domain[:port]/path/word} for a single word. The port is included
     * only when it is not 80. Characters that are illegal in a URI are percent-encoded by the
     * {@link URI} constructor (which also quotes any literal {@code %}).
     */
    private static URI buildLookupUri(WebsterEndpoint config, String word) throws java.net.URISyntaxException {
        String authority = config.getDomain();
        if (config.getPort() != 80) {
            authority += ":" + config.getPort();
        }
        String path = "/" + config.getPath() + "/" + word;
        return new URI(config.getProtocol(), authority, path, null, null);
    }

    /** Fetches the page for one word and adds whatever definition / suggestions it contains to {@code results}. */
    private void lookup(WebsterEndpoint config, String word, ResultSet results) throws Exception {
        URI uri = buildLookupUri(config, word);

        localContext = new BasicHttpContext();
        localContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore);

        HttpUriRequest method = new HttpGet(uri);
        HttpResponse response = client.execute(target, method, localContext);

        // Always read the body, also on failure, so the connection is free for the next word.
        HttpEntity entity = response.getEntity();
        String page = (entity == null) ? "" : HttpClientConfigurer.readFully(entity.getContent());

        int status = response.getStatusLine().getStatusCode();
        if (status != 200) {
            LOG.warn("Lookup of '{}' at {} returned HTTP status {}.",
                    new Object[] { word, uri, Integer.valueOf(status) });
            LOG.debug("Response body of failed lookup: {}", page);
            return;
        }

        Matcher definition = pattern.matcher(page);
        if (definition.find()) {
            String withoutMarkup = MARKUP.matcher(definition.group(1)).replaceAll("");
            String text = WHITESPACE.matcher(withoutMarkup).replaceAll(" ");

            InformationObject io = new InformationObject();
            io.hasUri = uri.toString();
            io.fromSource = "Webster";
            io.hasTitle = "Webster definition of '" + word + "'.";
            io.ofEntityType = "Definition";
            io.ofMimeType = "text/html";
            io.withRawText = text;
            io.score = 20;
            results.informationobjects.add(io);
        }

        Matcher spelling = spellPattern.matcher(page);
        if (spelling.find()) {
            for (String element : SUGGESTION_SEPARATOR.split(spelling.group(1))) {
                if (element.trim().length() > 0) {
                    String alternative = MARKUP.matcher(element).replaceAll("").trim();
                    results.suggestions.add(new Suggestion(word, alternative, "Webster"));
                }
            }
        }
    }

    /** Sends every information object, then every suggestion, of {@code found} to {@code destination}. */
    private void streamResults(org.apache.camel.ProducerTemplate template, String destination, ResultSet found) {
        for (InformationObject io : found.informationobjects) {
            sendAsExchange(template, destination, io);
        }
        for (Suggestion suggestion : found.suggestions) {
            sendAsExchange(template, destination, suggestion);
        }
    }

    /** Wraps {@code body} in a fresh exchange and sends it to {@code destination}. */
    private void sendAsExchange(org.apache.camel.ProducerTemplate template, String destination, Object body) {
        Exchange outgoing = new DefaultExchange(endpoint.getCamelContext());
        outgoing.getIn().setBody(body);
        template.send(destination, outgoing);
    }
}