Java tutorial
/*
 * Copyright 2010 BigData Mx
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Inspired by python-calais (http://code.google.com/p/python-calais/)
 */
package mx.bigdata.jcalais.rest;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Formatter;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.xml.sax.InputSource;
import org.w3c.dom.Document;
import org.w3c.dom.Node;

import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.io.ByteStreams;
import com.google.common.io.CharStreams;

import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.ObjectMapper;

import mx.bigdata.jcalais.CalaisClient;
import mx.bigdata.jcalais.CalaisConfig;
import mx.bigdata.jcalais.CalaisConfig.ConnParam;
import mx.bigdata.jcalais.CalaisConfig.ProcessingParam;
import mx.bigdata.jcalais.CalaisConfig.UserParam;
import mx.bigdata.jcalais.CalaisException;
import mx.bigdata.jcalais.CalaisObject;
import mx.bigdata.jcalais.CalaisResponse;

/**
 * {@link CalaisClient} implementation that submits content to the Calais
 * REST endpoint as a URL-encoded form and turns the JSON answer into a
 * {@link CalaisResponse}.
 */
public final class CalaisRestClient implements CalaisClient {

  /** Endpoint every request is posted to. */
  private static final String RESOURCE =
      "http://api.opencalais.com/enlighten/rest/";

  /** Content type of the request body built by {@link #post}. */
  private static final String TYPE = "application/x-www-form-urlencoded";

  /** Charset used for form encoding and for the request/response streams. */
  private static final String CHARSET = "UTF-8";

  /** Largest accepted content length, in characters. */
  private static final int MAX_CONTENT_SIZE = 100000;

  /** Parser feature that rejects any DOCTYPE declaration (blocks XXE). */
  private static final String DISALLOW_DOCTYPE =
      "http://apache.org/xml/features/disallow-doctype-decl";

  private final ObjectMapper mapper = new ObjectMapper();

  private final String apiKey;

  /**
   * Creates a client.
   *
   * @param apiKey value sent as the {@code licenseID} form field of every
   *     request
   */
  public CalaisRestClient(String apiKey) {
    this.apiKey = apiKey;
  }

  /**
   * Analyzes the document found at {@code url} with a fresh
   * {@link CalaisConfig}.
   *
   * @param url location of the document to fetch and analyze
   * @return the parsed response
   * @throws IOException if fetching the document or calling the service fails
   */
  public CalaisResponse analyze(URL url) throws IOException {
    return analyze(url, new CalaisConfig());
  }

  /**
   * Analyzes the document found at {@code url}.
   *
   * <p>The supplied {@code config} is modified: its external id is set to the
   * URL and its content type to {@code TEXT/HTML}.
   *
   * @param url location of the document to fetch and analyze
   * @param config request configuration; mutated as described above
   * @return the parsed response
   * @throws IOException if fetching the document or calling the service fails
   */
  public CalaisResponse analyze(URL url, CalaisConfig config)
      throws IOException {
    config.set(UserParam.EXTERNAL_ID, url.toString());
    config.set(ProcessingParam.CONTENT_TYPE, "TEXT/HTML");
    // The document is decoded with the platform default charset, as before;
    // the reader (and the underlying URL stream) is now always closed.
    Reader in = new InputStreamReader(url.openStream());
    try {
      return analyze(in, config);
    } finally {
      in.close();
    }
  }

  /**
   * Analyzes everything that can be read from {@code readable} with a fresh
   * {@link CalaisConfig}.
   *
   * @param readable source of the content; not closed by this method
   * @return the parsed response
   * @throws IOException if reading the content or calling the service fails
   */
  public CalaisResponse analyze(Readable readable) throws IOException {
    return analyze(readable, new CalaisConfig());
  }

  /**
   * Analyzes everything that can be read from {@code readable}.
   *
   * @param readable source of the content; not closed by this method
   * @param config request configuration
   * @return the parsed response
   * @throws IOException if reading the content or calling the service fails
   */
  public CalaisResponse analyze(Readable readable, CalaisConfig config)
      throws IOException {
    return analyze(CharStreams.toString(readable), config);
  }

  /**
   * Analyzes {@code content} with a fresh {@link CalaisConfig}.
   *
   * @param content text to analyze
   * @return the parsed response
   * @throws IOException if calling the service fails
   * @throws IllegalArgumentException if {@code content} is null, empty or
   *     longer than {@link #MAX_CONTENT_SIZE} characters
   */
  public CalaisResponse analyze(String content) throws IOException {
    return analyze(content, new CalaisConfig());
  }

  /**
   * Posts {@code content} to the service and parses the JSON answer.
   *
   * @param content text to analyze
   * @param config request configuration; supplies the {@code paramsXML} form
   *     field and the connection timeouts
   * @return the parsed response
   * @throws IOException if calling the service fails
   * @throws IllegalArgumentException if {@code content} is null, empty or
   *     longer than {@link #MAX_CONTENT_SIZE} characters
   * @throws CalaisException if the answer is not parseable JSON but is an XML
   *     error document understood by {@link #parseError}
   * @throws RuntimeException if the answer is neither parseable JSON nor a
   *     parseable XML document
   */
  @SuppressWarnings("unchecked") // readValue(..., Map.class) yields a raw Map
  public CalaisResponse analyze(String content, CalaisConfig config)
      throws IOException {
    if (Strings.isNullOrEmpty(content)
        || content.length() > MAX_CONTENT_SIZE) {
      throw new IllegalArgumentException(
          "Invalid content, either empty or "
          + "exceeds maximum allowed size");
    }
    Map<String, String> formData = Maps.newHashMap();
    formData.put("licenseID", apiKey);
    formData.put("content", content);
    formData.put("paramsXML", config.getParamsXml());
    String payload = post(formData, config);
    try {
      Map<String, Object> map = mapper.readValue(payload, Map.class);
      return processResponse(map, payload);
    } catch (JsonParseException e) {
      // A non-JSON answer is treated as an XML error document.
      throw parseError(payload);
    }
  }

  /**
   * Sends {@code formData} to {@link #RESOURCE} as a URL-encoded form and
   * returns the response body.
   *
   * @param formData form fields to send; keys and values must be non-null
   * @param config supplies the connect and read timeouts
   * @return the complete response body, decoded as UTF-8
   * @throws IOException if the connection or either stream fails
   */
  private String post(Map<String, String> formData, CalaisConfig config)
      throws IOException {
    StringBuilder data = new StringBuilder();
    for (Map.Entry<String, String> me : formData.entrySet()) {
      // Separator goes before every pair but the first, so an empty map
      // simply yields an empty body instead of failing on a trailing '&'.
      if (data.length() > 0) {
        data.append("&");
      }
      data.append(URLEncoder.encode(me.getKey(), CHARSET));
      data.append("=");
      data.append(URLEncoder.encode(me.getValue(), CHARSET));
    }
    URL url = new URL(RESOURCE);
    URLConnection conn = url.openConnection();
    conn.setConnectTimeout(config.get(ConnParam.CONNECT_TIMEOUT));
    conn.setReadTimeout(config.get(ConnParam.READ_TIMEOUT));
    conn.setDoOutput(true);
    // State the body format explicitly rather than relying on the
    // connection implementation's default.
    conn.setRequestProperty("Content-Type", TYPE);
    OutputStreamWriter out =
        new OutputStreamWriter(conn.getOutputStream(), CHARSET);
    try {
      out.write(data.toString());
      out.flush();
    } finally {
      out.close();
    }
    // Decode with an explicit charset so the result does not depend on the
    // platform default.
    Reader in = new InputStreamReader(conn.getInputStream(), CHARSET);
    try {
      return CharStreams.toString(in);
    } finally {
      in.close();
    }
  }

  /**
   * Builds a {@link CalaisResponse} from a decoded JSON answer.
   *
   * <p>The {@code "doc"} entry is removed from {@code map} and its
   * {@code "info"} and {@code "meta"} children become
   * {@link CalaisResponse#getInfo()} and {@link CalaisResponse#getMeta()};
   * when any of them is absent the corresponding object is empty. Every
   * remaining entry of {@code map} must be a map; it is tagged with a
   * {@code "_uri"} field holding its key and grouped by its
   * {@code "_typeGroup"} field.
   *
   * @param map decoded JSON answer; modified by this method
   * @param payload raw answer, returned unchanged by
   *     {@link CalaisResponse#getPayload()}
   * @return a response backed by immutable snapshots of the given data
   */
  @SuppressWarnings("unchecked") // JSON objects are decoded as Map<String, Object>
  public static CalaisResponse processResponse(Map<String, Object> map,
      final String payload) {
    Map<String, Object> doc = (Map<String, Object>) map.remove("doc");
    final CalaisObject info = extractObject(doc, "info");
    final CalaisObject meta = extractObject(doc, "meta");
    Multimap<String, CalaisObject> hierarchy = createHierarchy(map);
    final Iterable<CalaisObject> topics =
        Iterables.unmodifiableIterable(hierarchy.get("topics"));
    final Iterable<CalaisObject> entities =
        Iterables.unmodifiableIterable(hierarchy.get("entities"));
    final Iterable<CalaisObject> relations =
        Iterables.unmodifiableIterable(hierarchy.get("relations"));
    final Iterable<CalaisObject> socialTags =
        Iterables.unmodifiableIterable(hierarchy.get("socialTag"));
    return new CalaisResponse() {
      public CalaisObject getInfo() {
        return info;
      }

      public CalaisObject getMeta() {
        return meta;
      }

      public Iterable<CalaisObject> getTopics() {
        return topics;
      }

      public Iterable<CalaisObject> getEntities() {
        return entities;
      }

      public Iterable<CalaisObject> getRelations() {
        return relations;
      }

      public Iterable<CalaisObject> getSocialTags() {
        return socialTags;
      }

      public String getPayload() {
        return payload;
      }
    };
  }

  /**
   * Removes {@code key} from {@code map} and wraps the removed value.
   *
   * @param map map to take the value from; may be null
   * @param key entry to remove
   * @return the wrapped value, or an empty object when {@code map} is null or
   *     has no value for {@code key}
   */
  @SuppressWarnings("unchecked") // JSON objects are decoded as Map<String, Object>
  private static CalaisObject extractObject(Map<String, Object> map,
      String key) {
    Map<String, Object> value =
        (map == null) ? null : (Map<String, Object>) map.remove(key);
    if (value == null) {
      // A missing section becomes an empty object rather than a bare
      // NullPointerException.
      value = ImmutableMap.<String, Object>of();
    }
    return new MapBasedCalaisObject(value);
  }

  /** {@link CalaisObject} backed by an immutable copy of a map. */
  private final static class MapBasedCalaisObject implements CalaisObject {

    private final Map<String, Object> map;

    /**
     * Takes a shallow immutable copy of {@code map}.
     *
     * @param map fields of the object; {@code ImmutableMap.copyOf} rejects
     *     null keys and null values
     */
    private MapBasedCalaisObject(Map<String, Object> map) {
      this.map = ImmutableMap.copyOf(map);
    }

    /**
     * Returns the {@code toString()} form of a field, or null when the field
     * is absent.
     */
    public String getField(String field) {
      Object o = map.get(field);
      return (o == null) ? null : o.toString();
    }

    /**
     * Returns a read-only view of a field when it holds an {@link Iterable},
     * null otherwise.
     */
    @SuppressWarnings("unchecked") // raw Iterable is the declared return type
    public Iterable getList(String field) {
      Object o = map.get(field);
      return (o instanceof Iterable)
          ? Iterables.unmodifiableIterable((Iterable) o)
          : null;
    }

    /** Returns a read-only view of the field names. */
    public Iterable<String> getFieldNames() {
      return Iterables.unmodifiableIterable(map.keySet());
    }

    @Override
    public String toString() {
      return map.toString();
    }
  }

  /**
   * Replaces, in every child map of {@code root}, each string value that
   * starts with {@code "http://"} and is also a key of {@code root} by the
   * object stored under that key.
   *
   * <p>Not invoked anywhere in this class. Every value of {@code root} must
   * be a map, otherwise a {@link ClassCastException} is thrown.
   *
   * @param root map whose child maps are updated in place
   */
  @SuppressWarnings("unchecked") // JSON objects are decoded as Map<String, Object>
  private void resolveReferences(Map<String, Object> root) {
    for (Object o : root.values()) {
      Map<String, Object> map = (Map<String, Object>) o;
      for (Map.Entry<String, Object> me : map.entrySet()) {
        Object o2 = me.getValue();
        if (o2 instanceof String) {
          String value = (String) o2;
          if (value.startsWith("http://") && root.containsKey(value)) {
            // setValue is the supported way to update an entry while
            // iterating over entrySet().
            me.setValue(root.get(value));
          }
        }
      }
    }
  }

  /**
   * Groups the entries of {@code root} by their {@code "_typeGroup"} field.
   *
   * <p>Each entry value must be a map; it receives a {@code "_uri"} field
   * holding the entry key before being wrapped. Entries without a
   * {@code "_typeGroup"} field are stored under the null key.
   *
   * @param root map of identifier to object; the child maps are modified
   * @return the wrapped objects, grouped by type group
   */
  @SuppressWarnings("unchecked") // JSON objects are decoded as Map<String, Object>
  private static Multimap<String, CalaisObject> createHierarchy(
      Map<String, Object> root) {
    Multimap<String, CalaisObject> result = ArrayListMultimap.create();
    for (Map.Entry<String, Object> me : root.entrySet()) {
      Map<String, Object> map = (Map<String, Object>) me.getValue();
      map.put("_uri", me.getKey());
      String group = (String) map.get("_typeGroup");
      result.put(group, new MapBasedCalaisObject(map));
    }
    return result;
  }

  /**
   * Converts an XML error document into a {@link CalaisException}.
   *
   * <p>Reads the {@code Method}, {@code calaisRequestID},
   * {@code CreationDate} and {@code CalaisVersion} attributes of the root
   * {@code Error} element and the text of its {@code Exception} child.
   * DOCTYPE declarations are rejected because the document comes from the
   * network and must not be able to pull in external entities.
   *
   * @param error raw response body
   * @return the exception describing the error; the caller throws it
   * @throws RuntimeException if {@code error} cannot be parsed as XML
   */
  private CalaisException parseError(String error) {
    try {
      DocumentBuilderFactory domFactory =
          DocumentBuilderFactory.newInstance();
      domFactory.setFeature(DISALLOW_DOCTYPE, true);
      domFactory.setExpandEntityReferences(false);
      DocumentBuilder builder = domFactory.newDocumentBuilder();
      StringReader reader = new StringReader(error);
      Document doc = builder.parse(new InputSource(reader));
      XPathFactory factory = XPathFactory.newInstance();
      XPath xpath = factory.newXPath();
      String method = (String) xpath.evaluate(
          "/Error/@Method", doc, XPathConstants.STRING);
      String calaisRequestID = (String) xpath.evaluate(
          "/Error/@calaisRequestID", doc, XPathConstants.STRING);
      String creationDate = (String) xpath.evaluate(
          "/Error/@CreationDate", doc, XPathConstants.STRING);
      String calaisVersion = (String) xpath.evaluate(
          "/Error/@CalaisVersion", doc, XPathConstants.STRING);
      String exception = (String) xpath.evaluate(
          "/Error/Exception/text()", doc, XPathConstants.STRING);
      return new CalaisException(method, calaisRequestID, creationDate,
          calaisVersion, exception);
    } catch (Exception e) {
      throw new RuntimeException("Unable to parse exception", e);
    }
  }
}