mx.bigdata.jcalais.rest.CalaisRestClient.java Source code

Java tutorial

Introduction

Here is the source code for mx.bigdata.jcalais.rest.CalaisRestClient.java

Source

/*
 *  Copyright 2010 BigData Mx
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *  Inspired by python-calais (http://code.google.com/p/python-calais/)
 */

package mx.bigdata.jcalais.rest;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Formatter;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.xml.sax.InputSource;
import org.w3c.dom.Document;
import org.w3c.dom.Node;

import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.io.ByteStreams;
import com.google.common.io.CharStreams;

import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.ObjectMapper;

import mx.bigdata.jcalais.CalaisClient;
import mx.bigdata.jcalais.CalaisConfig;
import mx.bigdata.jcalais.CalaisConfig.ConnParam;
import mx.bigdata.jcalais.CalaisConfig.ProcessingParam;
import mx.bigdata.jcalais.CalaisConfig.UserParam;
import mx.bigdata.jcalais.CalaisException;
import mx.bigdata.jcalais.CalaisObject;
import mx.bigdata.jcalais.CalaisResponse;

/**
 * {@link CalaisClient} implementation that POSTs content to the REST endpoint
 * named by {@link #RESOURCE} and converts the reply into a
 * {@link CalaisResponse}.
 *
 * <p>The reply is parsed as JSON; when it is not valid JSON it is parsed as
 * an XML {@code <Error>} document and reported as a {@link CalaisException}.
 */
public final class CalaisRestClient implements CalaisClient {

    /** Endpoint every request is posted to. */
    private static final String RESOURCE = "http://api.opencalais.com/enlighten/rest/";

    /** Content type of the form-encoded request body. */
    private static final String TYPE = "application/x-www-form-urlencoded";

    /** Charset used for the request body and as the fallback when decoding replies. */
    private static final String UTF_8 = "UTF-8";

    /** Maximum number of characters accepted by {@link #analyze(String, CalaisConfig)}. */
    private static final int MAX_CONTENT_SIZE = 100000;

    private final ObjectMapper mapper = new ObjectMapper();

    private final String apiKey;

    /**
     * @param apiKey value sent as the {@code licenseID} form field of every request
     */
    public CalaisRestClient(String apiKey) {
        this.apiKey = apiKey;
    }

    /**
     * Fetches {@code url} and analyzes its content with a fresh {@link CalaisConfig}.
     *
     * @throws IOException if the page cannot be fetched or the request fails
     */
    public CalaisResponse analyze(URL url) throws IOException {
        return analyze(url, new CalaisConfig());
    }

    /**
     * Fetches {@code url} and analyzes its content.
     *
     * <p>Note that {@code config} is modified: its external id is set to the
     * URL and its content type to {@code TEXT/HTML}.
     *
     * @throws IOException if the page cannot be fetched or the request fails
     */
    public CalaisResponse analyze(URL url, CalaisConfig config) throws IOException {
        config.set(UserParam.EXTERNAL_ID, url.toString());
        config.set(ProcessingParam.CONTENT_TYPE, "TEXT/HTML");
        URLConnection conn = url.openConnection();
        // Decode with the charset the server declares (UTF-8 otherwise) instead
        // of whatever the platform default happens to be.
        Reader in = new InputStreamReader(conn.getInputStream(), charsetOf(conn));
        try {
            return analyze(in, config);
        } finally {
            // The stream was opened here, so it must be released here.
            in.close();
        }
    }

    /**
     * Reads {@code readable} to the end and analyzes the text with a fresh
     * {@link CalaisConfig}. The caller remains responsible for closing it.
     */
    public CalaisResponse analyze(Readable readable) throws IOException {
        return analyze(readable, new CalaisConfig());
    }

    /**
     * Reads {@code readable} to the end and analyzes the text. The caller
     * remains responsible for closing it.
     */
    public CalaisResponse analyze(Readable readable, CalaisConfig config) throws IOException {
        return analyze(CharStreams.toString(readable), config);
    }

    /** Analyzes {@code content} with a fresh {@link CalaisConfig}. */
    public CalaisResponse analyze(String content) throws IOException {
        return analyze(content, new CalaisConfig());
    }

    /**
     * Posts {@code content} to the service and parses the reply.
     *
     * @param content text to analyze; must be non-empty and at most
     *        {@link #MAX_CONTENT_SIZE} characters long
     * @param config supplies the {@code paramsXML} field and the connection timeouts
     * @return the parsed response
     * @throws IllegalArgumentException if {@code content} is null, empty or too long
     * @throws IOException on transport failure, or when the reply is not valid
     *         JSON and is reported as a {@link CalaisException}
     */
    public CalaisResponse analyze(String content, CalaisConfig config) throws IOException {
        if (Strings.isNullOrEmpty(content) || content.length() > MAX_CONTENT_SIZE) {
            throw new IllegalArgumentException(
                    "Invalid content, either empty or " + "exceeds maximum allowed size");
        }
        Map<String, String> formData = Maps.newHashMap();
        formData.put("licenseID", apiKey);
        formData.put("content", content);
        formData.put("paramsXML", config.getParamsXml());
        String payload = post(formData, config);
        try {
            @SuppressWarnings("unchecked")
            Map<String, Object> map = mapper.readValue(payload, Map.class);
            return processResponse(map, payload);
        } catch (JsonParseException e) {
            // Not JSON: try to interpret the payload as an XML error document.
            throw parseError(payload);
        }
    }

    /**
     * Sends {@code formData} as a form-encoded POST to {@link #RESOURCE} and
     * returns the reply body as text.
     */
    private String post(Map<String, String> formData, CalaisConfig config) throws IOException {
        StringBuilder data = new StringBuilder();
        for (Map.Entry<String, String> me : formData.entrySet()) {
            // Separator-first join: also correct for an empty form.
            if (data.length() > 0) {
                data.append('&');
            }
            data.append(URLEncoder.encode(me.getKey(), UTF_8));
            data.append('=');
            data.append(URLEncoder.encode(me.getValue(), UTF_8));
        }
        URL url = new URL(RESOURCE);
        URLConnection conn = url.openConnection();
        conn.setConnectTimeout(config.get(ConnParam.CONNECT_TIMEOUT));
        conn.setReadTimeout(config.get(ConnParam.READ_TIMEOUT));
        conn.setDoOutput(true);
        conn.setRequestProperty("Content-Type", TYPE);
        OutputStreamWriter out = new OutputStreamWriter(conn.getOutputStream(), UTF_8);
        try {
            out.write(data.toString());
            out.flush();
        } finally {
            out.close();
        }
        Reader in = new InputStreamReader(conn.getInputStream(), charsetOf(conn));
        try {
            return CharStreams.toString(in);
        } finally {
            in.close();
        }
    }

    /**
     * Returns the name of the charset declared in the connection's
     * Content-Type header, or UTF-8 when none is declared or the declared one
     * is not supported by this JVM.
     */
    private static String charsetOf(URLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            String prefix = "charset=";
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (!param.regionMatches(true, 0, prefix, 0, prefix.length())) {
                    continue;
                }
                String name = param.substring(prefix.length()).trim();
                if (name.length() > 1 && name.charAt(0) == '"'
                        && name.charAt(name.length() - 1) == '"') {
                    name = name.substring(1, name.length() - 1);
                }
                try {
                    if (name.length() > 0 && java.nio.charset.Charset.isSupported(name)) {
                        return name;
                    }
                } catch (IllegalArgumentException ignored) {
                    // Syntactically illegal charset name in the header: use the fallback.
                }
            }
        }
        return UTF_8;
    }

    /**
     * Builds a {@link CalaisResponse} from a decoded JSON reply.
     *
     * <p>The {@code "doc"} entry is removed from {@code map} and supplies the
     * {@code info} and {@code meta} objects (empty objects when absent). Every
     * remaining entry must be a JSON object; it is tagged with a {@code "_uri"}
     * key holding its own key and grouped by its {@code "_typeGroup"} value.
     * Note that {@code map} and its nested maps are modified in the process.
     *
     * @param map decoded reply
     * @param payload raw reply text, exposed unchanged through
     *        {@link CalaisResponse#getPayload()}
     */
    public static CalaisResponse processResponse(Map<String, Object> map, final String payload) {
        @SuppressWarnings("unchecked")
        Map<String, Object> doc = (Map<String, Object>) map.remove("doc");
        final CalaisObject info = extractObject(doc, "info");
        final CalaisObject meta = extractObject(doc, "meta");
        Multimap<String, CalaisObject> hierarchy = createHierarchy(map);
        final Iterable<CalaisObject> topics = Iterables.unmodifiableIterable(hierarchy.get("topics"));
        final Iterable<CalaisObject> entities = Iterables.unmodifiableIterable(hierarchy.get("entities"));
        final Iterable<CalaisObject> relations = Iterables.unmodifiableIterable(hierarchy.get("relations"));
        final Iterable<CalaisObject> socialTags = Iterables.unmodifiableIterable(hierarchy.get("socialTag"));
        return new CalaisResponse() {
            public CalaisObject getInfo() {
                return info;
            }

            public CalaisObject getMeta() {
                return meta;
            }

            public Iterable<CalaisObject> getTopics() {
                return topics;
            }

            public Iterable<CalaisObject> getEntities() {
                return entities;
            }

            public Iterable<CalaisObject> getRelations() {
                return relations;
            }

            public Iterable<CalaisObject> getSocialTags() {
                return socialTags;
            }

            public String getPayload() {
                return payload;
            }
        };
    }

    /**
     * Removes {@code key} from {@code map} and wraps its value. A null
     * {@code map} or a missing entry yields an empty object.
     */
    private static CalaisObject extractObject(Map<String, Object> map, String key) {
        @SuppressWarnings("unchecked")
        Map<String, Object> section = (map == null) ? null : (Map<String, Object>) map.remove(key);
        return new MapBasedCalaisObject(section);
    }

    /** Read-only {@link CalaisObject} backed by a private copy of a decoded JSON object. */
    private final static class MapBasedCalaisObject implements CalaisObject {

        private final Map<String, Object> map;

        /**
         * @param map fields to copy; {@code null} is treated as an empty object
         */
        private MapBasedCalaisObject(Map<String, Object> map) {
            // A linked copy keeps the field order and, unlike ImmutableMap,
            // accepts the null values produced by JSON nulls.
            Map<String, Object> copy = Maps.newLinkedHashMap();
            if (map != null) {
                copy.putAll(map);
            }
            this.map = java.util.Collections.unmodifiableMap(copy);
        }

        /** Returns the field's {@code toString()} form, or null when absent or null. */
        public String getField(String field) {
            Object o = map.get(field);
            return (o == null) ? null : o.toString();
        }

        /** Returns a read-only view of the field when it is iterable, else null. */
        public Iterable getList(String field) {
            Object o = map.get(field);
            return (o instanceof Iterable) ? Iterables.unmodifiableIterable((Iterable) o) : null;
        }

        /** Returns the names of all fields of this object. */
        public Iterable<String> getFieldNames() {
            return Iterables.unmodifiableIterable(map.keySet());
        }

        public String toString() {
            return map.toString();
        }

    }

    /**
     * Replaces, in every object of {@code root}, each string value that starts
     * with {@code "http://"} and is itself a key of {@code root} by the object
     * stored under that key.
     *
     * <p>NOTE(review): nothing in this class calls this method.
     */
    private void resolveReferences(Map<String, Object> root) {
        for (Object o : root.values()) {
            @SuppressWarnings("unchecked")
            Map<String, Object> map = (Map<String, Object>) o;
            for (Map.Entry<String, Object> me : map.entrySet()) {
                Object o2 = me.getValue();
                if (o2 instanceof String) {
                    String value = (String) o2;
                    if (value.startsWith("http://") && root.containsKey(value)) {
                        // setValue keeps the replacement safe while iterating.
                        me.setValue(root.get(value));
                    }
                }
            }
        }
    }

    /**
     * Groups the objects of {@code root} by their {@code "_typeGroup"} value,
     * after storing each object's own key under {@code "_uri"}.
     *
     * @throws ClassCastException if a value of {@code root} is not a map
     */
    private static Multimap<String, CalaisObject> createHierarchy(Map<String, Object> root) {
        Multimap<String, CalaisObject> result = ArrayListMultimap.create();
        for (Map.Entry<String, Object> me : root.entrySet()) {
            @SuppressWarnings("unchecked")
            Map<String, Object> map = (Map<String, Object>) me.getValue();
            map.put("_uri", me.getKey());
            String group = (String) map.get("_typeGroup");
            result.put(group, new MapBasedCalaisObject(map));
        }
        return result;
    }

    /**
     * Converts an XML {@code <Error>} payload into a {@link CalaisException}.
     *
     * @param error raw reply text
     * @return the exception describing the reported error; the caller throws it
     * @throws RuntimeException if {@code error} cannot be parsed as XML
     */
    private CalaisException parseError(String error) {
        try {
            DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
            // The payload comes from the network: refuse DOCTYPE declarations so
            // that external entities (XXE) can never be resolved.
            domFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            DocumentBuilder builder = domFactory.newDocumentBuilder();
            StringReader reader = new StringReader(error);
            Document doc = builder.parse(new InputSource(reader));
            XPathFactory factory = XPathFactory.newInstance();
            XPath xpath = factory.newXPath();
            String method = (String) xpath.evaluate("/Error/@Method", doc, XPathConstants.STRING);
            String calaisRequestID = (String) xpath.evaluate("/Error/@calaisRequestID", doc, XPathConstants.STRING);
            String creationDate = (String) xpath.evaluate("/Error/@CreationDate", doc, XPathConstants.STRING);
            String calaisVersion = (String) xpath.evaluate("/Error/@CalaisVersion", doc, XPathConstants.STRING);
            String exception = (String) xpath.evaluate("/Error/Exception/text()", doc, XPathConstants.STRING);
            return new CalaisException(method, calaisRequestID, creationDate, calaisVersion, exception);
        } catch (Exception e) {
            throw new RuntimeException("Unable to parse exception", e);
        }
    }

}