org.nuxeo.ecm.platform.semanticentities.sources.StanbolEntityHubSource.java Source code

Java tutorial

Introduction

Here is the source code for org.nuxeo.ecm.platform.semanticentities.sources.StanbolEntityHubSource.java

Source

/*
 * (C) Copyright 2010 Nuxeo SA (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Olivier Grisel
 */
package org.nuxeo.ecm.platform.semanticentities.sources;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.net.URI;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import javax.ws.rs.core.UriBuilder;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.ClientException;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.model.Property;
import org.nuxeo.ecm.core.api.model.PropertyException;
import org.nuxeo.ecm.core.schema.types.Type;
import org.nuxeo.ecm.core.schema.types.primitives.StringType;
import org.nuxeo.ecm.platform.semanticentities.DereferencingException;
import org.nuxeo.ecm.platform.semanticentities.EntitySuggestion;
import org.nuxeo.ecm.platform.semanticentities.service.ParameterizedHTTPEntitySource;
import org.nuxeo.ecm.platform.semanticentities.service.RemoteEntitySourceDescriptor;

/**
 * Implementation of the RemoteEntitySource interface from the HTTP endpoint of the EntityHub of a Stanbol instance.
 */
public class StanbolEntityHubSource extends ParameterizedHTTPEntitySource {

    /** URI of the RDF type predicate; used as the key/field name carrying type information in JSON payloads. */
    public static final String RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

    private static final Log log = LogFactory.getLog(StanbolEntityHubSource.class);

    /** JSON mapper used both to parse server responses and to serialize query payloads. */
    protected final ObjectMapper mapper = new ObjectMapper();

    /** Base URL of the entity hub endpoint; computed by {@link #setDescriptor(RemoteEntitySourceDescriptor)}. */
    protected String endpointURL;

    /**
     * Creates the source and initializes its HTTP client.
     * <p>
     * NOTE(review): {@code initHttpClient()} is not defined in this file; it is presumably inherited from
     * {@link ParameterizedHTTPEntitySource} - confirm.
     */
    public StanbolEntityHubSource() {
        initHttpClient();
    }

    /**
     * Configures this source from its descriptor and computes the entity hub endpoint URL.
     * <p>
     * The endpoint is built from the {@code stanbolURL} parameter (falling back to a demo server when the
     * configuration placeholder was left unresolved), followed by {@code entityhub/} and, when the optional
     * {@code site} parameter is present, either {@code sites/} (for {@code "*"}) or {@code site/<name>/}.
     * <p>
     * The URL is assembled in a local variable and the fields are only updated once validation succeeded, so
     * a rejected descriptor leaves the previous configuration untouched and no partially built URL is ever
     * stored in {@link #endpointURL}.
     *
     * @param descriptor the descriptor holding the {@code stanbolURL} and optional {@code site} parameters
     * @throws IllegalArgumentException if the {@code stanbolURL} parameter is missing or empty
     */
    @Override
    public synchronized void setDescriptor(RemoteEntitySourceDescriptor descriptor) {
        String baseUrl = descriptor.getParameters().get("stanbolURL");
        if ("${org.nuxeo.ecm.platform.semanticentities.stanbolUrl}".equals(baseUrl)) {
            // no property defined, use some default value instead
            baseUrl = "https://stanbol.demo.nuxeo.com";
        }
        if (baseUrl == null || baseUrl.isEmpty()) {
            throw new IllegalArgumentException("stanbolURL parameter is missing for the" + " StanbolEntityHubSource ");
        }

        StringBuilder url = new StringBuilder(baseUrl);
        if (!baseUrl.endsWith("/")) {
            url.append('/');
        }
        url.append("entityhub/");
        String site = descriptor.getParameters().get("site");
        if (site != null) {
            if ("*".equals(site)) {
                // query all the referenced sites at once
                url.append("sites/");
            } else {
                url.append("site/").append(site).append('/');
            }
        }

        // commit the new configuration only once it is known to be valid
        this.descriptor = descriptor;
        this.endpointURL = url.toString();
        log.info(String.format("Configured '%s' to endpoint: '%s'", this.getClass().getName(), endpointURL));
    }

    /**
     * Fetches the JSON description of a remote entity from the {@code entity} resource of the endpoint.
     *
     * @param remoteEntity URI of the entity, passed as the {@code id} query parameter
     * @return the parsed JSON document as a map
     * @throws JsonParseException if the response is not valid JSON
     * @throws JsonMappingException if the response cannot be mapped to a {@link Map}
     * @throws IOException on any other I/O failure
     */
    @SuppressWarnings("unchecked")
    protected Map<String, Object> fetchJSONDescription(URI remoteEntity)
            throws JsonParseException, JsonMappingException, IOException {
        // TODO: make the format configurable and upgrade to JSON-LD once
        // the spec is stabilizing a bit

        // The id is %-encoded by hand before being handed to UriBuilder: the
        // JAX-RS server decodes the query parameter once, and UriBuilder
        // refuses to 'double' encode a % that is followed by two hexadecimal
        // digits.
        String encodedId = URLEncoder.encode(remoteEntity.toString(), "UTF-8");

        UriBuilder lookup = UriBuilder.fromPath(endpointURL);
        lookup = lookup.path("entity");
        lookup = lookup.queryParam("id", encodedId);
        URI lookupUri = lookup.build();

        InputStream body = doHttpGet(lookupUri, "application/json");
        return mapper.readValue(body, Map.class);
    }

    /**
     * Computes the local document types a remote entity can be mapped to, by fetching its JSON description
     * and inspecting the {@code representation} entry.
     *
     * @param remoteEntity URI of the entity to inspect
     * @return the admissible local type names
     * @throws DereferencingException if the description cannot be fetched, parsed or interpreted
     */
    @SuppressWarnings("unchecked")
    @Override
    public Set<String> getAdmissibleTypes(URI remoteEntity) throws DereferencingException {
        final Map<String, Object> entityRepresentation;
        try {
            Map<String, Object> description = fetchJSONDescription(remoteEntity);
            entityRepresentation = (Map<String, Object>) description.get("representation");
        } catch (IOException e) {
            // JsonParseException and JsonMappingException are IOException
            // subclasses: a single handler wraps all of them the same way
            throw new DereferencingException(e);
        }
        return getAdmissibleTypes(entityRepresentation);
    }

    /**
     * Extracts the admissible local types from an already fetched JSON representation.
     * <p>
     * Each value listed under {@link #RDF_TYPE} is translated through the descriptor's reverse type mapping;
     * unmapped types and the generic {@code "Entity"} type are left out.
     *
     * @param jsonRepresentation the JSON representation of the entity
     * @return the sorted set of local type names, empty when the representation has no type information
     * @throws DereferencingException if anything goes wrong while reading the representation
     */
    @SuppressWarnings("unchecked")
    protected Set<String> getAdmissibleTypes(Map<String, Object> jsonRepresentation) throws DereferencingException {
        try {
            Object rawTypeInfos = jsonRepresentation.get(RDF_TYPE);
            if (rawTypeInfos == null) {
                log.warn("Missing type information in JSON description for " + jsonRepresentation.get("id"));
                return Collections.emptySet();
            }
            Map<String, String> remoteToLocal = descriptor.getReverseMappedTypes();
            Set<String> localTypes = new TreeSet<String>();
            for (Map<String, String> typeInfo : (List<Map<String, String>>) rawTypeInfos) {
                String candidate = remoteToLocal.get(typeInfo.get("value"));
                if (candidate == null || "Entity".equals(candidate)) {
                    continue;
                }
                localTypes.add(candidate);
            }
            return localTypes;
        } catch (Exception e) {
            throw new DereferencingException(e);
        }
    }

    /**
     * Copies the description of a remote entity into a local entity document.
     * <p>
     * The remote entity must be mappable to the type of {@code localEntity}. The {@code entity:sameas} and
     * {@code entity:sameasDisplayLabel} properties get a dedicated treatment (the remote URI and a display
     * label are appended once); every other mapped property is synchronized generically.
     *
     * @param localEntity the document receiving the remote values
     * @param remoteEntity URI of the entity to dereference
     * @param override when {@code true}, existing local values are replaced; otherwise only empty scalar
     *            properties are filled and list properties are extended
     * @param lazyResourceFetch when {@code true}, linked resources mapped to {@code content} properties are
     *            not downloaded
     * @return {@code true} whenever the method completes without throwing
     * @throws DereferencingException if the remote description cannot be fetched or interpreted, if the
     *             remote types do not match the local type, or if a local property cannot be updated
     */
    @SuppressWarnings("unchecked")
    @Override
    public boolean dereferenceInto(DocumentModel localEntity, URI remoteEntity, boolean override,
            boolean lazyResourceFetch) throws DereferencingException {
        Map<String, Object> representation = Collections.emptyMap();
        try {
            Map<String, Object> jsonDescription = fetchJSONDescription(remoteEntity);
            representation = (Map<String, Object>) jsonDescription.get("representation");
            if (representation == null) {
                throw new DereferencingException("Invalid JSON response from Stanbol server:"
                        + " missing 'representation' key: " + mapper.writeValueAsString(jsonDescription));
            }
            Set<String> possibleTypes = getAdmissibleTypes(representation);
            if (!possibleTypes.contains(localEntity.getType())) {
                throw new DereferencingException(
                        String.format("Remote entity '%s' can be mapped to types:" + " ('%s') but not to '%s'",
                                remoteEntity, StringUtils.join(possibleTypes, "', '"), localEntity.getType()));
            }
            // special handling for the entity:sameas property
            // XXX: the following code should be factorized somewhere
            List<String> samesas = new ArrayList<String>();
            List<String> sameasDisplayLabel = new ArrayList<String>();
            Property sameasProp = localEntity.getProperty("entity:sameas");
            if (sameasProp.getValue() != null) {
                samesas.addAll(sameasProp.getValue(List.class));
            }
            Property sameasDisplayLabelProp = localEntity.getProperty("entity:sameasDisplayLabel");
            if (sameasDisplayLabelProp.getValue() != null) {
                sameasDisplayLabel.addAll(sameasDisplayLabelProp.getValue(List.class));
            }
            if (!samesas.contains(remoteEntity.toString())) {
                samesas.add(remoteEntity.toString());
                localEntity.setPropertyValue("entity:sameas", (Serializable) samesas);

                String titlePropUri = descriptor.getMappedProperties().get("dc:title");
                String label = localEntity.getTitle();
                label = label != null ? label : "Missing label";
                if (titlePropUri != null) {
                    // the remote description may lack a title literal: only
                    // convert to a string once the literal is known to exist,
                    // otherwise keep the local title as display label
                    Serializable labelFromRDF = readDecodedLiteral(representation, titlePropUri,
                            StringType.INSTANCE, "en");
                    if (labelFromRDF != null) {
                        label = labelFromRDF.toString();
                    }
                }
                sameasDisplayLabel.add(label);
                localEntity.setPropertyValue("entity:sameasDisplayLabel", (Serializable) sameasDisplayLabel);
            }
        } catch (DereferencingException e) {
            throw e;
        } catch (Exception e) {
            throw new DereferencingException(e);
        }
        HashMap<String, String> mapping = new HashMap<String, String>(descriptor.getMappedProperties());
        // as sameas has a special handling, remove it from the list of
        // properties to synchronize the generic way
        mapping.remove("entity:sameas");

        // generic handling of mapped properties
        for (Entry<String, String> mappedProperty : mapping.entrySet()) {
            String localPropertyName = mappedProperty.getKey();
            String remotePropertyUri = mappedProperty.getValue();
            try {
                Property localProperty = localEntity.getProperty(localPropertyName);
                Type type = localProperty.getType();
                if (type.isListType()) {
                    // only synchronize string lists right now
                    List<String> newValues = new ArrayList<String>();
                    if (localProperty.getValue() != null) {
                        newValues.addAll(localProperty.getValue(List.class));
                    }
                    if (override) {
                        newValues.clear();
                    }
                    for (String value : readStringList(representation, remotePropertyUri)) {
                        if (!newValues.contains(value)) {
                            newValues.add(value);
                        }
                    }
                    localEntity.setPropertyValue(localPropertyName, (Serializable) newValues);
                } else {
                    if (localProperty.getValue() == null || "".equals(localProperty.getValue()) || override) {
                        if (type.isComplexType() && "content".equals(type.getName())) {
                            if (lazyResourceFetch) {
                                // TODO: store the resource and property
                                // info in a DocumentModel context data entry to
                                // be used later by the entity serializer
                            } else {
                                Serializable linkedResource = readLinkedResource(representation, remotePropertyUri);
                                if (linkedResource != null) {
                                    localEntity.setPropertyValue(localPropertyName, linkedResource);
                                }
                            }
                        } else {
                            Serializable literal = readDecodedLiteral(representation, remotePropertyUri, type,
                                    "en");
                            if (literal != null) {
                                localEntity.setPropertyValue(localPropertyName, literal);
                            }
                        }
                    }
                }
            } catch (PropertyException e) {
                // ignore missing properties
            } catch (ClientException e) {
                throw new DereferencingException(e);
            }
        }
        return true;
    }

    /**
     * Downloads the first usable resource (a depiction, for instance) linked from the given property.
     * <p>
     * Candidates are tried in order. Entries without a value, vectorial ({@code .svg}) resources, malformed
     * URIs and resources that cannot be fetched are skipped, so that a single stale link does not prevent a
     * later valid candidate from being used.
     *
     * @param jsonRepresentation the JSON representation of the entity
     * @param propertyUri URI of the property holding the resource links
     * @return a blob holding the downloaded content, or {@code null} when the property is absent or no
     *         candidate could be downloaded
     */
    @SuppressWarnings("unchecked")
    protected Serializable readLinkedResource(Map<String, Object> jsonRepresentation, String propertyUri) {
        // download depictions or other kind of linked resources
        List<Map<String, String>> propInfos = (List<Map<String, String>>) jsonRepresentation.get(propertyUri);
        if (propInfos == null) {
            return null;
        }
        for (Map<String, String> propInfo : propInfos) {
            String contentURI = propInfo.get("value");
            if (contentURI == null || contentURI.endsWith(".svg")) {
                // nothing to fetch, or hardcoded skip for vectorial depictions
                continue;
            }
            int lastSlashIndex = contentURI.lastIndexOf('/');
            String filename = null;
            if (lastSlashIndex != -1) {
                filename = contentURI.substring(lastSlashIndex + 1);
            }
            try (InputStream in = doHttpGet(URI.create(contentURI), null)) {
                if (in == null) {
                    log.warn("failed to fetch resource: " + contentURI);
                    continue;
                }
                Blob blob = Blobs.createBlob(in);
                blob.setFilename(filename);
                return (Serializable) blob;
            } catch (IOException | IllegalArgumentException e) {
                // DBpedia links to commons.wikimedia.org hosted resources are
                // not always up to date (or even valid URIs), skip them
                // without crashing and try the next candidate
                log.warn("failed to fetch resource: " + contentURI + ": " + e.getMessage(), e);
            }
        }
        return null;
    }

    /**
     * Collects the distinct, non-null {@code value} entries of a property, in order of first appearance.
     *
     * @param jsonRepresentation the JSON representation of the entity
     * @param propertyUri URI of the property to read
     * @return the values of the property; an empty list when the property is absent from the representation
     */
    @SuppressWarnings("unchecked")
    protected List<String> readStringList(Map<String, Object> jsonRepresentation, String propertyUri) {
        List<Map<String, String>> propInfos = (List<Map<String, String>>) jsonRepresentation.get(propertyUri);
        if (propInfos == null) {
            // a remote entity is not required to carry every mapped property
            return new ArrayList<String>();
        }
        // a linked set removes duplicates while preserving the remote ordering
        Set<String> values = new LinkedHashSet<String>();
        for (Map<String, String> propInfo : propInfos) {
            String value = propInfo.get("value");
            if (value != null) {
                values.add(value);
            }
        }
        return new ArrayList<String>(values);
    }

    /**
     * Reads a literal value of a property, preferring the requested language.
     * <p>
     * The first literal tagged with {@code filterLang} wins. When there is none, the first literal without
     * any language tag is used as a fallback. Literals tagged with another language are ignored. Decoded
     * string values are HTML-unescaped.
     *
     * @param jsonRepresentation the JSON representation of the entity
     * @param propertyUri URI of the property to read
     * @param type the type used to decode the raw literal value
     * @param filterLang the preferred language tag; when {@code null} only untagged literals are considered
     * @return the decoded literal, or {@code null} when the property is absent or has no suitable literal
     */
    @SuppressWarnings("unchecked")
    protected Serializable readDecodedLiteral(Map<String, Object> jsonRepresentation, String propertyUri, Type type,
            String filterLang) {
        List<Map<String, String>> propInfos = (List<Map<String, String>>) jsonRepresentation.get(propertyUri);
        if (propInfos == null) {
            return null;
        }
        Serializable defaultLiteralValue = null;
        for (Map<String, String> propInfo : propInfos) {
            String lang = propInfo.get("xml:lang");
            if (lang != null && !lang.equals(filterLang)) {
                // literal in a language that was not asked for
                continue;
            }
            if (lang == null && defaultLiteralValue != null) {
                // a fallback is already known: keep the first one
                continue;
            }
            Serializable decoded = (Serializable) type.decode(propInfo.get("value"));
            if (decoded instanceof String) {
                decoded = StringEscapeUtils.unescapeHtml((String) decoded);
            }
            if (lang != null) {
                // exact language match: no need to look any further
                return decoded;
            }
            // untagged literal: remember it, but keep looking for a literal
            // in the requested language
            defaultLiteralValue = decoded;
        }
        return defaultLiteralValue;
    }

    /**
     * Suggests remote entities whose name matches the given keywords by running a field query on the entity
     * hub.
     * <p>
     * When {@code type} is given, the query is restricted to the matching remote type and every suggestion
     * carries that type. When {@code type} is {@code null}, the type of each suggestion is derived from that
     * result's own admissible types, and results without any admissible type are dropped.
     *
     * @param keywords the text to match against the property mapped to {@code dc:title}
     * @param type the local type to restrict the search to, or {@code null} for any type
     * @param maxSuggestions the maximum number of results requested from the server
     * @return the suggestions, empty when {@code type} has no remote mapping
     * @throws IOException if the query fails or the response cannot be interpreted
     */
    @SuppressWarnings("unchecked")
    @Override
    public List<EntitySuggestion> suggestRemoteEntity(String keywords, String type, int maxSuggestions)
            throws IOException {
        // build a field query on the entity hub
        Map<String, Object> query = new LinkedHashMap<String, Object>();
        List<Map<String, String>> constraints = new ArrayList<Map<String, String>>();
        String namePropertyUri = descriptor.getMappedProperties().get("dc:title");
        Map<String, String> nameTextConstraint = new LinkedHashMap<String, String>();
        nameTextConstraint.put("type", "text");
        nameTextConstraint.put("field", namePropertyUri);
        nameTextConstraint.put("text", keywords);
        constraints.add(nameTextConstraint);
        if (type != null) {
            String remoteType = descriptor.getMappedTypes().get(type);
            if (remoteType == null) {
                return Collections.emptyList();
            }
            Map<String, String> typeReferenceConstraint = new LinkedHashMap<String, String>();
            typeReferenceConstraint.put("type", "reference");
            typeReferenceConstraint.put("field", RDF_TYPE);
            typeReferenceConstraint.put("value", remoteType);
            constraints.add(typeReferenceConstraint);
        }
        List<String> selected = Arrays.asList(namePropertyUri, RDF_TYPE);
        query.put("selected", selected);
        query.put("limit", maxSuggestions);
        query.put("constraints", constraints);
        String queryPayload = mapper.writeValueAsString(query);
        InputStream responseStream = doHttpPost(URI.create(endpointURL + "query"), "application/json",
                "application/json", queryPayload);
        Map<String, Object> response = mapper.readValue(responseStream, Map.class);
        List<Map<String, Object>> results = (List<Map<String, Object>>) response.get("results");
        if (results == null) {
            throw new DereferencingException("Stanbol EntityHub response is missing the 'results' key: " + response);
        }
        List<EntitySuggestion> suggestions = new ArrayList<EntitySuggestion>();
        for (Map<String, Object> result : results) {
            Serializable nameLiteral = readDecodedLiteral(result, namePropertyUri, StringType.INSTANCE, "en");
            Object id = result.get("id");
            if (nameLiteral == null || id == null) {
                // a suggestion needs both a label and an identifier
                continue;
            }
            // resolve the type per result: the requested type must not be
            // overwritten, otherwise the type inferred for the first result
            // would be applied to all the following ones
            String suggestionType = type;
            if (suggestionType == null) {
                Set<String> admissibleTypes = getAdmissibleTypes(result);
                if (admissibleTypes.isEmpty()) {
                    continue;
                }
                // primary type assignment is currently arbitrary: the plan is
                // to use secondary types with "Entity" as primary type
                // instead
                suggestionType = admissibleTypes.iterator().next();
            }
            suggestions.add(new EntitySuggestion(nameLiteral.toString(), id.toString(), suggestionType));
        }
        return suggestions;
    }

}