/*
 * nl.minbzk.dwr.zoeken.enricher.uploader.ElasticSearchResultUploader.java
 */

/* Copyright (c) 2010 Ministry of the Interior and Kingdom Relations,
 * the Netherlands. All rights reserved.
 * 
 * This file is part of the MinBZK Search Enricher indexing generator.
 * 
 * Search Enricher is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Search Enricher is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Search Enricher. If not, see <http://www.gnu.org/licenses/>. */

package nl.minbzk.dwr.zoeken.enricher.uploader;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import nl.minbzk.dwr.zoeken.enricher.GeneratorResult;
import nl.minbzk.dwr.zoeken.enricher.Uploader;
import nl.minbzk.dwr.zoeken.enricher.generator.MultiGeneratorResult;
import nl.minbzk.dwr.zoeken.enricher.notifier.DirectResultNotifier;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherJob;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherSettings;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherSettings.GeneratorType;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.expression.ExpressionParser;
import org.springframework.expression.spel.standard.SpelExpressionParser;
import org.springframework.expression.spel.support.StandardEvaluationContext;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.client.ClientHttpRequest;
import org.springframework.http.client.ClientHttpResponse;
import org.springframework.util.StringUtils;
import org.springframework.web.client.RequestCallback;
import org.springframework.web.client.ResponseExtractor;
import org.springframework.web.client.RestClientException;
import org.springframework.web.client.RestOperations;
import org.springframework.web.client.RestTemplate;

import static java.lang.String.format;

/**
 * ElasticSearch autn envelope uploader.
 * 
 * @author Jasper van Veghel <j.veghel@rijksoverheid.nl>
 */
public class ElasticSearchResultUploader implements Uploader {
    /**
     * Field name used as the primary document reference.
     */
    private static final String REFERENCE_FIELD = "id";

    /**
     * Property key for the ElasticSearch cluster name.
     */
    private static final String ES_CLUSTER_NAME_PROPERTY = "cluster.name";

    /**
     * The logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(ElasticSearchResultUploader.class);

    /**
     * The ElasticSearch base URI(s). Remains null when no URI was configured.
     */
    private List<String> elasticSearchUris;

    /**
     * The ElasticSearch cluster name, or null when not configured.
     */
    private String elasticSearchClusterName;

    /**
     * The REST operations used for all HTTP traffic to ElasticSearch.
     */
    private final RestOperations operations = new RestTemplate();

    /**
     * The direct result notifier.
     */
    @Autowired
    private DirectResultNotifier notifier;

    /**
     * Default constructor.
     *
     * @param settings The enricher settings providing the (comma-separated) ElasticSearch URI(s) and optional cluster name.
     * @throws java.net.MalformedURLException
     */
    @Autowired
    public ElasticSearchResultUploader(final EnricherSettings settings) throws MalformedURLException {
        // No ElasticSearch URI configured - leave this uploader unconfigured
        if (!StringUtils.hasText(settings.getElasticSearchUri()))
            return;

        this.elasticSearchUris = Arrays
                .asList(StringUtils.tokenizeToStringArray(settings.getElasticSearchUri(), ",", true, true));
        this.elasticSearchClusterName = StringUtils.hasText(settings.getElasticSearchClusterName())
                ? settings.getElasticSearchClusterName()
                : null;
    }

    /**
     * Determine the database name, based on the composite key, or null if any of the composite key replacements could not be resolved.
     * 
     * XXX: We only consider the replacement values from the first document given.
     * 
     * @param name The default database name, substituted for the "{name}" placeholder.
     * @param nameComposition The composition template, e.g. "{name}-{year}-{month}".
     * @param namePrerequisitesExpression Optional SpEL expression that must evaluate to true for the composite name to apply.
     * @param documents The generated documents; only the first supplies placeholder values.
     * @return String The resolved name, or null when a placeholder or the prerequisite expression could not be satisfied.
     */
    @SuppressWarnings("unchecked")
    private String determineAlternateDatabaseName(final String name, final String nameComposition,
            final String namePrerequisitesExpression, final List<Map<String, Object>> documents) {
        // A freshly constructed calendar already represents the current date/time
        final GregorianCalendar calendar = new GregorianCalendar();

        String result = nameComposition.replace("{name}", name).trim();

        result = result.replace("{year}", format("%04d", calendar.get(Calendar.YEAR)));

        // NOTE(review): Calendar.MONTH is zero-based, so "{month}" expands to "00".."11". Existing
        // index names may depend on this, so it is deliberately left unchanged - confirm whether
        // "+ 1" was actually intended before altering it.
        result = result.replace("{month}", format("%02d", calendar.get(Calendar.MONTH)));

        if (!documents.isEmpty()) {
            final Map<String, Object> document = documents.get(0);

            // Substitute any remaining "{field}" placeholders from the first document; bail out
            // (returning null) as soon as a placeholder cannot be resolved to a scalar value
            while (result.contains("{") && result.indexOf("}") > result.indexOf("{")) {
                String fieldName = result.substring(result.indexOf("{") + 1, result.indexOf("}"));

                if (document.containsKey(fieldName) && !document.get(fieldName).getClass().isArray())
                    result = result.replace("{" + fieldName + "}", document.get(fieldName).toString());
                else {
                    if (logger.isDebugEnabled())
                        logger.debug(format(
                                "Field '%s' was missing from document with ID '%s' - will revert back to default collection '%s'",
                                fieldName, getReference(document), name));

                    return null;
                }
            }

            // Also check the pre-requisite expression - only return a composite database name if it's met

            if (StringUtils.hasText(namePrerequisitesExpression)) {
                ExpressionParser parser = new SpelExpressionParser();

                final Map<String, Object> values = new HashMap<String, Object>();

                // XXX: Always get just the first value

                for (Map.Entry<String, Object> entry : document.entrySet())
                    if (entry.getValue().getClass().isArray())
                        values.put(entry.getKey(), ((Object[]) entry.getValue())[0]);
                    else if (entry.getValue() instanceof List)
                        values.put(entry.getKey(), ((List<Object>) entry.getValue()).get(0));
                    else
                        values.put(entry.getKey(), entry.getValue());

                StandardEvaluationContext context = new StandardEvaluationContext(new Object() {
                    public Map<String, Object> getValues() {
                        return values;
                    }
                });

                // Boolean.TRUE.equals(..) guards against a null evaluation result (avoids an NPE on unboxing)
                if (!Boolean.TRUE.equals(
                        parser.parseExpression(namePrerequisitesExpression).getValue(context, Boolean.class))) {
                    if (logger.isDebugEnabled())
                        logger.debug(format(
                                "Pre-requisite expression '%s' failed to match against document with ID '%s' - will revert back to default collection '%s'",
                                namePrerequisitesExpression, getReference(document), name));

                    return null;
                }
            }
        }

        return result;
    }

    /**
     * Determine the document reference, trying the "id", "key", "dc_identifier" and "url" fields in that order.
     *
     * @param document The document to take the reference from.
     * @return String The first available reference value, or "(unknown)" when none is present.
     */
    private String getReference(final Map<String, Object> document) {
        if (document.containsKey(REFERENCE_FIELD))
            return document.get(REFERENCE_FIELD).toString();
        else if (document.containsKey("key"))
            return document.get("key").toString();
        else if (document.containsKey("dc_identifier"))
            return document.get("dc_identifier").toString();
        else if (document.containsKey("url"))
            return document.get("url").toString();
        else
            return "(unknown)";
    }

    /**
     * {@inheritDoc}
     *
     * Uploads every generated document individually via HTTP PUT, then triggers direct notification.
     */
    @Override
    public void upload(final EnricherJob job, final GeneratorResult result) throws Exception {
        String indexCollection = job.getDatabaseName();

        // Create a new update request

        @SuppressWarnings("unchecked")
        final List<Map<String, Object>> documents = ((MultiGeneratorResult<Map<String, Object>>) result)
                .getDocuments();

        // Set a collection in case of a (resolvable) composite database name

        if (StringUtils.hasText(job.getDatabaseNameComposition())) {
            String compositeDatabaseName = determineAlternateDatabaseName(job.getDatabaseName(),
                    job.getDatabaseNameComposition(), job.getDatabaseNamePrerequisitesExpression(), documents);

            if (compositeDatabaseName != null) {
                if (logger.isDebugEnabled())
                    logger.debug(
                            format("Composite database name resolved to collection '%s'", compositeDatabaseName));

                indexCollection = compositeDatabaseName;
            } else {
                if (logger.isDebugEnabled())
                    logger.debug(format(
                            "Composite database name could not be (completely) resolved - will use default collection '%s'",
                            job.getDatabaseName()));
            }
        }

        // Now perform the request

        String elasticSearchUri = getElasticSearchUri();

        ElasticSearchDomainConverter converter = new ElasticSearchDomainConverter(
                retrieveMetaData(elasticSearchUri, job));

        // Bulk it all together

        for (Map<String, Object> document : documents) {
            Map<String, Object> actualDocument = converter.convert(document);

            // Then add it to the index

            final String id = getReference(actualDocument);

            // Re-use the already-determined reference instead of resolving it a second time
            String encodedId = URLEncoder.encode(id, "UTF-8");
            String formattedUri = format("http://%s/%s/%s/%s", elasticSearchUri, indexCollection,
                    job.getDatabaseType(), encodedId);

            try {
                ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

                XContentBuilder builder = XContentFactory.jsonBuilder(outputStream);

                try {
                    builder.map(actualDocument);

                    builder.flush();

                    // Now send it on its way

                    final String content = new String(outputStream.toByteArray(), StandardCharsets.UTF_8);

                    if (logger.isTraceEnabled())
                        logger.trace(format("Posting document to URL %s ... %s", formattedUri, content));

                    HttpStatus status = operations.execute(formattedUri, HttpMethod.PUT, new RequestCallback() {
                        @Override
                        public void doWithRequest(final ClientHttpRequest request) throws IOException {
                            // Encode explicitly as UTF-8 - the no-argument getBytes() would use the
                            // platform default charset and could corrupt non-ASCII content
                            request.getBody().write(content.getBytes(StandardCharsets.UTF_8));
                        }
                    }, new ResponseExtractor<HttpStatus>() {
                        @Override
                        public HttpStatus extractData(final ClientHttpResponse response) throws IOException {
                            return response.getStatusCode();
                        }
                    });

                    if (status.equals(HttpStatus.OK) || status.equals(HttpStatus.CREATED))
                        logger.info(format("Successfully added document %s to ElasticSearch index %s", id,
                                indexCollection));
                    else
                        logger.error(
                                format("Failed to add document %s to ElasticSearch index %s", id, indexCollection));
                } finally {
                    builder.close();
                }
            } catch (RestClientException e) {
                logger.error(format("Failed to add document %s to ElasticSearch index %s", id, indexCollection), e);
            } catch (ElasticSearchException e) {
                logger.error(format("Failed to add document %s to ElasticSearch index %s", id, indexCollection), e);
            }
        }

        // Perform direct notification now that the document(s) have been uploaded

        notifier.process(result);
    }

    /**
     * Retrieve the ElasticSearch URI to use.
     *
     * @return String The first configured base URI.
     */
    private String getElasticSearchUri() {
        // TODO: Make this load-balanced

        return elasticSearchUris.get(0);
    }

    /**
     * Retrieve the mapping metadata from the given job and ElasticSearch URI, caching it on the job.
     *
     * @param elasticSearchUri The base URI of the ElasticSearch node.
     * @param job The job whose database mapping is requested.
     * @return Map<String, String> Field name to field type ("object" for nested mappings without an explicit type).
     * @throws IllegalStateException When the mapping cannot be retrieved or parsed.
     */
    private Map<String, String> retrieveMetaData(final String elasticSearchUri, final EnricherJob job) {
        if (job.getMappingMetaData() == null) {
            URI mappingUri = URI.create(format("http://%s/%s/_mapping", elasticSearchUri, job.getDatabaseName()));

            String mappingContent;

            try {
                mappingContent = operations.getForObject(mappingUri, String.class);

                if (logger.isTraceEnabled())
                    logger.trace("Mapping content is: " + mappingContent);
            } catch (RestClientException e) {
                // Preserve the underlying cause for diagnosis
                throw new IllegalStateException(format(
                        "Unable to retrieve the index meta-data for job %s - maybe it hasn't been created yet?",
                        job.getDatabaseName()), e);
            }

            // Retrieve the cluster state for the given collection

            Map<String, Object> propertiesMap;

            try {
                XContentParser parser = XContentType.JSON.xContent().createParser(mappingContent);

                propertiesMap = getMapEntry(parser.map(), "properties");
            } catch (IOException e) {
                throw new IllegalStateException(
                        format("Unable to parse the mapping content for database %s", job.getDatabaseName()), e);
            }

            // Retrieve the index metadata

            if (propertiesMap == null)
                throw new IllegalStateException(format(
                        "Unable to retrieve the index meta-data for job %s - maybe it hasn't been created yet?",
                        job.getDatabaseName()));

            Map<String, String> types = new HashMap<String, String>();

            for (Map.Entry<String, Object> mapping : propertiesMap.entrySet())
                if (mapping.getValue() instanceof Map && ((Map) mapping.getValue()).containsKey("type"))
                    types.put(mapping.getKey(), (String) ((Map) mapping.getValue()).get("type"));
                else
                    types.put(mapping.getKey(), "object");

            job.setMappingMetaData(types);
        }

        return job.getMappingMetaData();
    }

    /**
     * Retrieve the map within the given map whose entry has the given name, searching depth-first.
     *
     * @param map The map to search through.
     * @param name The entry name to look for.
     * @return Map<String, Object> The matching nested map, or null when no entry matches.
     */
    @SuppressWarnings("unchecked")
    private Map<String, Object> getMapEntry(final Map<String, Object> map, final String name) {
        for (Map.Entry<String, Object> entry : map.entrySet()) {
            if (entry.getValue() instanceof Map) {
                Map<String, Object> result;

                if (entry.getKey().equals(name))
                    result = (Map<String, Object>) entry.getValue();
                else
                    result = getMapEntry((Map<String, Object>) entry.getValue(), name);

                if (result != null)
                    return result;
            }
        }

        return null;
    }

    /**
     * {@inheritDoc}
     *
     * Issues a _flush against the job's database so all pending writes are committed.
     */
    @Override
    public void commit(final EnricherJob job) throws Exception {
        String flushUri = format("http://%s/%s/_flush", getElasticSearchUri(), job.getDatabaseName());

        HttpStatus status = operations.execute(flushUri, HttpMethod.POST, null,
                new ResponseExtractor<HttpStatus>() {
                    @Override
                    public HttpStatus extractData(final ClientHttpResponse response) throws IOException {
                        return response.getStatusCode();
                    }
                }, null);

        if (status.value() != HttpStatus.OK.value())
            logger.error("Not all shards could be flushed / committed");
        else {
            if (logger.isInfoEnabled())
                logger.info("All shards were successfully flushed / committed");
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void deleteByDocId(final EnricherJob job, final String[] documents) throws Exception {
        deleteByReference(job, REFERENCE_FIELD, documents);
    }

    /**
     * {@inheritDoc}
     *
     * Builds a bool/should query matching any of the given reference values and posts it to _query via HTTP DELETE.
     */
    @Override
    public void deleteByReference(final EnricherJob job, final String field, final String[] documents)
            throws Exception {
        final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

        XContentBuilder builder = XContentFactory.jsonBuilder(outputStream);

        try {
            builder.startObject();

            builder.startObject("bool");
            builder.startArray("should");

            // One "match" clause per document reference
            for (String document : documents) {
                builder.startObject();

                builder.startObject("match");
                builder.field(field, document);
                builder.endObject();

                builder.endObject();
            }

            builder.endArray();
            builder.endObject();

            builder.endObject();

            builder.flush();

            // Then post the result

            String queryUri = format("http://%s/%s/%s/_query", getElasticSearchUri(), job.getDatabaseName(),
                    job.getDatabaseType());

            HttpStatus status = operations.execute(queryUri, HttpMethod.DELETE, new RequestCallback() {
                @Override
                public void doWithRequest(final ClientHttpRequest request) throws IOException {
                    request.getBody().write(outputStream.toByteArray());
                }
            }, new ResponseExtractor<HttpStatus>() {
                @Override
                public HttpStatus extractData(final ClientHttpResponse response) throws IOException {
                    return response.getStatusCode();
                }
            }, null);

            if (status.value() == HttpStatus.OK.value())
                logger.info("Successfully removed document(s) from ElasticSearch");
            else
                logger.error(
                        format("Failed to delete document(s) from ElasticSearch %s", status.getReasonPhrase()));
        } finally {
            builder.close();
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public List<GeneratorType> getType() {
        return Arrays.asList(GeneratorType.ElasticSearch);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void writeOut(final String databaseName, final PrintWriter writer) {
        writer.write(format("ElasticSearch with database name %s", databaseName));
    }
}