// Java tutorial
/* Copyright (c) 2010 Ministry of the Interior and Kingdom Relations,
 * the Netherlands. All rights reserved.
 *
 * This file is part of the MinBZK Search Enricher indexing generator.
 *
 * Search Enricher is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Search Enricher is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Search Enricher. If not, see <http://www.gnu.org/licenses/>.
 */
package nl.minbzk.dwr.zoeken.enricher.uploader;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URLEncoder;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import nl.minbzk.dwr.zoeken.enricher.GeneratorResult;
import nl.minbzk.dwr.zoeken.enricher.Uploader;
import nl.minbzk.dwr.zoeken.enricher.generator.MultiGeneratorResult;
import nl.minbzk.dwr.zoeken.enricher.notifier.DirectResultNotifier;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherJob;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherSettings;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherSettings.GeneratorType;

import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.expression.ExpressionParser;
import org.springframework.expression.spel.standard.SpelExpressionParser;
import org.springframework.expression.spel.support.StandardEvaluationContext;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.client.ClientHttpRequest;
import org.springframework.http.client.ClientHttpResponse;
import org.springframework.util.StringUtils;
import org.springframework.web.client.RequestCallback;
import org.springframework.web.client.ResponseExtractor;
import org.springframework.web.client.RestClientException;
import org.springframework.web.client.RestOperations;
import org.springframework.web.client.RestTemplate;

import static java.lang.String.format;

/**
 * ElasticSearch autn envelope uploader.
 *
 * Uploads generated documents to an ElasticSearch index over its HTTP/REST
 * interface, supports delete-by-query and flush/commit, and can derive a
 * per-document index name from a composite-name template.
 *
 * @author Jasper van Veghel <j.veghel@rijksoverheid.nl>
 */
public class ElasticSearchResultUploader implements Uploader {
    /**
     * Constants.
     */
    private static final String REFERENCE_FIELD = "id";
    private static final String ES_CLUSTER_NAME_PROPERTY = "cluster.name";

    /**
     * The logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(ElasticSearchResultUploader.class);

    /**
     * The ElasticSearch base URI(s), e.g. "host:9200" (scheme is prepended when requests are built).
     */
    private List<String> elasticSearchUris;

    /**
     * The ElasticSearch cluster name (null when not configured).
     */
    private String elasticSearchClusterName;

    /**
     * The REST operations used for all HTTP traffic to ElasticSearch.
     */
    private final RestOperations operations = new RestTemplate();

    /**
     * The direct result notifier, invoked after a successful upload.
     */
    @Autowired
    private DirectResultNotifier notifier;

    /**
     * Default constructor.
     *
     * Leaves the uploader unconfigured (URIs null) when no ElasticSearch URI is set.
     *
     * @param settings The global enricher settings to read the ElasticSearch URI(s) and cluster name from.
     * @throws java.net.MalformedURLException Declared for interface compatibility; not thrown here.
     */
    @Autowired
    public ElasticSearchResultUploader(final EnricherSettings settings) throws MalformedURLException {
        if (!StringUtils.hasText(settings.getElasticSearchUri()))
            return;

        this.elasticSearchUris = Arrays.asList(StringUtils.tokenizeToStringArray(settings.getElasticSearchUri(), ",", true, true));
        this.elasticSearchClusterName = StringUtils.hasText(settings.getElasticSearchClusterName()) ? settings.getElasticSearchClusterName() : null;
    }

    /**
     * Determine the database name, based on the composite key, or null if any of the composite key replacements could not be resolved.
     *
     * Supported placeholders are {name}, {year}, {month} and {<fieldName>} for any
     * non-array field of the first document.
     *
     * XXX: We only consider the replacement values from the first document given.
     *
     * @param name The default database name, substituted for {name}.
     * @param nameComposition The composite name template containing the placeholders.
     * @param namePrerequisitesExpression Optional SpEL expression that must evaluate to true for the composite name to be used.
     * @param documents The documents to resolve field placeholders against (only the first is consulted).
     * @return String The resolved name, or null when a placeholder or the prerequisite expression could not be satisfied.
     */
    @SuppressWarnings("unchecked")
    private String determineAlternateDatabaseName(final String name, final String nameComposition, final String namePrerequisitesExpression, final List<Map<String, Object>> documents) {
        GregorianCalendar calendar = new GregorianCalendar();

        calendar.setTime(new Date());

        String result;

        result = nameComposition.replace("{name}", name).trim();
        result = result.replace("{year}", format("%04d", calendar.get(Calendar.YEAR)));
        // NOTE: Calendar.MONTH is zero-based - add 1 so {month} renders 01..12 (previously rendered 00..11)
        result = result.replace("{month}", format("%02d", calendar.get(Calendar.MONTH) + 1));

        if (documents.size() > 0) {
            final Map<String, Object> document = documents.get(0);

            // Resolve every remaining {field} placeholder from the first document
            while (result.contains("{") && result.indexOf("}") > result.indexOf("{")) {
                String fieldName = result.substring(result.indexOf("{") + 1, result.indexOf("}"));

                if (document.containsKey(fieldName) && !document.get(fieldName).getClass().isArray())
                    result = result.replace("{" + fieldName + "}", document.get(fieldName).toString());
                else {
                    if (logger.isDebugEnabled())
                        logger.debug(format("Field '%s' was missing from document with ID '%s' - will revert back to default collection '%s'", fieldName, getReference(document), name));

                    return null;
                }
            }

            // Also check the pre-requisite expression - only return a composite database name if it's met
            if (StringUtils.hasText(namePrerequisitesExpression)) {
                ExpressionParser parser = new SpelExpressionParser();

                final Map<String, Object> values = new HashMap<String, Object>();

                // XXX: Always get just the first value
                for (Map.Entry<String, Object> entry : document.entrySet())
                    if (entry.getValue().getClass().isArray())
                        values.put(entry.getKey(), ((Object[]) entry.getValue())[0]);
                    else if (entry.getValue() instanceof List)
                        values.put(entry.getKey(), ((List<Object>) entry.getValue()).get(0));
                    else
                        values.put(entry.getKey(), entry.getValue());

                StandardEvaluationContext context = new StandardEvaluationContext(new Object() {
                    public Map<String, Object> getValues() {
                        return values;
                    }
                });

                // Guard against a null evaluation result to avoid an unboxing NPE - treat null as "not met"
                if (!Boolean.TRUE.equals(parser.parseExpression(namePrerequisitesExpression).getValue(context, Boolean.class))) {
                    if (logger.isDebugEnabled())
                        logger.debug(format("Pre-requisite expression '%s' failed to match against document with ID '%s' - will revert back to default collection '%s'", namePrerequisitesExpression, getReference(document), name));

                    return null;
                }
            }
        }

        return result;
    }

    /**
     * Determine the document reference.
     *
     * Tries, in order: the "id" field, "key", "dc_identifier", "url"; falls back to "(unknown)".
     *
     * @param document The document to derive a reference from.
     * @return String The first available reference value, never null.
     */
    private String getReference(final Map<String, Object> document) {
        if (document.containsKey(REFERENCE_FIELD))
            return document.get(REFERENCE_FIELD).toString();
        else if (document.containsKey("key"))
            return document.get("key").toString();
        else if (document.containsKey("dc_identifier"))
            return document.get("dc_identifier").toString();
        else if (document.containsKey("url"))
            return document.get("url").toString();
        else
            return "(unknown)";
    }

    /**
     * {@inheritDoc}
     */
    @Override
    @SuppressWarnings("unchecked")
    public void upload(final EnricherJob job, final GeneratorResult result) throws Exception {
        String indexCollection = job.getDatabaseName();

        // Create a new update request
        List<Map<String, Object>> documents = ((MultiGeneratorResult<Map<String, Object>>) result).getDocuments();

        // Set a collection in case of a (resolvable) composite database name
        if (StringUtils.hasText(job.getDatabaseNameComposition())) {
            String compositeDatabaseName = determineAlternateDatabaseName(job.getDatabaseName(), job.getDatabaseNameComposition(), job.getDatabaseNamePrerequisitesExpression(), documents);

            if (compositeDatabaseName != null) {
                if (logger.isDebugEnabled())
                    logger.debug(format("Composite database name resolved to collection '%s'", compositeDatabaseName));

                indexCollection = compositeDatabaseName;
            } else {
                if (logger.isDebugEnabled())
                    logger.debug(format("Composite database name could not be (completely) resolved - will use default collection '%s'", job.getDatabaseName()));
            }
        }

        // Now perform the request
        String elasticSearchUri = getElasticSearchUri();

        ElasticSearchDomainConverter converter = new ElasticSearchDomainConverter(retrieveMetaData(elasticSearchUri, job));

        // Bulk it all together
        for (Map<String, Object> document : documents) {
            Map<String, Object> actualDocument = converter.convert(document);

            // Then add it to the index
            final String id = getReference(actualDocument);

            String encodedId = URLEncoder.encode(id, "UTF-8");
            String formattedUri = format("http://%s/%s/%s/%s", elasticSearchUri, indexCollection, job.getDatabaseType(), encodedId);

            try {
                ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

                XContentBuilder builder = XContentFactory.jsonBuilder(outputStream);

                try {
                    builder.map(actualDocument);
                    builder.flush();

                    // Now send it on its way - post the original UTF-8 bytes rather than
                    // re-encoding the String with the platform default charset
                    final byte[] payload = outputStream.toByteArray();

                    if (logger.isTraceEnabled())
                        logger.trace(format("Posting document to URL %s ... %s", formattedUri, new String(payload, "UTF-8")));

                    HttpStatus status = operations.execute(formattedUri, HttpMethod.PUT, new RequestCallback() {
                        @Override
                        public void doWithRequest(final ClientHttpRequest request) throws IOException {
                            request.getBody().write(payload);
                        }
                    }, new ResponseExtractor<HttpStatus>() {
                        @Override
                        public HttpStatus extractData(final ClientHttpResponse response) throws IOException {
                            return response.getStatusCode();
                        }
                    });

                    if (status.equals(HttpStatus.OK) || status.equals(HttpStatus.CREATED))
                        logger.info(format("Successfully added document %s to ElasticSearch index %s", id, indexCollection));
                    else
                        logger.error(format("Failed to add document %s to ElasticSearch index %s", id, indexCollection));
                } finally {
                    builder.close();
                }
            } catch (RestClientException e) {
                logger.error(format("Failed to add document %s to ElasticSearch index %s", id, indexCollection), e);
            } catch (ElasticSearchException e) {
                logger.error(format("Failed to add document %s to ElasticSearch index %s", id, indexCollection), e);
            }
        }

        // Perform direct notification now that the document(s) have been uploaded
        notifier.process(result);
    }

    /**
     * Retrieve the ElasticSearch URI to use.
     *
     * @return String The first configured base URI.
     */
    private String getElasticSearchUri() {
        // TODO: Make this load-balanced
        return elasticSearchUris.get(0);
    }

    /**
     * Retrieve the mapping metadata from the given job and ElasticSearch URI.
     *
     * Fetches and parses the index _mapping on first use and caches the resulting
     * field-name-to-type map on the job.
     *
     * @param elasticSearchUri The base URI of the ElasticSearch instance.
     * @param job The job whose database mapping to retrieve (and cache the result on).
     * @return Map<String, String> Field name to ElasticSearch type (falls back to "object").
     * @throws IllegalStateException When the mapping cannot be retrieved or parsed.
     */
    @SuppressWarnings("rawtypes")
    private Map<String, String> retrieveMetaData(final String elasticSearchUri, final EnricherJob job) {
        if (job.getMappingMetaData() == null) {
            URI mappingUri = URI.create(format("http://%s/%s/_mapping", elasticSearchUri, job.getDatabaseName()));

            String mappingContent;

            try {
                mappingContent = operations.getForObject(mappingUri, String.class);

                if (logger.isTraceEnabled())
                    logger.trace("Mapping content is: " + mappingContent);
            } catch (RestClientException e) {
                throw new IllegalStateException(format("Unable to retrieve the index meta-data for job %s - maybe it hasn't been created yet?", job.getDatabaseName()), e);
            }

            // Retrieve the cluster state for the given collection
            Map<String, Object> propertiesMap;

            try {
                XContentParser parser = XContentType.JSON.xContent().createParser(mappingContent);

                try {
                    propertiesMap = getMapEntry(parser.map(), "properties");
                } finally {
                    parser.close();
                }
            } catch (IOException e) {
                throw new IllegalStateException(format("Unable to parse the mapping content for database %s", job.getDatabaseName()), e);
            }

            // Retrieve the index metadata
            if (propertiesMap == null)
                throw new IllegalStateException(format("Unable to retrieve the index meta-data for job %s - maybe it hasn't been created yet?", job.getDatabaseName()));

            Map<String, String> types = new HashMap<String, String>();

            for (Map.Entry<String, Object> mapping : propertiesMap.entrySet())
                if (mapping.getValue() instanceof Map && ((Map) mapping.getValue()).containsKey("type"))
                    types.put(mapping.getKey(), (String) ((Map) mapping.getValue()).get("type"));
                else
                    types.put(mapping.getKey(), "object");

            job.setMappingMetaData(types);
        }

        return job.getMappingMetaData();
    }

    /**
     * Retrieve the map within the given map who's entry has the given name.
     *
     * Performs a depth-first search over nested map values.
     *
     * @param map The map to search.
     * @param name The entry name to look for.
     * @return Map<String, Object> The matching nested map, or null when not found.
     */
    @SuppressWarnings("unchecked")
    private Map<String, Object> getMapEntry(final Map<String, Object> map, final String name) {
        for (Map.Entry<String, Object> entry : map.entrySet()) {
            if (entry.getValue() instanceof Map) {
                Map<String, Object> result;

                if (entry.getKey().equals(name))
                    result = (Map<String, Object>) entry.getValue();
                else
                    result = getMapEntry((Map<String, Object>) entry.getValue(), name);

                if (result != null)
                    return result;
            }
        }

        return null;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void commit(final EnricherJob job) throws Exception {
        String flushUri = format("http://%s/%s/_flush", getElasticSearchUri(), job.getDatabaseName());

        HttpStatus status = operations.execute(flushUri, HttpMethod.POST, null, new ResponseExtractor<HttpStatus>() {
            @Override
            public HttpStatus extractData(final ClientHttpResponse response) throws IOException {
                return response.getStatusCode();
            }
        }, null);

        if (status.value() != HttpStatus.OK.value())
            logger.error("Not all shards could be flushed / committed");
        else {
            if (logger.isInfoEnabled())
                logger.info("All shards were successfully flushed / committed");
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void deleteByDocId(final EnricherJob job, final String[] documents) throws Exception {
        deleteByReference(job, REFERENCE_FIELD, documents);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void deleteByReference(final EnricherJob job, final String field, final String[] documents) throws Exception {
        final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

        XContentBuilder builder = XContentFactory.jsonBuilder(outputStream);

        try {
            // Build a bool/should query matching any of the given reference values
            builder.startObject();
            builder.startObject("bool");
            builder.startArray("should");

            for (String document : documents) {
                builder.startObject();
                builder.startObject("match");
                builder.field(field, document);
                builder.endObject();
                builder.endObject();
            }

            builder.endArray();
            builder.endObject();
            builder.endObject();

            builder.flush();

            // Then post the result
            String queryUri = format("http://%s/%s/%s/_query", getElasticSearchUri(), job.getDatabaseName(), job.getDatabaseType());

            HttpStatus status = operations.execute(queryUri, HttpMethod.DELETE, new RequestCallback() {
                @Override
                public void doWithRequest(final ClientHttpRequest request) throws IOException {
                    request.getBody().write(outputStream.toByteArray());
                }
            }, new ResponseExtractor<HttpStatus>() {
                @Override
                public HttpStatus extractData(final ClientHttpResponse response) throws IOException {
                    return response.getStatusCode();
                }
            }, null);

            if (status.value() == HttpStatus.OK.value())
                logger.info("Successfully removed document(s) from ElasticSearch");
            else
                logger.error(format("Failed to delete document(s) from ElasticSearch %s", status.getReasonPhrase()));
        } finally {
            builder.close();
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public List<GeneratorType> getType() {
        return Arrays.asList(GeneratorType.ElasticSearch);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void writeOut(final String databaseName, final PrintWriter writer) {
        writer.write(format("ElasticSearch with database name %s", databaseName));
    }
}