Java tutorial
/* Copyright (c) 2010 Ministry of the Interior and Kingdom Relations,
 * the Netherlands. All rights reserved.
 *
 * This file is part of the MinBZK Search Enricher indexing generator.
 *
 * Search Enricher is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Search Enricher is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Search Enricher. If not, see <http://www.gnu.org/licenses/>.
 */
package nl.minbzk.dwr.zoeken.enricher.service;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.seajas.search.bridge.enricher.SchemaField;
import com.seajas.search.bridge.jms.model.CompositeEntry;
import com.seajas.search.bridge.jms.model.EnricherDocument;

import java.io.Reader;
import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;

import nl.minbzk.dwr.zoeken.enricher.Generator;
import nl.minbzk.dwr.zoeken.enricher.GeneratorResult;
import nl.minbzk.dwr.zoeken.enricher.Mapper;
import nl.minbzk.dwr.zoeken.enricher.Processor;
import nl.minbzk.dwr.zoeken.enricher.ProcessorResult;
import nl.minbzk.dwr.zoeken.enricher.Uploader;
import nl.minbzk.dwr.zoeken.enricher.aci.ImportEnvelope;
import nl.minbzk.dwr.zoeken.enricher.aci.ImportEnvelopeParser;
import nl.minbzk.dwr.zoeken.enricher.processor.ProcessorContext;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherJob;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherSettings;
import nl.minbzk.dwr.zoeken.enricher.settings.EnricherSettings.GeneratorType;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.stereotype.Service;
import org.springframework.util.ReflectionUtils;
import org.springframework.util.StringUtils;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;

import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import javax.xml.parsers.DocumentBuilderFactory;

import static java.lang.String.format;

/**
 * Search enricher service.
 *
 * @author Jasper van Veghel <j.veghel@rijksoverheid.nl>
 */
@Service
public class EnricherService {
    /**
     * The logger.
     */
    private static Logger logger = LoggerFactory.getLogger(EnricherService.class);

    /**
     * The processor.
     */
    @Autowired
    private Processor processor;

    /**
     * The generators.
     */
    @Resource
    private List<Generator> generators;

    /**
     * The result uploaders.
     */
    @Resource
    private List<Uploader> uploaders;

    /**
     * The generator mappers.
     */
    @Resource
    private List<Mapper> mappers;

    /**
     * The fetch settings.
     */
    @Autowired
    private EnricherSettings settings;

    /**
     * Mongo template.
     */
    @Autowired
    private MongoTemplate mongoTemplate;

    /**
     * Default collection.
*/ @Value("${bridged.project.mongo.db.collection}") private String defaultCollection; /** * Cache for fields with @see SchemaFeed annotation of enricher document. */ private Map<Field, String> schemaFieldCache = Maps.newHashMap(); /** * Initialization of schema field cache */ @PostConstruct public void postConstruct() { Field[] fields = EnricherDocument.class.getDeclaredFields(); for (Field field : fields) { Annotation annotation = field.getAnnotation(SchemaField.class); if (annotation instanceof SchemaField) { SchemaField schemaField = (SchemaField) annotation; field.setAccessible(true); schemaFieldCache.put(field, schemaField.value()); } } } /** * Process the given import content contained within the reader. * * @param reader * @param jobName * @param referenceEncoding * @return boolean */ public boolean processImport(final Reader reader, final String jobName, final String referenceEncoding) { EnricherJob job = settings.getJob(jobName); // Determine which generator and generators to use, based on the request List<Generator> derivedGenerators = deriveGenerators(job); List<Uploader> derivedUploaders = deriveUploaders(job); Mapper derivedMapper = deriveMapper(job); // The number of generators and uploaders should match if (derivedGenerators.size() != derivedUploaders.size()) throw new IllegalArgumentException("The number of generators does not match the number of uploaders"); // Process the actual input if (logger.isDebugEnabled()) logger.debug(format("Envelope received for job with database name '%s' and encoding %s", job.getDatabaseName(), referenceEncoding != null ? "'" + referenceEncoding + "'" : "(null)")); ProcessorContext processorContext = processor.createContext(); try { // Process and generate envelope results before uploading them List<GeneratorResult> generatorResults = new ArrayList<GeneratorResult>(derivedGenerators.size()); for (int i = 0; i < derivedGenerators.size(); i++) generatorResults.add(derivedGenerators.get(i).createResult(settings, derivedUploaders.get(i))); for (ImportEnvelope envelope : retrieveEnvelopes(reader)) { try { // A job will always be at hand, but a database might not be if (envelope.getDatabase() == null) envelope.setDatabase(job.getDatabaseName()); if (envelope.getDatabase() == null) throw new Exception("No database reference found, either in the envelope or from the job"); // If a reference encoding is given we override any previous setting if (referenceEncoding != null) envelope.setReferenceEncoding(referenceEncoding); ProcessorResult processorResult = processor.process(envelope, settings, job, processorContext); if (processorResult != null) for (int i = 0; i < derivedGenerators.size(); i++) { derivedGenerators.get(i).process(envelope, processorResult, generatorResults.get(i), settings, job); if (derivedMapper != null) derivedMapper.map(job, generatorResults.get(i)); } processor.resetContext(processorContext); } catch (Exception e) { logger.error("Skipping the given envelope due to processing errors", e); } } // Flush any uncommitted uploads for (GeneratorResult generatorResult : generatorResults) generatorResult.flushUploads(job); } catch (Exception e) { logger.error("The given import envelopes could not be processed", e); return false; } finally { // Release any context resources processor.releaseContext(processorContext); } return true; } /** * Process the given delete documents by DocId. 
     *
     * @param jobName
     * @param documents
     * @return boolean
     */
    public boolean processDeleteByDocId(final String jobName, final String[] documents) {
        EnricherJob job = settings.getJob(jobName);

        // Determine which uploaders to use, based on the request
        List<Uploader> derivedUploaders = deriveUploaders(job);

        try {
            logger.info("Deleting given document(s) by their ID");

            for (Uploader uploader : derivedUploaders)
                uploader.deleteByDocId(job, documents);
        } catch (Exception e) {
            logger.error("The given deletes could not be processed", e);

            return false;
        }

        return true;
    }

    /**
     * Delete the given documents by reference.
     *
     * @param jobName
     * @param field
     * @param documents
     * @return boolean
     */
    public boolean processDeleteByReference(final String jobName, final String field, final String[] documents) {
        EnricherJob job = settings.getJob(jobName);

        // Determine which uploaders to use, based on the request
        List<Uploader> derivedUploaders = deriveUploaders(job);

        try {
            logger.info("Deleting given document(s) by reference (" + field + ")");

            for (Uploader uploader : derivedUploaders)
                uploader.deleteByReference(job, field, documents);
        } catch (Exception e) {
            logger.error("The given deletes could not be processed", e);

            return false;
        }

        return true;
    }

    /**
     * Process a commit request for the given job.
     *
     * @param jobName
     * @return boolean
     */
    public boolean processCommit(final String jobName) {
        EnricherJob job = settings.getJob(jobName);

        List<Uploader> derivedUploaders = deriveUploaders(job);

        try {
            logger.info("Committing using uploader for job " + jobName);

            for (Uploader uploader : derivedUploaders)
                uploader.commit(job);
        } catch (Exception e) {
            logger.error("The given commit could not be processed", e);

            return false;
        }

        return true;
    }

    /**
     * Handle the given reader as an ACI envelope container.
     *
     * @param reader
     * @return List<ImportEnvelope>
     */
    private List<ImportEnvelope> retrieveEnvelopes(final Reader reader) throws Exception {
        Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(reader));

        List<ImportEnvelope> envelopes = ImportEnvelopeParser.parse(document);

        postProcessEnvelopes(envelopes);

        return envelopes;
    }

    /**
     * Post-process envelopes to fetch MongoDB data.
     *
     * @param envelopes
     */
    protected void postProcessEnvelopes(final List<ImportEnvelope> envelopes) {
        for (ImportEnvelope envelope : envelopes) {
            if (envelope.getUri() != null && envelope.getUri().startsWith("data://")) {
                String databaseId = envelope.getUri().substring(7);

                CompositeEntry compositeEntry = mongoTemplate.findById(databaseId, CompositeEntry.class, defaultCollection);

                EnricherDocument enricherDocument = compositeEntry.getEnricherDocument();

                if (enricherDocument == null)
                    logger.error(format(
                            "Composite entry from data storage does not contain an enricher document (current = %s, failure = %s, id = %s)",
                            compositeEntry.getCurrentState(), compositeEntry.getFailureState(), compositeEntry.getId()));
                else {
                    // Now set the URI reference to the modified or original content reference
                    envelope.setUri("data://" + (compositeEntry.getModifiedContent() != null ?
                            compositeEntry.getModifiedContent().getId() : compositeEntry.getOriginalContent().getId()));

                    envelope.setReference(enricherDocument.getReference());
                    envelope.setTitle(enricherDocument.getTitle());

                    if (envelope.getFields() == null) {
                        envelope.setFields(new HashMap<String, List<String>>());
                    }

                    for (Field field : schemaFieldCache.keySet()) {
                        String schemaField = schemaFieldCache.get(field);

                        Object value = ReflectionUtils.getField(field, enricherDocument);

                        if (value != null) {
                            this.mapEnricherDocumentToImportEnvelope(envelope, value, schemaField);
                        }
                    }
                }
            }
        }
    }

    /**
     * Reconstruct the import envelope so existing code can be used.
     *
     * @param envelope
     * @param value
     * @param schemaField
     * @throws IllegalStateException if a field of the enricher document cannot be mapped to the import envelope.
     */
    private void mapEnricherDocumentToImportEnvelope(final ImportEnvelope envelope, final Object value, final String schemaField) {
        Map<String, List<String>> envelopeFields = envelope.getFields();

        if (schemaField.equals("*") && value instanceof Map) {
            @SuppressWarnings("unchecked")
            Map<String, String> values = (Map<String, String>) value;

            Map<String, List<String>> transformedValues = Maps.transformValues(values, new Function<String, List<String>>() {
                @Override
                public List<String> apply(String s) {
                    return Lists.newArrayList(s);
                }
            });

            // Create a new map to avoid the lazy evaluation of the transformation function
            envelopeFields.putAll(Maps.newHashMap(transformedValues));
        } else if (value instanceof List) {
            @SuppressWarnings("unchecked")
            List<String> values = (List<String>) value;

            envelopeFields.put(schemaField, values);
        } else if (value instanceof Date) {
            SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
            dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));

            envelope.setSingleValue(schemaField, dateFormat.format(value));
        } else if (value instanceof Integer) {
            envelope.setSingleValue(schemaField, value.toString());
        } else if (value instanceof String) {
            envelope.setSingleValue(schemaField, (String) value);
        } else {
            throw new IllegalStateException("Cannot apply value '" + value + "' of this type for schemaField " + schemaField + " to the import envelope");
        }
    }

    /**
     * Determine the generators for this particular job.
     *
     * @param job
     * @return List<Generator>
     */
    public List<Generator> deriveGenerators(final EnricherJob job) {
        List<Generator> derivedGenerators = new ArrayList<Generator>();

        for (GeneratorType generatorType : job.getGeneratorTypes())
            for (Generator potentialGenerator : generators)
                if (potentialGenerator.getType().contains(generatorType) && !derivedGenerators.contains(potentialGenerator))
                    derivedGenerators.add(potentialGenerator);

        if (derivedGenerators.size() == 0)
            throw new IllegalStateException("No generator could be associated with the given job");

        return derivedGenerators;
    }

    /**
     * Determine the uploaders for this particular job.
     *
     * @param job
     * @return List<Uploader>
     */
    public List<Uploader> deriveUploaders(final EnricherJob job) {
        List<Uploader> derivedUploaders = new ArrayList<Uploader>();

        for (GeneratorType generatorType : job.getGeneratorTypes())
            for (Uploader potentialUploader : uploaders)
                if (potentialUploader.getType().contains(generatorType) && !derivedUploaders.contains(potentialUploader))
                    derivedUploaders.add(potentialUploader);

        if (derivedUploaders.size() == 0)
            throw new IllegalStateException("No uploader could be associated with the given job");

        return derivedUploaders;
    }

    /**
     * Determine the mapper for this particular job, if any.
     *
     * @param job
     * @return Mapper
     */
    public Mapper deriveMapper(final EnricherJob job) {
        if (StringUtils.hasText(job.getGeneratorMapping()))
            for (Mapper mapper : mappers)
                if (mapper.getName().equalsIgnoreCase(job.getGeneratorMapping()))
                    return mapper;

        return null;
    }
}
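
A minimal usage sketch follows, assuming the service has been wired up by a Spring application context as configured elsewhere in the project. The context location "applicationContext.xml", the job name "example-job", and the placeholder envelope XML are hypothetical and only illustrate the call sequence of processImport and processCommit.

// Hypothetical usage sketch - the context location, job name and envelope XML
// below are illustrative assumptions, not part of the Search Enricher project.
import java.io.Reader;
import java.io.StringReader;

import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

import nl.minbzk.dwr.zoeken.enricher.service.EnricherService;

public class EnricherServiceUsageSketch {
    public static void main(final String[] args) {
        // Bootstrap the Spring context that defines the EnricherService bean (assumed location)
        ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
        EnricherService service = context.getBean(EnricherService.class);

        // An ACI envelope container document; a real import would supply actual envelopes
        Reader reader = new StringReader("<envelopes/>");

        // Process the import for the (assumed) job; a null reference encoding leaves
        // whatever encoding the envelopes themselves declare in place
        boolean imported = service.processImport(reader, "example-job", null);

        // Ask the uploaders for the same job to commit once the import has been sent
        boolean committed = service.processCommit("example-job");

        System.out.println("Imported: " + imported + ", committed: " + committed);
    }
}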