// Java tutorial
/** * Copyright (C) 2013 Seajas, the Netherlands. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 3, as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.seajas.search.profiler.service.repository; import com.mongodb.BasicDBObject; import com.mongodb.DBObject; import com.mongodb.MongoException; import com.mongodb.gridfs.GridFS; import com.mongodb.gridfs.GridFSDBFile; import com.seajas.search.bridge.jms.model.CompositeEntry; import com.seajas.search.bridge.jms.model.state.CompositeState; import java.util.ArrayList; import java.util.Date; import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.lang.StringEscapeUtils; import org.bson.types.ObjectId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.dao.DataAccessException; import org.springframework.data.domain.Sort; import org.springframework.data.mongodb.MongoDbFactory; import org.springframework.data.mongodb.core.DocumentCallbackHandler; import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.index.Index; import org.springframework.data.mongodb.core.query.Criteria; import org.springframework.data.mongodb.core.query.Order; import org.springframework.data.mongodb.core.query.Query; import org.springframework.stereotype.Service; import static 
org.springframework.data.mongodb.core.query.Criteria.where; /** * MongoDB-based repository service. * * @author Jasper van Veghel <jasper@seajas.com> */ @Service public class RepositoryService { /** * Constants. */ private static final Integer MAX_ACCEPTABLE_RESULT_SIZE = 1000; private static final Long MAX_INDEX_CREATION_COUNT = 1000L; private static final String[] INDEXES = { "currentState", "failureState", "source.id", "source.collection", // "source.resultParameters", // XXX: Don't index this, as it's inefficient and doesn't really work "originalContent.hostname", "originalContent.dateSubmitted", "modifiedContent.dateSubmitted", "enricherDocument._id" }; /** * The logger. */ private static final Logger logger = LoggerFactory.getLogger(RepositoryService.class); /** * The static logger. */ private static final Logger staticLogger = LoggerFactory.getLogger(RepositoryService.class); /** * The MongoDB template. */ private MongoTemplate mongoTemplate; /** * The default collection. */ @Value("${bridged.project.mongo.db.collection}") private String defaultCollection; /** * The GridFS store. */ private GridFS gridFs; /** * Default constructor. * * @param dbFactory */ @Autowired public RepositoryService(final MongoDbFactory dbFactory, final MongoTemplate mongoTemplate) { this.gridFs = new GridFS(dbFactory.getDb()); // Initialize the indexes only if there aren't too many documents in the store yet Long currentCount = mongoTemplate.count(new Query(), CompositeEntry.class); // Determine whether the given indexes have been created, and initialize them if necessary if (currentCount <= MAX_INDEX_CREATION_COUNT) for (String index : INDEXES) mongoTemplate.indexOps(CompositeEntry.class).ensureIndex(new Index().on(index, Order.ASCENDING)); this.mongoTemplate = mongoTemplate; } /** * Retrieve a paged list of all resources within the repository. 
* * @param collection * @param sourceId * @param taxonomyMatch * @param startDate * @param endDate * @param pagerStart * @param pagerResults * @param parameters * @return RepositoryResult */ public RepositoryResult findResources(final String collection, final Integer sourceId, final String taxonomyMatch, final Date startDate, final Date endDate, final Integer pagerStart, final Integer pagerResults, final Map<String, String> parameters) { Query query = createQuery(false, collection, sourceId, taxonomyMatch, startDate, endDate, null, parameters); query.with(new Sort(Sort.Direction.DESC, "originalContent.dateSubmitted")); if (logger.isInfoEnabled()) logger.info("About to count the number of results - which can potentially take a while - query = " + query.toString()); // First perform a count Long totalResults = mongoTemplate.count(query, defaultCollection); if (logger.isInfoEnabled()) logger.info("Counted " + totalResults + " result(s) to be retrieved from the storage back-end"); // Then add paging parameters to the query query.skip(pagerStart); query.limit(pagerResults); // And build up the result List<RepositoryResource> results = new ArrayList<RepositoryResource>(pagerResults); List<CompositeEntry> entries = mongoTemplate.find(query, CompositeEntry.class, defaultCollection); for (CompositeEntry entry : entries) results.add(new RepositoryResource(entry.getOriginalContent().getUri().toString(), entry.getSource().getCollection(), entry.getSource().getId(), entry.getOriginalContent().getHostname(), entry.getOriginalContent().getDateSubmitted(), entry.getId().toString())); return new RepositoryResult(pagerStart, pagerResults, totalResults, results); } /** * Delete all given resources from the repository. 
* * @param collection * @param sourceId * @param url * @param startDate * @param endDate * @return boolean */ @SuppressWarnings("deprecation") public boolean deleteResources(final String collection, final Integer sourceId, final String url, final Date startDate, final Date endDate) { try { Query query = createQuery(true, collection, sourceId, null, startDate, endDate, url, null); if (logger.isInfoEnabled()) logger.info("Removing entries from the repository"); // First delete all GridFS references mongoTemplate.executeQuery(query, defaultCollection, new DocumentCallbackHandler() { @Override public void processDocument(final DBObject dbObject) throws MongoException, DataAccessException { if (dbObject.get("originalContent") != null) { ObjectId originalId = (ObjectId) ((BasicDBObject) dbObject.get("originalContent")) .get("_id"); gridFs.remove(originalId); } if (dbObject.get("modifiedContent") != null) { ObjectId modifiedId = (ObjectId) ((BasicDBObject) dbObject.get("modifiedContent")) .get("_id"); gridFs.remove(modifiedId); } } }); // Then delete the repository documents mongoTemplate.remove(query, defaultCollection); return true; } catch (RuntimeException e) { logger.error("Unable to remove the given resource(s) from the repository", e); return false; } } /** * Retrieve a paged list of all resources within the repository. 
* * @param collection * @param sourceId * @param taxonomyMatch * @return RepositoryStatistic */ public RepositoryStatistic countResources(final String collection, final Integer sourceId, final String taxonomyMatch) { Query query = createQuery(false, collection, sourceId, taxonomyMatch, null, null, null, null); Long totalResults = mongoTemplate.count(query, defaultCollection); Date lastAccessed = null; if (totalResults > 0) lastAccessed = mongoTemplate.findOne(query, CompositeEntry.class, defaultCollection) .getOriginalContent().getDateSubmitted(); return new RepositoryStatistic(totalResults, lastAccessed); } /** * Retrieve a resource using the given document path. * * @param entryId * @return RepositoryContent */ public RepositoryContent getResource(final String entryId) { CompositeEntry entry = mongoTemplate.findById(new ObjectId(entryId), CompositeEntry.class, defaultCollection); if (entry.getOriginalContent() == null) throw new IllegalArgumentException("No original content was set for resource identified by " + entryId); GridFSDBFile dbFile = gridFs.find(entry.getOriginalContent().getId()); return new RepositoryContent(dbFile.getInputStream(), entry.getOriginalContent().getMediaType()); } /** * Process a paged list of all resources within the repository. 
* * @param collection * @param sourceId * @param taxonomyMatch * @param url * @param startDate * @param endDate * @param parameters * @param rangeStart * @param rangeEnd * @param processor * @return boolean */ public boolean processResources(final String collection, final Integer sourceId, final String taxonomyMatch, final String url, final Date startDate, final Date endDate, final Map<String, String> parameters, final Integer rangeStart, final Integer rangeEnd, final RepositoryProcessor processor) { Query query = createQuery(true, collection, sourceId, taxonomyMatch, startDate, endDate, url, parameters); query.fields().include("_id"); query.fields().include("currentState"); query.fields().include("element.hostname"); // Determine the total number of document this affects final AtomicLong currentResult = new AtomicLong(0L); // Then skip to it and get going query.skip(rangeStart); if (rangeEnd != null) query.limit(rangeEnd - rangeStart); if (logger.isInfoEnabled()) logger.info(String.format("Processing ranges %d to %s of (unknown) results through the given processor", rangeStart, rangeEnd != null ? 
rangeEnd.toString() : "end")); mongoTemplate.executeQuery(query, defaultCollection, new DocumentCallbackHandler() { @Override public void processDocument(final DBObject dbObject) throws MongoException, DataAccessException { CompositeState currentState = CompositeState.valueOf((String) dbObject.get("currentState")); if (!EnumSet.of(CompositeState.Content, CompositeState.CompletedDocument, CompositeState.InitialDocument).contains(currentState)) { if (logger.isDebugEnabled()) { ObjectId id = (ObjectId) dbObject.get("_id"); logger.debug("Skipping over element with ID '" + id + "' and current state '" + currentState + "'"); } return; } ObjectId id = (ObjectId) dbObject.get("_id"); String hostname = (String) ((BasicDBObject) dbObject.get("element")).get("hostname"); if (logger.isInfoEnabled()) logger.info("Processing re-indexing entry " + currentResult.getAndIncrement() + " / (unknown) with ID '" + id + "' and hostname '" + hostname + "'"); processor.process(id, hostname); } }); return true; } /** * Create a query given any or all of the provided parameters. 
* * @param allStates * @param collection * @param sourceId * @param taxonomyMatch * @param startDate * @param endDate * @param url * @return Query */ private Query createQuery(final Boolean allStates, final String collection, final Integer sourceId, final String taxonomyMatch, final Date startDate, final Date endDate, final String url, final Map<String, String> parameters) { Query query = new Query(); if (!allStates) query.addCriteria( new Criteria().orOperator(where("currentState").is(CompositeState.CompletedDocument.name()), where("currentState").is(CompositeState.InitialDocument.name()))); if (collection != null) query.addCriteria(where("source.collection").is(collection)); if (sourceId != null) query.addCriteria(where("source.id").is(sourceId)); if (taxonomyMatch != null) query.addCriteria(where("originalContent.hostname").is(taxonomyMatch)); if (startDate != null || endDate != null) { Criteria dateCriteria = where("originalContent.dateSubmitted"); if (startDate != null) dateCriteria = dateCriteria.gte(startDate); if (endDate != null) dateCriteria = dateCriteria.lt(endDate); query.addCriteria(dateCriteria); } if (parameters != null && parameters.size() > 0) for (Map.Entry<String, String> parameter : parameters.entrySet()) { if (parameter.getKey().contains(".") || parameter.getKey().contains("$")) throw new IllegalStateException("Can't add criteria for parameter '" + parameter.getKey() + "' because it contains invalid characters"); query.addCriteria( where("source.resultParameters." + StringEscapeUtils.escapeJavaScript(parameter.getKey())) .is(parameter.getValue())); } if (url != null) query.addCriteria(where("originalContent.uri").is(url)); return query; } }