Java tutorial
/*
 * Copyright (c) 2009 - 2010. School of Information Technology and Electrical
 * Engineering, The University of Queensland. This software is being developed
 * for the "Phenomics Ontology Driven Data Management Project (PODD)" project.
 * PODD is a National e-Research Architecture Taskforce (NeAT) project
 * co-funded by ANDS and ARCS.
 *
 * PODD is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PODD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PODD. If not, see <http://www.gnu.org/licenses/>.
 */
package podd.search.impl;

import info.aduna.collections.iterators.CloseableIterator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.openrdf.model.Graph;
import org.springframework.util.StringUtils;

import podd.dataaccess.DAO;
import podd.dataaccess.misc.PostUpdater;
import podd.exception.DataAccessException;
import podd.exception.ObjectTripleGenerationException;
import podd.exception.PostProcessingException;
import podd.exception.SearchIndexingException;
import podd.model.entity.PoddEntity;
import podd.model.entity.PoddObject;
import podd.model.project.Project;
import podd.model.value.EnumValue; // assumed package: getPostfix() below references EnumValue, but the original import list omits it
import podd.search.IndexFields;
import podd.triples.GraphParser;
import podd.triples.GraphParserFactory;
import podd.triples.ObjectTriplesGenerator;

import java.io.IOException;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * An index manager for inserting/updating/deleting index records by communicating
 * with a configured Solr server.
 *
 * @author Philip Wu
 */
public class IndexManager implements PostUpdater {

    static final Logger LOGGER = LoggerFactory.getLogger(IndexManager.class);

    /**
     * For Solr dynamic fields. Anything that ends with _t is regarded as text in Solr.
     */
    public static final String INDEX_TEXT = "_t";

    /**
     * For Solr dynamic fields. Anything that ends with _dt is regarded as a date in Solr.
     */
    public static final String INDEX_DATE = "_dt";

    /**
     * The StrField type is not analyzed, but indexed/stored verbatim.
     */
    public static final String INDEX_STRING = "_s";
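    // The suffixes above rely on Solr's dynamic field support. A minimal sketch of
    // matching schema.xml entries (illustrative only; the actual PODD schema is not
    // part of this file, though the stock Solr example schema defines these three):
    //
    //   <dynamicField name="*_t"  type="text"   indexed="true" stored="true"/>
    //   <dynamicField name="*_dt" type="date"   indexed="true" stored="true"/>
    //   <dynamicField name="*_s"  type="string" indexed="true" stored="true"/>
    //
    // So a key such as "description" combined with INDEX_TEXT is indexed as analyzed
    // text under "description_t" without any per-field schema changes.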
    /**
     * The connection configuration to the Solr server.
     */
    protected SolrServer solrServer;

    /**
     * Controls whether indexing is done in a separate thread.
     * False is used for test cases that need to wait for indexing to complete before making assertions.
     */
    protected boolean useThreads = true;

    /**
     * For better performance we can batch up the documents to be submitted for indexing.
     */
    protected int batchSize = 20;

    /**
     * Used to fetch all the details of the PoddObject from its triples.
     */
    protected ObjectTriplesGenerator objectTriplesGenerator;

    /**
     * Controls the indexing of documents by caching behind the scenes.
     */
    protected DocumentCacheManager cacheManager = new DocumentCacheManager();

    protected DAO objectDao;

    public SolrServer getSolrServer() {
        return solrServer;
    }

    public void setSolrServer(SolrServer solrServer) {
        this.solrServer = solrServer;
    }

    public boolean isUseThreads() {
        return useThreads;
    }

    public void setUseThreads(boolean useThreads) {
        this.useThreads = useThreads;
    }

    public int getBatchSize() {
        return batchSize;
    }

    public void setBatchSize(int batchSize) {
        this.batchSize = batchSize;
    }

    public ObjectTriplesGenerator getObjectTriplesGenerator() {
        return objectTriplesGenerator;
    }

    public void setObjectTriplesGenerator(ObjectTriplesGenerator objectTriplesGenerator) {
        this.objectTriplesGenerator = objectTriplesGenerator;
    }

    public DAO getObjectDao() {
        return objectDao;
    }

    public void setObjectDao(DAO objectDao) {
        this.objectDao = objectDao;
    }

    @Override
    public void close() throws PostProcessingException {
        // TODO Auto-generated method stub
    }

    @Override
    public void doDelete(PoddEntity poddEntity) throws PostProcessingException {
        deleteIndexedPoddObject(poddEntity);
    }

    @Override
    public void doSave(PoddEntity poddEntity) throws PostProcessingException {
        indexPoddObject(poddEntity);
    }

    @Override
    public void doUpdate(PoddEntity poddEntity) throws PostProcessingException {
        indexPoddObject(poddEntity);
    }

    /**
     * Flushes the cache.
     * FIXME: Why is this method synchronized?
     */
    public synchronized void flush() {
        try {
            LOGGER.info("Index manager invokes flushing.");
            cacheManager.flush();
        } catch (IOException e) {
            LOGGER.error("Found exception", e);
        } catch (SolrServerException e) {
            LOGGER.error("Found exception", e);
        }
    }

    /**
     * Adds or updates an entry for the poddObject in the search index.
     *
     * @param poddObject
     */
    public void indexPoddObject(Object poddObject) {
        if (poddObject instanceof PoddObject) {
            PoddObject poddObj = (PoddObject) poddObject;
            LOGGER.debug("Indexing poddObject: " + poddObj.getPid());
            IndexGenerator indexGenerator = new IndexGenerator(poddObj);
            if (useThreads) {
                // Fork a thread to do the indexing submission
                Thread t = new Thread(indexGenerator);
                t.start();
            } else {
                // Do the indexing without forking a thread
                indexGenerator.run();
            }
        }
    }
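    // Note: when useThreads is true, a newly indexed object only becomes searchable once
    // the DocumentCacheManager below reaches its batch size or flush() is invoked; this is
    // why test cases run with useThreads = false and can call flush() explicitly.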
    /**
     * Deletes a podd object from the index if it exists.
     *
     * @param poddObject
     */
    public void deleteIndexedPoddObject(Object poddObject) {
        if (poddObject instanceof PoddObject) {
            PoddObject poddObj = (PoddObject) poddObject;
            LOGGER.debug("Deleting: " + poddObj.getPid());
            // Invoke delete on Solr
            if (poddObj.getId() != null) {
                try {
                    // Before we delete, we must flush the cache
                    synchronized (this) {
                        flush();
                    }
                    solrServer.deleteByQuery("id:" + ClientUtils.escapeQueryChars(poddObj.getPid()));
                    solrServer.commit();
                } catch (SolrServerException e) {
                    LOGGER.error("Found exception", e);
                } catch (IOException e) {
                    LOGGER.error("Found exception", e);
                }
            }
        }
    }

    /**
     * Clears the index.
     */
    public void clearIndex() {
        try {
            solrServer.deleteByQuery("*:*");
            solrServer.commit();
            LOGGER.debug("Index has been cleared");
        } catch (SolrServerException e) {
            LOGGER.error("Found exception", e);
        } catch (IOException e) {
            LOGGER.error("Found exception", e);
        }
    }

    /**
     * Clears and rebuilds the index from all the podd objects currently available.
     */
    public long rebuildIndex() {
        LOGGER.info("rebuilding index");
        // Clear the index first
        this.clearIndex();
        // Now rebuild the index
        long numIndexed = 0;
        try {
            for (CloseableIterator<PoddObject> it = objectDao.getAll(); it.hasNext();) {
                PoddObject poddObject = null;
                // Avoid inconsistency errors
                try {
                    poddObject = it.next();
                } catch (RuntimeException e) {
                    LOGGER.error("Found exception", e);
                }
                if (poddObject != null) {
                    // No threading allowed; we wait for the process to complete entirely
                    IndexGenerator indexGenerator = new IndexGenerator(poddObject);
                    indexGenerator.run();
                    numIndexed++;
                }
            }
        } catch (DataAccessException e) {
            LOGGER.error("Found exception", e);
        }
        this.flush();
        return numIndexed;
    }

    /**
     * Inner class for generating an index.
     */
    class IndexGenerator implements Runnable {

        private PoddObject poddObj;

        public IndexGenerator(PoddObject poddObj) {
            this.poddObj = poddObj;
        }

        public void run() {
            try {
                doIndexing();
            } catch (SearchIndexingException e) {
                LOGGER.error("Found exception", e);
            }
        }

        /**
         * Submits the PoddObject for indexing.
         */
        public void doIndexing() throws SearchIndexingException {
            // Populate the fields of the document
            SolrInputDocument doc = new SolrInputDocument();
            doc.addField(IndexFields.ID.toString(), poddObj.getPid());
            //doc.addField(IndexFields.CREATION_DATE.toString(), poddObj.getCreatedTime());
            doc.addField(IndexFields.OBJECT_TYPE.toString(), poddObj.getConcept().getConceptName());
            handleGraph(poddObj, doc);
            // Mainly for test cases
            if (doc.getFieldValue(IndexFields.DESCRIPTION.toString()) == null) {
                doc.addField(IndexFields.DESCRIPTION.toString(), poddObj.getLabel());
            }
            try {
                cacheManager.addDocument(poddObj.getPid(), doc);
            } catch (SolrServerException e) {
                LOGGER.error("Found exception", e);
            } catch (IOException e) {
                LOGGER.error("Found exception", e);
            }
        }
    }
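    // Sketch of what doIndexing() above assembles (IndexFields supplies the real field
    // names; of these, only ID's value "id" is visible elsewhere in this class, in
    // deleteIndexedPoddObject()):
    //   - the object's PID under IndexFields.ID
    //   - the concept name under IndexFields.OBJECT_TYPE
    //   - one dynamic field per triple-derived key/value pair (see handleGraph below)
    //   - the object's label under IndexFields.DESCRIPTION, when no description was found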
    /**
     * Extracts graph triples and stores them into a document for indexing.
     *
     * @param poddObj
     * @param doc
     * @throws SearchIndexingException
     */
    private void handleGraph(PoddObject poddObj, SolrInputDocument doc) throws SearchIndexingException {
        try {
            Project project = ObjectTriplesGenerator.getAncestor(poddObj);
            //LOGGER.info("project=" + project);
            Graph graph = objectTriplesGenerator.getObjectTriples(poddObj);
            GraphParser parser = GraphParserFactory.getInstance().createGraphParser(graph, poddObj);
            parser.parse();
            Map<String, Object> fieldValueMap = parser.getFieldValueMap();
            if (fieldValueMap != null) {
                for (Map.Entry<String, Object> entry : fieldValueMap.entrySet()) {
                    Object fieldValue = entry.getValue();
                    String fieldName = entry.getKey() + getPostfix(fieldValue); // dynamic field
                    //LOGGER.debug("fieldName=" + fieldName + " fieldValue=" + fieldValue);
                    Object existingFieldValue = doc.getFieldValue(fieldName);
                    if (existingFieldValue == null || !StringUtils.hasText(existingFieldValue.toString())) {
                        doc.addField(fieldName, fieldValue);
                    } else {
                        LOGGER.error("SolrDocument with index field name: " + fieldName
                                + ", already has a value of " + existingFieldValue);
                    }
                }
            }
            if (project != null) {
                doc.addField(IndexFields.PROJECT_ID.toString(), project.getPid());
            }
        } catch (ObjectTripleGenerationException e1) {
            LOGGER.error("Found exception", e1);
            // recast and throw
            throw new SearchIndexingException(e1);
        } catch (IllegalArgumentException e) {
            LOGGER.error("Found exception", e);
            // TODO: Don't recast, to keep existing test cases working
        }
    }

    /**
     * Retrieves the postfix based on the type of the object.
     *
     * @param obj
     * @return
     */
    private String getPostfix(Object obj) {
        if (obj instanceof Date) {
            return INDEX_DATE;
        } else if (obj instanceof EnumValue) {
            return INDEX_STRING;
        } else {
            return INDEX_TEXT;
        }
    }
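    // For example (the keys are hypothetical): a Date under key "startDate" becomes the
    // dynamic field "startDate_dt", an EnumValue under "status" becomes the exact-match
    // string field "status_s", and any other value, e.g. under "notes", becomes the
    // analyzed text field "notes_t".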
    /**
     * Controls the caching of Solr documents meant for indexing.
     *
     * @author Philip Wu
     */
    class DocumentCacheManager {

        /**
         * Cache of SolrInputDocuments. This cache should get flushed when the batch size has
         * been reached, or when somebody invokes a search. We don't want multiple
         * SolrInputDocuments for the same PoddObject with the same ID to exist in the cache,
         * so we use a map: PODD ID ---> SolrInputDocument
         * <p/>
         * Since we fork a thread to cache documents, we must make the collection thread-safe.
         */
        private Map<String, SolrInputDocument> documentCache =
                Collections.synchronizedMap(new HashMap<String, SolrInputDocument>());

        /**
         * Adds a document to the cache and flushes when the batch size has been reached.
         *
         * @param solrDocument
         * @throws IOException
         * @throws SolrServerException
         */
        public synchronized void addDocument(String pid, SolrInputDocument solrDocument)
                throws IOException, SolrServerException {
            // Replace the existing solrDocument for that PID
            documentCache.put(pid, solrDocument);
            if (documentCache.size() > batchSize) {
                flush();
            }
        }

        /**
         * Flushes the cache.
         *
         * @throws IOException
         * @throws SolrServerException
         */
        public synchronized void flush() throws IOException, SolrServerException {
            LOGGER.info("Flushing Solr index, # = " + documentCache.size());
            if (documentCache.size() > 0) {
                try {
                    solrServer.add(documentCache.values());
                    solrServer.commit();
                } catch (SolrException e) {
                    LOGGER.error("Failed to commit document cache. Will commit each document one-by-one.");
                    LOGGER.error("Flushing document cache exception.", e);
                    solrServer.rollback();
                    Set<String> removalSet = new HashSet<String>();
                    // Commit each document that can be flushed and skip the offending documents by removing them
                    // TODO YF: Since we're clearing the documentCache at the end of the method,
                    // why do we need to remove the offending document?
                    for (String pid : documentCache.keySet()) {
                        SolrInputDocument doc = documentCache.get(pid);
                        if (!doc.isEmpty()) {
                            try {
                                solrServer.add(doc);
                                solrServer.commit();
                            } catch (SolrException e2) {
                                solrServer.rollback();
                                // Remove the document from the cache
                                removalSet.add(pid);
                                LOGGER.error("Fail-safe. Removing offending document: "
                                        + doc.getField(IndexFields.ID.toString()));
                            }
                        } else {
                            removalSet.add(pid);
                            LOGGER.error("Fail-safe. Removed empty document: "
                                    + doc.getField(IndexFields.ID.toString()));
                        }
                    }
                    for (String pid : removalSet) {
                        documentCache.remove(pid);
                    }
                } finally {
                    documentCache.clear();
                }
            }
            LOGGER.info("Finished flushing.");
        }
    }
}
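A minimal wiring sketch for trying the class out follows. This snippet is not from the PODD codebase: the Solr URL is a placeholder, the demo class name is made up, and in PODD the DAO and triples generator would be supplied by the application's configuration rather than constructed by hand.

import java.net.MalformedURLException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import podd.search.impl.IndexManager;

public class IndexManagerDemo {
    public static void main(String[] args) throws MalformedURLException {
        IndexManager manager = new IndexManager();
        // SolrJ 1.x HTTP client; the URL below is a placeholder
        manager.setSolrServer(new CommonsHttpSolrServer("http://localhost:8983/solr"));
        manager.setUseThreads(false); // index synchronously, as the test cases do
        manager.setBatchSize(50);
        // manager.setObjectDao(...);               // project-specific DAO, not shown here
        // manager.setObjectTriplesGenerator(...);  // likewise supplied by the application
        // long count = manager.rebuildIndex();     // clears the index and re-indexes everything
        // manager.flush();                         // push any cached documents to Solr
    }
}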