// Java tutorial
package edu.ucsb.nceas.mdqengine.solr; /** * Copyright: 2013 Regents of the University of California and the * National Center for Ecological Analysis and Synthesis * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ import org.apache.commons.codec.EncoderException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.UpdateResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import org.dataone.cn.indexer.XMLNamespaceConfig; import org.dataone.cn.indexer.parser.*; import org.dataone.cn.indexer.solrhttp.SolrDoc; import org.dataone.cn.indexer.solrhttp.SolrElementField; import org.dataone.exceptions.MarshallingException; import org.dataone.service.exceptions.NotFound; import org.dataone.service.exceptions.NotImplemented; import org.dataone.service.exceptions.UnsupportedType; import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; import org.w3c.dom.Document; 
import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import java.io.*; import java.util.*; /** * A class that performs inserts to a SOLR server * @author tao * @author slaughter * */ public class SolrIndex { public static final String ID = "id"; private static final String IDQUERY = ID + ":*"; private List<IDocumentSubprocessor> subprocessors = null; private SolrClient solrClient = null; private XMLNamespaceConfig xmlNamespaceConfig = null; private List<SolrField> sysmetaSolrFields = null; private static DocumentBuilderFactory documentBuilderFactory = null; private static DocumentBuilder builder = null; private static XPathFactory xpathFactory = null; private static XPath xpath = null; Log log = LogFactory.getLog(SolrIndex.class); static { documentBuilderFactory = DocumentBuilderFactory.newInstance(); documentBuilderFactory.setNamespaceAware(true); try { builder = documentBuilderFactory.newDocumentBuilder(); } catch (ParserConfigurationException e) { e.printStackTrace(); } xpathFactory = XPathFactory.newInstance(); xpath = xpathFactory.newXPath(); } /** * Constructor * @throws SAXException * @throws IOException */ public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, List<SolrField> sysmetaSolrFields) throws XPathExpressionException, ParserConfigurationException, IOException, SAXException { this.xmlNamespaceConfig = xmlNamespaceConfig; this.sysmetaSolrFields = sysmetaSolrFields; init(); } private void init() throws ParserConfigurationException, XPathExpressionException { xpath.setNamespaceContext(xmlNamespaceConfig); initExpressions(); } private void initExpressions() throws XPathExpressionException { for (SolrField field : sysmetaSolrFields) { field.initExpression(xpath); } } /** * Get the list of the Subprocessors in this 
index. * @return the list of the Subprocessors. */ public List<IDocumentSubprocessor> getSubprocessors() { return subprocessors; } /** * Set the list of Subprocessors. * @param subprocessorList the list will be set. */ public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) { for (IDocumentSubprocessor subprocessor : subprocessorList) { if (subprocessor instanceof BaseXPathDocumentSubprocessor) { ((BaseXPathDocumentSubprocessor) subprocessor).initExpression(xpath); } } this.subprocessors = subprocessorList; } /** * Generate the index for the given information * @param id * @param systemMetadata * @param objectPath * @return * @throws IOException * @throws SAXException * @throws ParserConfigurationException * @throws XPathExpressionException * @throws MarshallingException * @throws SolrServerException * @throws EncoderException * @throws UnsupportedType * @throws NotFound * @throws NotImplemented */ private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, String objectPath) throws IOException, SAXException, MarshallingException, SolrServerException { log.debug("SolrIndex.process - trying to generate the solr doc object for the pid " + id); // Load the System Metadata document ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream); ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream( systemMetadataOutputStream.toByteArray()); Document sysMetaDoc = generateXmlDocument(systemMetadataStream); if (sysMetaDoc == null) { log.error("Could not load System metadata for ID: " + id); return null; } // Extract the field values from the System Metadata List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id); SolrDoc indexDocument = new SolrDoc(sysSolrFields); Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>(); docs.put(id, indexDocument); // get the format id for this object String 
formatId = indexDocument.getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT); log.debug("SolrIndex.process - the object format id for the pid " + id + " is " + formatId); // Determine if subprocessors are available for this ID if (subprocessors != null) { // for each subprocessor loaded from the spring config for (IDocumentSubprocessor subprocessor : subprocessors) { // Does this subprocessor apply? log.debug("SolrIndex.process - trying subprocessor " + subprocessor.getClass().getName()); if (subprocessor.canProcess(formatId)) { log.debug("SolrIndex.process - using subprocessor " + subprocessor.getClass().getName()); // if so, then extract the additional information from the // document. try { // docObject = the resource map document or science // metadata document. // note that resource map processing touches all objects // referenced by the resource map. FileInputStream dataStream = new FileInputStream(objectPath); if (!dataStream.getFD().valid()) { log.error("SolrIndex.process - subprocessor " + subprocessor.getClass().getName() + " couldn't process since it could not load OBJECT file for ID,Path=" + id + ", " + objectPath); //throw new Exception("Could not load OBJECT for ID " + id ); } else { log.debug("SolrIndex.process - subprocessor " + subprocessor.getClass().getName() + " generating solr doc for id " + id); docs = subprocessor.processDocument(id, docs, dataStream); log.debug("SolrIndex.process - subprocessor " + subprocessor.getClass().getName() + " generated solr doc for id " + id); } } catch (Exception e) { e.printStackTrace(); log.error(e.getMessage(), e); throw new SolrServerException(e.getMessage()); } } } } else { log.debug("Subproccor list is null"); } return docs; } /* * Generate a Document from the InputStream */ private Document generateXmlDocument(InputStream smdStream) throws SAXException { Document doc = null; try { doc = builder.parse(smdStream); } catch (IOException e) { log.error(e.getMessage(), e); } return doc; } /* * Index the 
fields of the system metadata */ private List<SolrElementField> processSysmetaFields(Document doc, String identifier) { List<SolrElementField> fieldList = new ArrayList<SolrElementField>(); // solrFields is the list of fields defined in the application context for (SolrField field : sysmetaSolrFields) { try { // the field.getFields method can return a single value or // multiple values for multi-valued fields // or can return multiple SOLR document fields. fieldList.addAll(field.getFields(doc, identifier)); } catch (Exception e) { e.printStackTrace(); } } return fieldList; } /** * Check the parameters of the insert or update methods. * @param pid * @param systemMetadata * @param objectPath * @throws SolrServerException */ private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException { if (pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) { throw new SolrServerException("The identifier of the indexed document should not be null or blank."); } if (systemMetadata == null) { throw new SolrServerException( "The system metadata of the indexed document " + pid.getValue() + " should not be null."); } if (objectPath == null) { throw new SolrServerException( "The indexed document itself for pid " + pid.getValue() + " should not be null."); } } /** * Insert the indexes for a document. 
* @param pid the id of this document * @param systemMetadata the system metadata associated with the data object * @param objectPath the path to the object file itself * @throws SolrServerException * @throws MarshallingException * @throws EncoderException * @throws UnsupportedType * @throws NotFound * @throws NotImplemented */ public synchronized void insert(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException, SolrServerException, MarshallingException, EncoderException, NotImplemented, NotFound, UnsupportedType { log.debug("Identifier: " + pid.getValue()); log.debug("sysmeta pid" + systemMetadata.getIdentifier().getValue()); log.debug("objectPath: " + objectPath); checkParams(pid, systemMetadata, objectPath); log.info("SolrIndex.insert - trying to insert the solrDoc for object " + pid.getValue()); Map<String, SolrDoc> docs = process(pid.getValue(), systemMetadata, objectPath); //transform the Map to the SolrInputDocument which can be used by the solr server if (docs != null) { Set<String> ids = docs.keySet(); for (String id : ids) { if (id != null) { SolrDoc doc = docs.get(id); insertToIndex(doc); log.debug("SolrIndex.insert - inserted the solr document object for pid " + id + ", which relates to object " + pid.getValue() + ", into the solr server."); } } log.debug("SolrIndex.insert - finished to insert the solrDoc for object " + pid.getValue()); } else { log.debug("SolrIndex.insert - the generated solrDoc is null. So we will not index the object " + pid.getValue()); } } /* * Insert a SolrDoc to the solr server. 
*/ private synchronized void insertToIndex(SolrDoc doc) throws SolrServerException, IOException { if (doc != null) { SolrInputDocument solrDoc = new SolrInputDocument(); List<SolrElementField> list = doc.getFieldList(); if (list != null) { Iterator<SolrElementField> iterator = list.iterator(); while (iterator.hasNext()) { SolrElementField field = iterator.next(); if (field != null) { String value = field.getValue(); String name = field.getName(); solrDoc.addField(name, value); } } } if (!solrDoc.isEmpty()) { try { UpdateResponse response = solrClient.add(solrDoc); solrClient.commit(); } catch (SolrServerException e) { throw e; } catch (IOException e) { throw e; } } } } /** * Get the Solr client instance * @return */ public SolrClient getSolrServer() { return solrClient; } /** * Set the Solr client. * @param solrClient */ public void setSolrClient(SolrClient solrClient) { this.solrClient = solrClient; } /** * Get all indexed ids in the solr server. * @return an empty list if there is no index. * @throws SolrServerException */ public List<String> getSolrIds() throws SolrServerException, IOException { List<String> list = new ArrayList<String>(); SolrQuery query = new SolrQuery(IDQUERY); query.setRows(Integer.MAX_VALUE); query.setFields(ID); QueryResponse response = solrClient.query(query); SolrDocumentList docs = response.getResults(); if (docs != null) { for (SolrDocument doc : docs) { String identifier = (String) doc.getFieldValue(ID); list.add(identifier); } } return list; } }