Java tutorial
/* * NorthRidge Software, LLC - Copyright (c) 2015. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.nridge.ds.solr; import com.nridge.core.app.mgr.AppMgr; import com.nridge.core.base.field.Field; import com.nridge.core.base.doc.Document; import com.nridge.core.base.field.data.DataBag; import com.nridge.core.base.field.data.DataField; import com.nridge.core.base.io.IO; import com.nridge.core.base.io.xml.IOXML; import com.nridge.core.base.std.NSException; import com.nridge.core.base.std.StrUtl; import com.nridge.core.base.std.XMLUtl; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.slf4j.Logger; import org.w3c.dom.*; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import java.io.*; /** * The SolrSchemaXML provides a collection of methods that can load * an XML representation of a Solr schema. In general, the developer * should use the data source I/O methods instead of this helper * implementation. * * @author Al Cole * @since 1.0 */ public class SolrSchemaXML { private final String SOLR_TEXT_FIELD_TYPE_DEFAULT = "text_en"; private Document mDocument; private final AppMgr mAppMgr; public SolrSchemaXML(final AppMgr anAppMgr) { mAppMgr = anAppMgr; mDocument = new Document("Solr Schema"); } public SolrSchemaXML(final AppMgr anAppMgr, Document aDocument) { mAppMgr = anAppMgr; mDocument = aDocument; } public Document getDocument() { return mDocument; } public void setDocument(Document aDocument) { mDocument = aDocument; } private void saveComment(PrintWriter aPW, int anIndentAmount) throws IOException { anIndentAmount++; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("<!-- Valid attributes for fields:%n"); anIndentAmount++; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("name: mandatory - the name for the field%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("type: mandatory - the name of a field type from the <types> fieldType section%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("indexed: true if this field should be indexed (searchable or sortable)%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("stored: true if this field should be retrievable%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("multiValued: true if this field may contain multiple values per document%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("omitNorms: (expert) set to true to omit the norms associated with%n"); anIndentAmount += 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("this field (this disables length normalization and index-time%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("boosting for the field, and saves some memory). Only full-text%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("fields or fields that need an index-time boost need norms.%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("Norms are omitted for primitive (non-analyzed) types by default.%n"); anIndentAmount -= 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("termVectors: [false] set to true to store the term vector for a%n"); anIndentAmount += 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("given field.%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("When using MoreLikeThis, fields used for similarity should be%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("stored for best performance.%n"); anIndentAmount -= 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("termPositions: Store position information with the term vector.%n"); anIndentAmount += 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("This will increase storage costs.%n"); anIndentAmount -= 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("termOffsets: Store offset information with the term vector. This %n"); anIndentAmount += 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("will increase storage costs.%n"); anIndentAmount -= 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("required: The field is required. It will throw an error if the%n"); anIndentAmount += 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("value does not exist%n"); anIndentAmount -= 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("default: a value that should be used if no value is specified%n"); anIndentAmount += 2; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("when adding a document.%n"); anIndentAmount -= 3; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("-->%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("<!--%n"); anIndentAmount++; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("field names should consist of alphanumeric or underscore characters only and%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("not start with a digit. This is not currently strictly enforced,%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("but other field names will not have first class support from all components%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("and back compatibility is not guaranteed. Names with both leading and%n"); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("trailing underscores (e.g. _version_) are reserved.%n"); anIndentAmount--; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("-->%n"); } private String mapFieldType(DataField aField) { String fieldType; switch (aField.getType()) { case Text: if ((StringUtils.endsWith(aField.getName(), "_name")) || (StringUtils.endsWith(aField.getName(), "_title")) || (StringUtils.endsWith(aField.getName(), "_description")) || (StringUtils.endsWith(aField.getName(), "_content"))) fieldType = SOLR_TEXT_FIELD_TYPE_DEFAULT; else fieldType = "string"; break; case Integer: fieldType = "int"; break; case Long: fieldType = "long"; break; case Float: fieldType = "float"; break; case Double: fieldType = "double"; break; case Boolean: fieldType = "boolean"; break; case Date: case Time: case DateTime: fieldType = "date"; break; default: fieldType = "string"; break; } return fieldType; } // http://wiki.apache.org/solr/SchemaXml public void save(PrintWriter aPW, String aTagName, int anIndentAmount) throws IOException { DataField dataField; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("<%s>%n", aTagName); saveComment(aPW, anIndentAmount); anIndentAmount++; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("<field name=\"text\" type=\"%s\" indexed=\"true\" stored=\"true\" multiValued=\"true\"/>%n", SOLR_TEXT_FIELD_TYPE_DEFAULT); DataBag dataBag = mDocument.getBag(); int fieldCount = dataBag.count(); for (int i = 0; i < fieldCount; i++) { dataField = dataBag.getByOffset(i); IOXML.indentLine(aPW, anIndentAmount); aPW.printf("<field name=\"%s\"", dataField.getName()); if (dataField.isFeatureAssigned(Solr.FEATURE_SOLR_TYPE)) aPW.printf(" type=\"%s\"", dataField.getFeature(Solr.FEATURE_SOLR_TYPE)); else aPW.printf(" type=\"%s\"", mapFieldType(dataField)); if (dataField.isFeatureAssigned(Solr.FEATURE_IS_INDEXED)) aPW.printf(" indexed=\"%s\"", dataField.getFeature(Solr.FEATURE_IS_INDEXED)); else aPW.printf(" indexed=\"true\""); if (dataField.isFeatureAssigned(Solr.FEATURE_IS_STORED)) aPW.printf(" stored=\"%s\"", dataField.getFeature(Solr.FEATURE_IS_STORED)); else aPW.printf(" stored=\"true\""); if (dataField.isFeatureTrue(Field.FEATURE_IS_REQUIRED)) aPW.printf(" required=\"true\""); if (dataField.isMultiValue()) aPW.printf(" multiValued=\"true\""); if (dataField.isFeatureTrue(Solr.FEATURE_IS_OMIT_NORMS)) aPW.printf(" omitNorms=\"%s\"", dataField.getFeature(Solr.FEATURE_IS_OMIT_NORMS)); if ((dataField.isFeatureTrue(Solr.FEATURE_IS_DEFAULT)) && (StringUtils.isNotEmpty(dataField.getDefaultValue()))) aPW.printf(" default=\"%s\"", dataField.getDefaultValue()); aPW.printf("/>%n"); } IOXML.indentLine(aPW, anIndentAmount); aPW.printf("<field name=\"_version_\" type=\"long\" indexed=\"true\" stored=\"true\"/>%n"); anIndentAmount--; IOXML.indentLine(aPW, anIndentAmount); aPW.printf("</%s>%n", aTagName); dataField = dataBag.getPrimaryKeyField(); if (dataField != null) { IOXML.indentLine(aPW, anIndentAmount); aPW.printf("<uniqueKey>%s</uniqueKey>%n", dataField.getName()); } String fieldName; for (int i = 0; i < fieldCount; i++) { dataField = dataBag.getByOffset(i); fieldName = dataField.getName(); if ((StringUtils.endsWith(fieldName, "_name")) || (StringUtils.endsWith(fieldName, "_title")) || (StringUtils.endsWith(fieldName, "_description")) || (StringUtils.endsWith(fieldName, "_content"))) { IOXML.indentLine(aPW, anIndentAmount); aPW.printf("<copyField source=\"%s\" dest=\"text\"/>%n", fieldName); } } } public void save(PrintWriter aPW, int anIndentAmount) throws IOException { save(aPW, "fields", anIndentAmount); } public void save(PrintWriter aPW) throws IOException { save(aPW, 2); } public void save(String aPathFileName) throws IOException { PrintWriter printWriter = new PrintWriter(aPathFileName, "UTF-8"); save(printWriter); printWriter.close(); } private Field.Type mapSolrFieldType(String aSolrFieldType) { if (StringUtils.equalsIgnoreCase(aSolrFieldType, "int")) return Field.Type.Integer; else if (StringUtils.equalsIgnoreCase(aSolrFieldType, "long")) return Field.Type.Long; else if (StringUtils.equalsIgnoreCase(aSolrFieldType, "float")) return Field.Type.Float; else if (StringUtils.equalsIgnoreCase(aSolrFieldType, "double")) return Field.Type.Double; else if (StringUtils.equalsIgnoreCase(aSolrFieldType, "boolean")) return Field.Type.Boolean; else if ((StringUtils.equalsIgnoreCase(aSolrFieldType, "date")) || (StringUtils.equalsIgnoreCase(aSolrFieldType, "time"))) return Field.Type.DateTime; else return Field.Type.Text; } private void assignSolrFieldFeature(DataField aField, String aName, String aValue) { if (StringUtils.equalsIgnoreCase(aName, "indexed")) { if (StrUtl.stringToBoolean(aValue)) aField.enableFeature("isIndexed"); } else if (StringUtils.equalsIgnoreCase(aName, "stored")) { if (StrUtl.stringToBoolean(aValue)) aField.enableFeature("isStored"); } else if (StringUtils.equalsIgnoreCase(aName, "docValues")) { if (StrUtl.stringToBoolean(aValue)) aField.enableFeature("isDocValue"); } else if (StringUtils.equalsIgnoreCase(aName, "multiValued")) { if (StrUtl.stringToBoolean(aValue)) aField.setMultiValueFlag(true); } else aField.addFeature(aName, aValue); } private DataField loadField(Element anElement) { Attr nodeAttr; DataField dataField; Field.Type fieldType; String nodeName, nodeValue; Logger appLogger = mAppMgr.getLogger(this, "loadField"); appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER); String attrValue = anElement.getAttribute("name"); if (StringUtils.isNotEmpty(attrValue)) { String fieldName = attrValue; attrValue = anElement.getAttribute("type"); if (StringUtils.isNotEmpty(attrValue)) fieldType = mapSolrFieldType(attrValue); else fieldType = Field.Type.Text; dataField = new DataField(fieldType, fieldName); dataField.setTitle(Field.nameToTitle(fieldName)); NamedNodeMap namedNodeMap = anElement.getAttributes(); int attrCount = namedNodeMap.getLength(); for (int attrOffset = 0; attrOffset < attrCount; attrOffset++) { nodeAttr = (Attr) namedNodeMap.item(attrOffset); nodeName = nodeAttr.getNodeName(); nodeValue = nodeAttr.getNodeValue(); if (StringUtils.isNotEmpty(nodeValue)) { if ((!StringUtils.equalsIgnoreCase(nodeName, "name")) && (!StringUtils.equalsIgnoreCase(nodeName, "type"))) assignSolrFieldFeature(dataField, nodeName, nodeValue); } } } else dataField = null; appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART); return dataField; } private void loadFields(Document aDocument, Element anElement) { Node nodeItem; String nodeName; DataField dataField; Element nodeElement; Logger appLogger = mAppMgr.getLogger(this, "loadFields"); appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER); DataBag dataBag = aDocument.getBag(); NodeList nodeList = anElement.getChildNodes(); for (int i = 0; i < nodeList.getLength(); i++) { nodeItem = nodeList.item(i); if (nodeItem.getNodeType() != Node.ELEMENT_NODE) continue; nodeName = nodeItem.getNodeName(); if (StringUtils.equalsIgnoreCase(nodeName, "field")) { nodeElement = (Element) nodeItem; dataField = loadField(nodeElement); if (dataField != null) dataBag.add(dataField); } } appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART); } private Document loadSchema(Element anElement) throws IOException { Node nodeItem; Document document; Element nodeElement; DataField dataField; String nodeName, nodeValue; Logger appLogger = mAppMgr.getLogger(this, "loadSchema"); appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER); String schemaName = anElement.getAttribute("name"); document = new Document("Solr Schema"); if (StringUtils.isNotEmpty(schemaName)) document.setName(schemaName); DataBag dataBag = document.getBag(); NodeList nodeList = anElement.getChildNodes(); for (int i = 0; i < nodeList.getLength(); i++) { nodeItem = nodeList.item(i); if (nodeItem.getNodeType() != Node.ELEMENT_NODE) continue; nodeName = nodeItem.getNodeName(); if (StringUtils.equalsIgnoreCase(nodeName, "fields")) { nodeElement = (Element) nodeItem; loadFields(document, nodeElement); } else if (StringUtils.equalsIgnoreCase(nodeName, "field")) { nodeElement = (Element) nodeItem; dataField = loadField(nodeElement); if (dataField != null) dataBag.add(dataField); } else if (StringUtils.equalsIgnoreCase(nodeName, "uniqueKey")) { nodeValue = XMLUtl.getNodeStrValue(nodeItem); dataField = document.getBag().getFieldByName(nodeValue); if (dataField != null) dataField.enableFeature(Field.FEATURE_IS_PRIMARY_KEY); } } appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART); return document; } /** * Parses an XML DOM element and loads it into a document. * * @param anElement DOM element. * @throws java.io.IOException I/O related exception. */ public void load(Element anElement) throws IOException { Logger appLogger = mAppMgr.getLogger(this, "load"); appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER); mDocument = loadSchema(anElement); appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART); } /** * Parses an XML DOM element and loads it into a document. * * @param anIS Input stream. * @throws java.io.IOException I/O related exception. * @throws javax.xml.parsers.ParserConfigurationException XML parser related exception. * @throws org.xml.sax.SAXException XML parser related exception. */ public void load(InputStream anIS) throws ParserConfigurationException, IOException, SAXException { Logger appLogger = mAppMgr.getLogger(this, "load"); appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER); DocumentBuilderFactory docBldFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = docBldFactory.newDocumentBuilder(); InputSource inputSource = new InputSource(anIS); org.w3c.dom.Document xmlDocument = docBuilder.parse(inputSource); xmlDocument.getDocumentElement().normalize(); load(xmlDocument.getDocumentElement()); appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART); } /** * Parses an XML file identified by the path/file name parameter * and loads it into a document. * * @param aPathFileName Absolute file name. * @throws java.io.IOException I/O related exception. * @throws javax.xml.parsers.ParserConfigurationException XML parser related exception. * @throws org.xml.sax.SAXException XML parser related exception. */ public void load(String aPathFileName) throws IOException, ParserConfigurationException, SAXException { Logger appLogger = mAppMgr.getLogger(this, "load"); appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER); File xmlFile = new File(aPathFileName); if (!xmlFile.exists()) throw new IOException(aPathFileName + ": Does not exist."); DocumentBuilderFactory docBldFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = docBldFactory.newDocumentBuilder(); org.w3c.dom.Document xmlDocument = docBuilder.parse(new File(aPathFileName)); xmlDocument.getDocumentElement().normalize(); load(xmlDocument.getDocumentElement()); appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART); } /** * Downloads the Solr schema file identified via the URL parameter * and store it to the path/file name specified. The Solr Dashboard * exposes the URL to a schema file. * * @param aURL Uniform Resource Location of schema file. * @param aPathFileName Path/Name where file should be stored. * * @throws NSException Thrown when I/O errors are detected. */ public void downloadAndSave(String aURL, String aPathFileName) throws NSException { Logger appLogger = mAppMgr.getLogger(this, "downloadAndSave"); appLogger.trace(mAppMgr.LOGMSG_TRACE_ENTER); if ((StringUtils.isNotEmpty(aURL)) && (StringUtils.isNotEmpty(aPathFileName))) { InputStream inputStream = null; OutputStream outputStream = null; CloseableHttpResponse httpResponse = null; File schemaFile = new File(aPathFileName); HttpGet httpGet = new HttpGet(aURL); CloseableHttpClient httpClient = HttpClients.createDefault(); try { httpResponse = httpClient.execute(httpGet); HttpEntity httpEntity = httpResponse.getEntity(); inputStream = httpEntity.getContent(); outputStream = new FileOutputStream(schemaFile); IOUtils.copy(inputStream, outputStream); } catch (IOException e) { String msgStr = String.format("%s (%s): %s", aURL, aPathFileName, e.getMessage()); appLogger.error(msgStr, e); throw new NSException(msgStr); } finally { if (inputStream != null) IO.closeQuietly(inputStream); if (outputStream != null) IO.closeQuietly(outputStream); if (httpResponse != null) IO.closeQuietly(httpResponse); } } appLogger.trace(mAppMgr.LOGMSG_TRACE_DEPART); } }