Java tutorial
package com.revorg.goat;

/*
 * Copyright (c) Grover C. Fields, http://www.groverfields.com/, 2005-2009.
 * All rights reserved. Software written by Grover C. Fields and others.
 * $Id: LICENSE,v 1.0 2008/12/01 05:00:00 Grover Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java
 * language and environment is gratefully acknowledged.
 *
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */

import com.revorg.goat.html.HTMLParser;
import com.revorg.goat.utilities.Utilities;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.String;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Manages Lucene collections (indexes): creation, deletion, optimization,
 * purging, merging, inspection, and bulk indexing of a JDBC query result.
 *
 * <p>Every public operation reports its outcome as a String instead of
 * throwing: {@code "Success"} (or a value such as a count, a version,
 * {@code "Yes"} / {@code "No"}) when it works, and a message starting with
 * {@code "Failure"} when it does not. Results are built from local state only,
 * so one call can never leak a stale error message into another.
 *
 * @version 1.0
 * @author Grover C. Fields (grover_fields@yahoo.com)
 * @author http://www.groverfields.com
 */
public class IndexManager implements java.io.Serializable {

    static char dirSep = System.getProperty("file.separator").charAt(0);
    static String tempDirectory = System.getProperty("java.io.tmpdir");

    private static final String SUCCESS = "Success";
    private static final String FAILURE = "Failure";

    public IndexManager() {
    }

    /**
     * Creates a new, empty Lucene index.
     *
     * @param indexPath directory that will contain the Lucene collection; must not exist yet
     * @return {@code "Success"}, or a message starting with {@code "Failure"}
     */
    public static String createIndex(String indexPath) {
        IndexWriter writer = null;
        try {
            File theDir = new File(indexPath);
            // Never create on top of an existing directory: opening a writer
            // with create=true would wipe whatever index already lives there.
            if (theDir.exists()) {
                System.out.println("Failure to create index: " + indexPath);
                System.out.println("The index/directory already exists:");
                return FAILURE + " The index/directory already exists: " + indexPath;
            }
            writer = openWriter(indexPath, true);
            writer.close();
            writer = null;
            return SUCCESS;
        } catch (Exception e) {
            System.out.println("Failure to create index: " + indexPath);
            return FAILURE + describe(e);
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Unregisters a collection and deletes its directory.
     *
     * @param indexPath directory that contains the Lucene collection
     * @return {@code "Success"}, or a message starting with {@code "Failure"}
     */
    public static String deleteIndex(String indexPath) {
        IndexReader reader = null;
        try {
            File theDir = new File(indexPath);
            // Opening a reader doubles as a safety check: it throws unless the
            // path really holds a Lucene index, so this method cannot be used
            // to recursively delete an arbitrary directory.
            reader = IndexReader.open(indexPath);
            reader.close();
            reader = null;
            if (theDir.exists()) {
                FileUtils.deleteDirectory(theDir);
                return SUCCESS;
            }
            return "Failure to delete index: " + indexPath;
        } catch (Exception e) {
            System.out.println("Failure to delete index: " + indexPath);
            return FAILURE + describe(e);
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Optimizes the structure and contents of the collection for searching and
     * recovers space. The index write lock is held while this runs.
     *
     * @param indexPath directory that contains the Lucene collection
     * @return {@code "Success"}, or a message starting with {@code "Failure"}
     */
    public static String optimizeIndex(String indexPath) {
        IndexWriter writer = null;
        try {
            writer = openWriter(indexPath, false);
            writer.optimize();
            writer.close();
            writer = null;
            return SUCCESS;
        } catch (Exception e) {
            System.out.println("Failure to optimize index: " + indexPath);
            return FAILURE + describe(e);
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Deletes all of the documents in a collection by re-creating the index in
     * place. The index write lock is held while this runs.
     *
     * @param indexPath directory that contains the Lucene collection
     * @return {@code "Success"}, or a message starting with {@code "Failure"}
     */
    public static String purgeIndex(String indexPath) {
        IndexWriter writer = null;
        try {
            // Only purge something that is an index today; otherwise
            // create=true would silently create a brand-new one.
            if (!"Yes".equalsIgnoreCase(isIndexExistant(indexPath))) {
                System.out.println("Failure to purge index: " + indexPath);
                return FAILURE + " Unable to open index: " + indexPath;
            }
            writer = openWriter(indexPath, true);
            writer.commit();
            writer.close();
            writer = null;
            return SUCCESS;
        } catch (Exception e) {
            System.out.println("Failure to purge index: " + indexPath);
            return FAILURE + describe(e);
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Counts the total number of documents in the index.
     *
     * <p>The count is read through an {@link IndexReader}, so no write lock is
     * taken and the call works while another process is indexing.
     *
     * @param indexPath directory that contains the Lucene collection
     * @return the document count as a decimal string, or a message starting with {@code "Failure"}
     */
    public static String getIndexCount(String indexPath) {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(indexPath);
            String count = Integer.toString(reader.maxDoc());
            reader.close();
            reader = null;
            return count;
        } catch (Exception e) {
            System.out.println("Failure to count index: " + indexPath);
            return FAILURE + describe(e);
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Gets the index version.
     *
     * @param indexPath directory that contains the Lucene collection
     * @return the version as a decimal string, or a message starting with {@code "Failure"}
     */
    public static String getIndexVersion(String indexPath) {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(indexPath);
            String version = Long.toString(reader.getVersion());
            reader.close();
            reader = null;
            return version;
        } catch (Exception e) {
            System.out.println("Failure to get index version: " + indexPath);
            return FAILURE + describe(e);
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Checks whether an index exists at the given path. This is a read-only
     * check: it never modifies or deletes anything.
     *
     * @param indexPath directory that may contain a Lucene collection
     * @return {@code "Yes"} if an index exists and can be opened, {@code "No"} if there
     *         is no index at the path, or a message starting with {@code "Failure"} if
     *         the index is present but cannot be opened
     */
    public static String isIndexExistant(String indexPath) {
        IndexReader reader = null;
        try {
            if (!IndexReader.indexExists(indexPath)) {
                return "No";
            }
            // An index that is present but unreadable is reported as a
            // failure rather than as "Yes".
            reader = IndexReader.open(indexPath);
            reader.close();
            reader = null;
            return "Yes";
        } catch (Exception e) {
            System.out.println("Failure to check index: " + indexPath);
            return FAILURE + describe(e);
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Checks whether an index is currently write-locked.
     *
     * <p>The lock is inspected without opening a writer: opening one would
     * acquire the lock itself (always yielding "Yes") or fail outright when
     * somebody else already holds it.
     *
     * @param indexPath directory that contains the Lucene collection
     * @return {@code "Yes"}, {@code "No"}, or a message starting with {@code "Failure"}
     */
    public static String isIndexLocked(String indexPath) {
        try {
            if (!IndexReader.indexExists(indexPath)) {
                System.out.println("Failure to check index lock: " + indexPath);
                return FAILURE + " No index found at: " + indexPath;
            }
            return IndexWriter.isLocked(indexPath) ? "Yes" : "No";
        } catch (Exception e) {
            System.out.println("Failure to check index lock: " + indexPath);
            return FAILURE + describe(e);
        }
    }

    /**
     * Merges the secondary index into the primary index and optimizes the result.
     *
     * @param primaryIndex   the primary Lucene index (merge target)
     * @param secondaryIndex the secondary Lucene index whose documents are added
     * @return {@code "Success"}, or a message starting with {@code "Failure"}
     */
    public static String mergeIndexes(String primaryIndex, String secondaryIndex) {
        IndexWriter writer = null;
        Directory secondary = null;
        try {
            writer = openWriter(primaryIndex, false);
            secondary = FSDirectory.getDirectory(secondaryIndex);
            writer.addIndexesNoOptimize(new Directory[] { secondary });
            writer.commit();
            writer.optimize();
            writer.close();
            writer = null;
            return SUCCESS;
        } catch (Exception e) {
            String error = describe(e);
            System.out.println("Failure to merge index: " + primaryIndex);
            System.out.println(error);
            return FAILURE + error;
        } finally {
            closeQuietly(writer);
            closeQuietly(secondary);
        }
    }

    /**
     * Returns the list of index types that can be used by GOAT.
     *
     * @return a new, mutable list of the supported index type names
     */
    public static List getIndexTypes() {
        List<String> types = new ArrayList<String>();
        types.add("PrimaryKey");
        types.add("Date");
        types.add("TriggerUpdate");
        types.add("TriggerDelete");
        types.add("Keyword");
        types.add("UnIndexed");
        types.add("Binary");
        types.add("Text");
        types.add("UnStored");
        types.add("HTML");
        return types;
    }

    /**
     * Runs a query against a database and indexes every returned row as one
     * Lucene document.
     *
     * <p>Rows are first written to a temporary working index below
     * {@code indexPath}, which is then merged into the main index. How each
     * column is indexed is read from {@code <indexPath>/config/dsschema.xml};
     * the schema entries are matched to result-set columns by position. When
     * the schema declares both a primary key and a trigger column, the source
     * rows are flagged (TriggerUpdate) or removed (TriggerDelete) once the
     * merge has succeeded, so that a failed merge never loses source data.
     * All JDBC and Lucene resources, and both temporary directories, are
     * released on every exit path.
     *
     * @param indexPath  directory that contains the Lucene collection
     * @param driverName database driver class name
     * @param sourceURL  database connection URL
     * @param dbUsername database username
     * @param dbPassword database password
     * @param execSQL    the query to run; the table following its first FROM is
     *                   the one touched by trigger processing
     * @return a success message with the number of indexed documents, or a
     *         message starting with {@code "Failure"}
     */
    public static String indexDatabase(String indexPath, String driverName, String sourceURL,
                                       String dbUsername, String dbPassword, String execSQL) {
        String workPath = indexPath + dirSep + "working" + new Random().nextInt(100000) + dirSep;
        String configFile = indexPath + dirSep + "config" + dirSep + "dsschema.xml";
        String tempHTMLDir = tempDirectory + dirSep + "htmlIndexing" + dirSep + Utilities.CreateUUID() + dirSep;
        File workDir = new File(workPath);
        File schemaFile = new File(configFile);
        File htmlIndexing = new File(tempHTMLDir);

        Connection con = null;
        Statement stmt = null;
        ResultSet rs = null;
        IndexWriter writer = null;
        // Only directories this call created may be removed during cleanup.
        boolean workIndexCreated = false;
        boolean htmlDirCreated = false;

        try {
            if (!schemaFile.exists()) {
                String error = "DB Schema File (" + schemaFile + ")Does Not Exists";
                System.out.println(error);
                return "Failure " + error;
            }

            // A pre-existing working directory belongs to somebody else; leave it alone.
            if (workDir.exists()) {
                return FAILURE + "Failure to create index: " + workPath + " The index/directory already exists:";
            }
            String workStatus = createIndex(workPath);
            if (!SUCCESS.equals(workStatus)) {
                return workStatus;
            }
            workIndexCreated = true;

            Class.forName(driverName);
            System.out.println("Driver Loaded");
            Properties prop = new Properties();
            prop.setProperty("user", dbUsername);
            prop.setProperty("password", dbPassword);
            DriverManager.setLoginTimeout(5);
            con = DriverManager.getConnection(sourceURL, prop);

            stmt = con.createStatement();
            rs = stmt.executeQuery(execSQL);
            ResultSetMetaData rsMetaData = rs.getMetaData();
            int columns = rsMetaData.getColumnCount();

            // Read the schema once into arrays so the XML is not re-parsed per row.
            String[] indexTypeArray = new String[columns];
            String[] columnNamesArray = new String[columns];
            int primaryKeyPos = 0;   // 1-based JDBC column position, 0 = none
            int triggerPos = 0;      // 1-based JDBC column position, 0 = none
            String triggerType = "";
            boolean triggerExists = false;
            boolean primaryExists = false;
            XMLReader readerXML = new XMLReader();
            for (int i = 0; i < columns; i++) {
                indexTypeArray[i] = readerXML.getNodeValueByFile(configFile, i, "indextype");
                columnNamesArray[i] = readerXML.getNodeValueByFile(configFile, i, "columnname");
                if (indexTypeArray[i].equalsIgnoreCase("PrimaryKey")) {
                    primaryExists = true;
                    primaryKeyPos = i + 1;
                }
                boolean updateTrigger = indexTypeArray[i].equalsIgnoreCase("triggerUpdate");
                if (updateTrigger || indexTypeArray[i].equalsIgnoreCase("triggerDelete")) {
                    triggerExists = true;
                    triggerPos = i + 1;
                    triggerType = updateTrigger ? "Update" : "Delete";
                }
            }

            if (htmlIndexing.exists()) {
                return FAILURE + "Failure to create directory: " + htmlIndexing + " The index/directory already exists:";
            }
            // createIndex is (re)used here simply to create the directory in a
            // form that deleteIndex accepts during cleanup.
            String htmlStatus = createIndex(tempHTMLDir);
            if (!SUCCESS.equals(htmlStatus)) {
                return htmlStatus;
            }
            htmlDirCreated = true;

            long start = System.currentTimeMillis();
            writer = openWriter(workPath, false);
            System.out.println("Indexing to directory '" + workPath + "'...");

            StringBuilder keyList = new StringBuilder();   // comma-separated primary keys of indexed rows
            int currentRow = 0;
            while (rs.next()) {
                if (primaryKeyPos != 0) {
                    if (currentRow > 0) {
                        keyList.append(',');
                    }
                    keyList.append(rs.getInt(primaryKeyPos));
                }
                String docStatus = createDocument(writer, rs, columnNamesArray, indexTypeArray, tempHTMLDir);
                if (docStatus.regionMatches(true, 0, "Fail", 0, 4)) {
                    return docStatus;
                }
                ++currentRow;
            }

            System.out.println("Optimizing..." + currentRow + " Documents");
            // The working writer must be closed before its index can be merged or deleted.
            writer.close();
            writer = null;

            // Optimization is done inside the merge.
            String mergeStatus = mergeIndexes(indexPath, workPath);
            if (!SUCCESS.equals(mergeStatus)) {
                return mergeStatus;
            }

            // Touch the source table only after the documents are safely in
            // the main index. Table and column names come from the caller's
            // SQL and the schema file (trusted configuration); the key list
            // holds integers only.
            String updateTable = extractTableName(execSQL);
            if (triggerExists && primaryExists && updateTable.length() > 0 && currentRow != 0) {
                String dynamicSQL;
                if (triggerType.equalsIgnoreCase("Update")) {
                    dynamicSQL = "update " + updateTable + " set " + columnNamesArray[triggerPos - 1]
                            + " =1 where " + columnNamesArray[primaryKeyPos - 1] + " in (" + keyList + ");";
                } else {
                    dynamicSQL = "delete from " + updateTable + " where "
                            + columnNamesArray[primaryKeyPos - 1] + " in (" + keyList + ");";
                }
                System.out.println(dynamicSQL);
                stmt.execute(dynamicSQL);
            }

            long elapsed = System.currentTimeMillis() - start;
            System.out.println(elapsed + " total milliseconds: = " + (elapsed / 1000) + " Seconds");
            return "Successfully indexing of " + Integer.toString(currentRow) + " documents.";
        } catch (Exception e) {
            e.printStackTrace();
            return "Failure: " + e + " caught a " + e.getClass() + " with message: " + e.getMessage();
        } finally {
            // Release handles first: an open writer keeps the working
            // directory locked and would get in the way of deleting it.
            closeQuietly(writer);
            closeQuietly(rs);
            closeQuietly(stmt);
            closeQuietly(con);
            if (workIndexCreated) {
                deleteIndex(workPath);
            }
            if (htmlDirCreated) {
                deleteIndex(tempHTMLDir);
            }
        }
    }

    /**
     * Builds one Lucene document from the current row of the result set and
     * adds it to the index.
     *
     * <p>Mapping from schema index type to Lucene field:
     * <ul>
     *   <li>PrimaryKey, Keyword, Date: stored, indexed as a single term
     *       (NOT_ANALYZED); for Date, "/" is stripped from the value</li>
     *   <li>UnIndexed: stored, not indexed</li>
     *   <li>Text: stored, analyzed</li>
     *   <li>UnStored: not stored, analyzed</li>
     *   <li>HTML: value is written to a temporary file, tag-stripped by
     *       {@code HTMLParser} and indexed from the resulting reader; an empty
     *       value is treated like UnStored</li>
     *   <li>Binary: stored compressed, not indexed</li>
     *   <li>TriggerUpdate, TriggerDelete: bookkeeping columns, never added</li>
     * </ul>
     *
     * @param writer           index writer the document is added to
     * @param rs               result set positioned on the row to index
     * @param columnNamesArray column names, parallel to {@code indexTypeArray}
     * @param indexTypeArray   schema index type of each column
     * @param tempHTMLDir      temporary directory for HTML scratch files
     * @return {@code "Success"}, or a message starting with {@code "Failure"}
     */
    private static String createDocument(IndexWriter writer, ResultSet rs, String columnNamesArray[],
                                         String indexTypeArray[], String tempHTMLDir) {
        // Streams feeding reader-valued fields are consumed by addDocument, so
        // they can only be closed once the document has been written.
        List<FileInputStream> htmlStreams = new ArrayList<FileInputStream>();
        try {
            final Document doc = new Document();
            for (int i = 0; i < columnNamesArray.length; i++) {
                String columnName = columnNamesArray[i].trim().toLowerCase(java.util.Locale.ENGLISH);
                String columnIndexType = indexTypeArray[i];
                String columnValue = rs.getString(columnName);
                if (columnValue == null) {
                    columnValue = "";   // Lucene does not accept null field values
                }

                if (columnIndexType.equalsIgnoreCase("TriggerUpdate")
                        || columnIndexType.equalsIgnoreCase("TriggerDelete")) {
                    continue;
                }

                if (columnIndexType.equalsIgnoreCase("PrimaryKey")
                        || columnIndexType.equalsIgnoreCase("Keyword")
                        || columnIndexType.equalsIgnoreCase("Date")) {
                    if (columnIndexType.equalsIgnoreCase("Date")) {
                        columnValue = columnValue.replace("/", "");
                    }
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.NOT_ANALYZED));
                } else if (columnIndexType.equalsIgnoreCase("UnIndexed")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.NO));
                } else if (columnIndexType.equalsIgnoreCase("Text")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.ANALYZED));
                } else if (columnIndexType.equalsIgnoreCase("HTML") && columnValue.length() != 0) {
                    String htmlPath = tempHTMLDir + Utilities.CreateUUID() + ".html";
                    BufferedWriter out = new BufferedWriter(new FileWriter(htmlPath));
                    try {
                        out.write(columnValue);
                    } finally {
                        out.close();
                    }
                    FileInputStream fis = new FileInputStream(new File(htmlPath));
                    htmlStreams.add(fis);
                    HTMLParser parser = new HTMLParser(fis);
                    // Add the tag-stripped contents as a Reader-valued field so
                    // they get tokenized and indexed.
                    doc.add(new Field(columnName, parser.getReader()));
                } else if (columnIndexType.equalsIgnoreCase("UnStored")
                        || columnIndexType.equalsIgnoreCase("HTML")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.NO, Field.Index.ANALYZED));
                } else if (columnIndexType.equalsIgnoreCase("Binary")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.COMPRESS, Field.Index.NO));
                }
            }
            writer.addDocument(doc);
            return SUCCESS;
        } catch (Exception e) {
            return FAILURE + describe(e);
        } finally {
            for (FileInputStream stream : htmlStreams) {
                closeQuietly(stream);
            }
        }
    }

    /** Returns the token following the first FROM keyword (any case) of a query, or "" if there is none. */
    private static String extractTableName(String sql) {
        if (sql == null) {
            return "";
        }
        String[] tokens = sql.trim().split("\\s+");
        for (int i = 0; i < tokens.length - 1; i++) {
            if (tokens[i].equalsIgnoreCase("FROM")) {
                return tokens[i + 1];
            }
        }
        return "";
    }

    /** Opens an index writer with the standard analyzer and the limited field length. */
    private static IndexWriter openWriter(String indexPath, boolean create) throws IOException {
        return new IndexWriter(indexPath, new StandardAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);
    }

    /** Formats an exception as the error suffix appended to "Failure" results. */
    private static String describe(Exception e) {
        return " caught a " + e.getClass() + " with message: " + e.getMessage();
    }

    /** Best-effort close used on cleanup paths; the primary outcome is already decided. */
    private static void closeQuietly(IndexWriter writer) {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
                // nothing useful can be done while cleaning up
            }
        }
    }

    /** Best-effort close used on cleanup paths; the primary outcome is already decided. */
    private static void closeQuietly(IndexReader reader) {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // nothing useful can be done while cleaning up
            }
        }
    }

    /** Best-effort close used on cleanup paths; the primary outcome is already decided. */
    private static void closeQuietly(Directory directory) {
        if (directory != null) {
            try {
                directory.close();
            } catch (Exception ignored) {
                // nothing useful can be done while cleaning up
            }
        }
    }

    /** Best-effort close used on cleanup paths; the primary outcome is already decided. */
    private static void closeQuietly(ResultSet resultSet) {
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (Exception ignored) {
                // nothing useful can be done while cleaning up
            }
        }
    }

    /** Best-effort close used on cleanup paths; the primary outcome is already decided. */
    private static void closeQuietly(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (Exception ignored) {
                // nothing useful can be done while cleaning up
            }
        }
    }

    /** Best-effort close used on cleanup paths; the primary outcome is already decided. */
    private static void closeQuietly(Connection connection) {
        if (connection != null) {
            try {
                connection.close();
            } catch (Exception ignored) {
                // nothing useful can be done while cleaning up
            }
        }
    }

    /** Best-effort close used on cleanup paths; the primary outcome is already decided. */
    private static void closeQuietly(FileInputStream stream) {
        if (stream != null) {
            try {
                stream.close();
            } catch (Exception ignored) {
                // nothing useful can be done while cleaning up
            }
        }
    }
}