// Java tutorial
package com.berico.clavin.resolver.impl.lucene; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import net.sourceforge.argparse4j.ArgumentParsers; import net.sourceforge.argparse4j.inf.ArgumentParser; import net.sourceforge.argparse4j.inf.ArgumentParserException; import net.sourceforge.argparse4j.inf.Namespace; import org.apache.commons.io.IOUtils; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.spatial.SpatialStrategy; import com.berico.clavin.gazetteer.Place; import com.berico.clavin.util.Serializer; import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.shape.Shape; /*##################################################################### * * CLAVIN (Cartographic Location And Vicinity INdexer) * --------------------------------------------------- * * Copyright (C) 2012-2013 Berico Technologies * http://clavin.bericotechnologies.com * * ==================================================================== * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. 
* * ==================================================================== * * IndexBuilder.java * *###################################################################*/ /** * This is an attempt to simplify developer's lives in extending the CLAVIN index * creation process. Extend this class and implement your Gazetteer specific * logic and let us do the heavy lifting of configuring the index and * writing the records to disk. * * Extend this class, and you will be required to implement five methods: * * String getDescription(); = Console description of what your IndexBuilder does. * * void extend(ArgumentParser); = You are given an opportunity to register arguments * for parsing via command line input. For directions on how to use, refer to the * Argparse4j library (http://argparse4j.sourceforge.net/). * * void initialize(Namespace); = This is the parsed input from the command line. * Collect the parameters you need to initialize your application and start up any * services or connections you may need. * * void begin(BuilderContext); = Start calling you datasource, converting records to * {@link Place} objects. Call BuilderContext.add(Place) to index the record. * * void cleanup(); = Called when the begin() method falls out of scope. IndexBuilder * guarantee's this will be called regardless of whether begin() succeeds or not, * unless you do something crazy like throw a {@link RuntimeException}. * * I've added some helper methods, partly because I really hate writing * "System.out.println(String.format("blah blah %s", object));" everytime * I want to output to the console. You're welcome to use the same methods: * * p(msg); = Print a message to the console (no new line) * p(template, objects...); = Print a message to the console using String.format semantics. * pl(msg); = Print a message on a new line to the console. * pl(template, objects...); = New line, String.format semantics. 
* br(); = Equivalent of an html <br /> (new line) * hr(); = Equivalent of an html <hr /> (line across the screen) */ public abstract class IndexBuilder implements BuilderContext { /** * Provide a description for this particular implementation of the IndexBuilder. * @return Description to show on the console. */ protected abstract String getDescription(); /** * Extend the argument parser with your own custom parameters. * @param parser precreated argument parser. */ protected abstract void extend(ArgumentParser parser); /** * Provide the parsed argument input to the derived class so it can configure * itself. * @param namespace Parsed argument input. */ protected abstract void initialize(Namespace namespace); /** * Begin pulling Gazetteer entries from wherever, calling "addPlaceToIndex" * to store them. * @param context Context of index building process. * @throws Exception Exceptions may be thrown at any point; the IndexBuilder * will catch the Exception and call cleanup() so you can release resources. */ protected abstract void begin(BuilderContext context) throws Exception; /** * Here's your chance to cleanup. * @throws Exception you can throw an exception in cleanup, but it won't be * caught. */ protected abstract void cleanup() throws Exception; /** * Location of the index directory. */ protected String indexDirectory; /** * Lucene Spatial Context */ protected SpatialContext spatialContext; /** * Spatial Indexing Strategy */ protected SpatialStrategy spatialStrategy; /** * Lucene Index Writer */ protected IndexWriter indexWriter; /** * Total number of records processed. */ protected long totalNumberProcessed = 0l; /** * Reusable index fields. 
*/ private TextField indexNameField = new TextField(FieldConstants.NAME, "", Field.Store.YES); private StoredField placeField = new StoredField(FieldConstants.PLACE, ""); private IntField recordIdField = new IntField(FieldConstants.PLACE_ID, -1, Field.Store.NO); private NumericDocValuesField populationField = new NumericDocValuesField(FieldConstants.POPULATION, -1l); private StoredField geospatialField = new StoredField(FieldConstants.GEOMETRY, ""); /** * Instantiate the IndexBuilder with the command line input. * @param args * @throws IOException */ public IndexBuilder(String[] args) throws Exception { // Give the user a nice warm welcome. printBanner(); // Parse the input arguments. parseArguments(args); br(); pl("Starting the Index Builder..."); String absoluteIndexDir = new File(indexDirectory).getAbsolutePath(); pl("> Writing index to: %s", absoluteIndexDir); pl("> Each dot represents 1,000 processed records."); hr(); pl("> Press Control-C (or unplug your computer) to terminate"); pl("> the indexing process."); hr(); // Initialize the index. initializeIndex(); try { // Stopwatch Date start = new Date(); // Delegate processing to derived classes. begin(this); // Stop! Date end = new Date(); // Calculate total time. long totalTime = end.getTime() - start.getTime(); // Pretty print dates. DateFormat df = new SimpleDateFormat("HH:mm:ss"); // Print elapsed time. pl(""); hr(); pl("Process started: %s, ended: %s; elasped time: %s seconds.", df.format(start), df.format(end), totalTime / 1000); br(); br(); } catch (Exception e) { pl("An error occurred while building the index."); e.printStackTrace(); } // Instruct the derived class to release resources and cleanup. cleanup(); } /** * Parse the command line arguments, retrieving configuration needed for * index building. 
* @param arguments command line input */ protected void parseArguments(String[] arguments) { ArgumentParser parser = ArgumentParsers.newArgumentParser("clavin").description(getDescription()); // Register a variable called "index" parser.addArgument("index").metavar("index-directory").type(String.class).required(true) .help("Index directory location (will create if it doesn't exist)."); // Allow the derived class to extend the arguments parser. extend(parser); Namespace namespace = null; try { // Parse the input arguments namespace = parser.parseArgs(arguments); // Collect the index directory. indexDirectory = namespace.getString("index"); } catch (ArgumentParserException ex) { parser.handleError(ex); System.exit(1); } // Delegate the collection of arguments to the derived class. initialize(namespace); } /** * Initialize the Lucene index. * @throws IOException */ protected void initializeIndex() throws IOException { final LuceneComponentsFactory factory = new LuceneComponentsFactory(indexDirectory); factory.initializeWriter(); indexWriter = factory.getIndexWriter(); spatialContext = factory.getSpatialContext(); spatialStrategy = factory.getSpatialStrategy(); // Register a shutdown hook to close the indexes when the process terminates. Runtime.getRuntime().addShutdownHook(new Thread() { @Override public void run() { try { factory.getIndexWriter().close(); } catch (IOException e) { e.printStackTrace(); } factory.getIndex().close(); } }); } /** * Add a place to the index. This is a method of the BuilderContext * implementation. * @param place Place to add to the index. */ public void add(Place place) { try { addPlaceToIndex(place); incrementProcessCounter(); } catch (Exception ex) { throw new RuntimeException(ex); } } /** * Increment the total processed counter and print a message * if a threshold is reached. 
*/ protected void incrementProcessCounter() { totalNumberProcessed++; if (totalNumberProcessed % 1000 == 0) p("."); } /** * Get the total number of records processed. * @return total processed. */ public long getTotalProcessed() { return totalNumberProcessed; } /** * Call this to add a place to the index. * @param place Place to add. * @throws IOException */ public void addPlaceToIndex(Place place) throws IOException { Document document = buildDocument(place); indexWriter.addDocument(document); } /** * Builds a Lucene document to be added to the index based on a * specified name for the location and the corresponding * {@link Place} object. * * @param name name to serve as index key * @param place GeoName Entry * @return */ private Document buildDocument(Place place) { // in case you're wondering, yes, this is a non-standard use of // the Lucene Document construct Document doc = new Document(); // this is essentially the key we'll try to match location // names against addIndexNameField(doc, place.getName()); if (!place.getAsciiName().equals(place.getName())) { addIndexNameField(doc, place.getAsciiName()); } for (String altName : place.getAlternateNames()) { if (!altName.equals(place.getName()) && !altName.equals(place.getAsciiName())) { addIndexNameField(doc, altName); } } // this is the payload we'll return when matching location // names to gazetteer records addPlaceField(doc, Serializer.Default.serialize(place)); // TODO: use geonameID to link administrative subdivisions to // each other addRecordIdField(doc, place.getId()); // we'll initially sort match results based on population addPopulationField(doc, place.getPopulation()); // we'll create a new Spatial geometry from the centroid of the geoname location Shape centroid = spatialContext.makePoint(place.getCenter().getLongitude(), place.getCenter().getLatitude()); // add a deserializable representation of the shape to the document. 
addGeospatialField(doc, centroid); // we will add the field to the index for (Field f : spatialStrategy.createIndexableFields(centroid)) { doc.add(f); } return doc; } /** * Add an Indexed Name Field. * @param doc Document to set on. * @param value Field value. */ private void addIndexNameField(Document doc, String value) { indexNameField.setStringValue(value); doc.add(indexNameField); } /** * Add the Place field to the document. * @param doc Document to set on. * @param place String representation of place. */ private void addPlaceField(Document doc, String place) { placeField.setStringValue(place); doc.add(placeField); } /** * Add the Id field to the document. * @param doc Document to set on. * @param recordId Id of the record. */ private void addRecordIdField(Document doc, int recordId) { recordIdField.setIntValue(recordId); doc.add(recordIdField); } /** * Add the population to the document. * @param doc Document to set on. * @param population Population size. */ private void addPopulationField(Document doc, long population) { populationField.setLongValue(population); doc.add(populationField); } /** * Add the geospatial index field. * @param doc Document to set on. * @param shape Geospatial value (typically a Point, like a lat/lon). */ @SuppressWarnings("deprecation") private void addGeospatialField(Document doc, Shape shape) { // TODO: Maybe do this more elegantly with the Spatial4J API's // ShapeReaderWriter... geospatialField.setStringValue(spatialContext.toString(shape)); doc.add(geospatialField); } /** * Print a message to the console. * * @param message * Message to print. */ public static void p(String message) { System.out.print(message); } /** * Print a message to the console, using a string formatter. * * @param template * Template string * @param objects * context */ public static void p(String template, Object... objects) { p(String.format(template, objects)); } /** * Print a message to the console on its own line. 
* * @param message * Message to print. */ public static void pl(String message) { System.out.println(message); } /** * Print a message to the console on its own line, using a string formatter. * * @param template * Template string * @param objects * context */ public static void pl(String template, Object... objects) { pl(String.format(template, objects)); } /** * Print a line return to the console. */ public static void br() { p("\n"); } /** * Print a horizontal rule (line) to the console. */ public static void hr() { pl("-----------------------------------------------------------"); } /** * Print a nice CLAVIN Banner, hurray! */ public void printBanner() { // Get the current classloader. ClassLoader cl = Thread.currentThread().getContextClassLoader(); // Retrieve the banner text from the text file in the src/main/resources // directory. InputStream is = cl.getResourceAsStream("banner.txt"); try { // Pull the banner from the input stream String banner = IOUtils.toString(is); // Print the banner. p(banner); } catch (IOException e) { // Oops, didn't work! e.printStackTrace(); } } }