edu.cmu.lti.oaqa.annographix.apps.SolrIndexApp.java Source code

Java tutorial

Introduction

Here is the source code for edu.cmu.lti.oaqa.annographix.apps.SolrIndexApp.java

Source

/*
 *  Copyright 2014 Carnegie Mellon University
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package edu.cmu.lti.oaqa.annographix.apps;

import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

import edu.cmu.lti.oaqa.annographix.solr.*;

/**
 * An application that reads text files produced by an annotation
 * pipeline and indexes their content using SOLR.
 * <p>
 * The first text file contains documents. The document
 * text is enclosed between tags &lt;DOC&gt; and &lt;/DOC&gt; and occupies
 * exactly one line.
 * <p>
 * The second text file contains annotations in Indri format.
 * 
 * @author Leonid Boytsov
 *
 */
public class SolrIndexApp {
    /** Name of the long command-line option that overrides the text field name. */
    public static final String TEXT_FIELD_ARG = "textField";
    /** Name of the long command-line option that overrides the annotation field name. */
    public static final String ANNOT_FIELD_ARG = "annotField";

    // Values of the command-line arguments; populated by main().
    static String docTextFile = null, docAnnotFile = null, solrURI = null;
    // Batch size passed to DocumentReader.readDoc; overridden by the -n option.
    static int batchQty = 100;

    /**
     * Prints an error message followed by the usage summary to standard
     * error and terminates the JVM with exit status 1.
     *
     * @param err a description of what is wrong with the arguments.
     */
    static void Usage(String err) {
        System.err.println("Error: " + err);
        // The option letters listed here must match the ones registered in main().
        System.err.println("Usage: -t <Text File> -a <Annotation File> -u <Target Server URI>"
                + " [ -n <Batch Size> default " + batchQty + " ]"
                + " [ --" + TEXT_FIELD_ARG + " <Text field name> ]"
                + " [ --" + ANNOT_FIELD_ARG + " <Annotation field name> ]");

        System.exit(1);
    }

    /**
     * Returns the value of a mandatory option; if the option is absent,
     * reports the problem via {@link #Usage(String)}, which exits.
     *
     * @param cmd    parsed command line.
     * @param opt    short option name.
     * @param errMsg message to print when the option is missing.
     * @return the option value.
     */
    private static String requireOption(CommandLine cmd, String opt, String errMsg) {
        if (!cmd.hasOption(opt)) {
            Usage(errMsg);
        }
        return cmd.getOptionValue(opt);
    }

    /**
     * Entry point: parses the command line, checks the Solr configuration
     * via {@code SolrUtils.parseAndCheckConfig}, and then passes the text
     * file and the annotation file to {@code DocumentReader.readDoc}
     * together with a {@code SolrDocumentIndexer}.
     * <p>
     * Exits with status 1 if the arguments are invalid or if any exception
     * is thrown while processing.
     *
     * @param args command-line arguments; see {@link #Usage(String)}.
     */
    public static void main(String[] args) {
        Options options = new Options();

        options.addOption("t", null, true, "Text File");
        options.addOption("a", null, true, "Annotation File");
        options.addOption("u", null, true, "Solr URI");
        options.addOption("n", null, true, "Batch size");
        options.addOption(
                OptionBuilder.withLongOpt(TEXT_FIELD_ARG).withDescription("Text field name").hasArg().create());
        options.addOption(OptionBuilder.withLongOpt(ANNOT_FIELD_ARG).withDescription("Annotation field name")
                .hasArg().create());

        CommandLineParser parser = new org.apache.commons.cli.GnuParser();

        try {
            CommandLine cmd = parser.parse(options, args);

            docTextFile = requireOption(cmd, "t", "Specify Text File");
            docAnnotFile = requireOption(cmd, "a", "Specify Annotation File");
            solrURI = requireOption(cmd, "u", "Specify Solr URI");

            if (cmd.hasOption("n")) {
                String batchArg = cmd.getOptionValue("n");
                try {
                    batchQty = Integer.parseInt(batchArg);
                } catch (NumberFormatException e) {
                    // Report a malformed number as a usage error rather than
                    // letting it reach the generic exception handler below.
                    Usage("Batch size must be an integer, got: '" + batchArg + "'");
                }
            }

            // Field names fall back to the project defaults unless overridden.
            String textFieldName = cmd.getOptionValue(TEXT_FIELD_ARG, UtilConst.DEFAULT_TEXT4ANNOT_FIELD);
            String annotFieldName = cmd.getOptionValue(ANNOT_FIELD_ARG, UtilConst.DEFAULT_ANNOT_FIELD);

            System.out.println(String.format("Annotated text field: '%s', annotation field: '%s'", textFieldName,
                    annotFieldName));

            // Ignoring return value here
            SolrUtils.parseAndCheckConfig(solrURI, textFieldName, annotFieldName);

            System.out.println("Config is fine!");

            DocumentReader.readDoc(docTextFile, textFieldName, docAnnotFile, batchQty,
                    new SolrDocumentIndexer(solrURI, textFieldName, annotFieldName));

        } catch (ParseException e) {
            Usage("Cannot parse arguments");
        } catch (Exception e) {
            System.err.println("Terminating due to an exception: " + e);
            System.exit(1);
        }

    }
}