is.merkor.cli.Main.java Source code

Java tutorial

Introduction

Here is the source code for is.merkor.cli.Main.java

Source

/*******************************************************************************
 * MerkOrExtraction
 * Copyright (c) 2012 Anna B. Nikulasdottir
 * 
 * License: GNU Lesser General Public License. 
 * See: <http://www.gnu.org/licenses> and <README.markdown>
 * 
 *******************************************************************************/
package is.merkor.cli;

import is.merkor.patternextraction.PatternExtraction;
import is.merkor.patternextraction.PatternInfo;
import is.merkor.patternextraction.PatternMerger;
import is.merkor.preprocessing.IceTagsBinTagsMapping;
import is.merkor.relationextraction.Relations2DBStatements;
import is.merkor.util.FileCommunicatorReading;
import is.merkor.util.FileCommunicatorWriting;
import is.merkor.util.MerkorFile;
import is.merkor.util.MerkorTokenReader;
import is.merkor.util.database.DBPopulation;
import is.merkor.util.database.datatypes.LexicalRelationType;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.ParseException;

/**
 * A command line interface to the MerkOr Extraction package - work in progress version.
 * 
 * Run java -jar MerkOrExtraction.jar -help to see options available.
 * 
 * More instructions in file README.markdown included in the MerkOrExtraction package.
 * 
 * Usage 1:
 * Create an SQL file containing mappings of ice-nlp tags and BÍN tags for nouns, adjectives
 * and verbs. Filters non-valid words of these classes and writes them out in a text file.
 * 
 * {@code java -jar MerkOrExtraction.jar -bin_mapping -input <inputDir_or_inputFile>}
 * 
 * @author Anna B. Nikulasdottir
 * @version 0.8
 *
 */
public class Main {

    /** Placeholder entry added to the result list when only the help text was printed. */
    private static final String NO_MESSAGE = "no message";

    /** Word classes for which the ice-nlp/BIN tag mapping is run, in processing order. */
    private static final String[] WORD_CLASSES = { "noun", "adjective", "verb" };

    /** Directory scanned by the {@code relations2dbstatements} option. */
    private static final String RELATION_RESULTS_DIR = "../relationDetectorResults/";

    /**
     * Runs every action requested on the command line, in a fixed order:
     * {@code help}, {@code bin_mapping}, {@code fill_db}, {@code extract_patterns},
     * {@code merge_patterns}, {@code relations2dbstatements}.
     *
     * @param cmdLine the parsed command line
     * @return messages for the user: the {@code "no message"} placeholder when help was
     *         printed, and one entry per action that could not run because a required
     *         option was missing; empty when everything requested ran without complaints
     */
    public static List<String> processCommandLine(final CommandLine cmdLine) {
        List<String> results = new ArrayList<String>();

        if (cmdLine.hasOption("help") || cmdLine.hasOption("h")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("java -jar MerkOrExtraction.jar", MerkorCommandLineOptions.options);
            results.add(NO_MESSAGE);
        }

        // getOptionValue yields null for an option that was not given
        String input = cmdLine.getOptionValue("input");
        String output = cmdLine.getOptionValue("output");

        // bin - icenlp tag mapping
        if (cmdLine.hasOption("bin_mapping")) {
            if (null == input) {
                results.add("no input given for bin_mapping!");
            } else {
                runBinMapping(input);
            }
        }

        // db-population
        if (cmdLine.hasOption("fill_db")) {
            String conn = cmdLine.getOptionValue("db_conn");
            String name = cmdLine.getOptionValue("db_name");
            String password = cmdLine.getOptionValue("password");

            if (null == conn || null == name || null == password || null == input) {
                results.add("connection, name, password and inputfile needed for option -fill_db!");
                // as before, a failed fill_db request stops all further processing
                return results;
            }
            long start = System.nanoTime();
            DBPopulation dbPopulation = new DBPopulation();
            dbPopulation.populateDBFromFile(conn, name, password, input);
            printExecutionTime(start);
        }

        if (cmdLine.hasOption("extract_patterns")) {
            if (null == input) {
                // previously this ran into a swallowed NullPointerException and still
                // wrote an empty pattern list to the output file
                results.add("no input given for extract_patterns!");
            } else {
                List<String> patterns = extractPatterns(input);
                FileCommunicatorWriting.writeListNonAppend(output, patterns);
            }
        }

        if (cmdLine.hasOption("merge_patterns")) {
            String relation = cmdLine.getOptionValue("relation");
            String passwd = cmdLine.getOptionValue("password");

            PatternMerger merger = new PatternMerger();
            List<String> patterns = merger.process(relation, passwd);

            FileCommunicatorWriting.writeListNonAppend(output, patterns);
        }

        if (cmdLine.hasOption("relations2dbstatements")) {
            relationsToDbStatements();
        }

        return results;
    }

    /**
     * Reads tokens from {@code input} once per word class. If {@code input} is a
     * directory, every file in it whose name does not start with a dot is read; if it
     * is a regular file, only that file is read. Prints the elapsed time when done.
     */
    private static void runBinMapping(String input) {
        long start = System.nanoTime();
        File inputPath = new File(input);

        // process the file (or all files in inputDir) for each wordclass
        for (String wordClass : WORD_CLASSES) {
            IceTagsBinTagsMapping mapper = new IceTagsBinTagsMapping(wordClass);
            MerkorTokenReader reader = new MerkorTokenReader(mapper);

            if (inputPath.isDirectory()) {
                File[] files = listFilesOrEmpty(inputPath);
                for (int i = 0; i < files.length; i++) {
                    System.out.println("file nr. " + i);
                    if (!files[i].getName().startsWith(".")) {
                        reader.readTokensFromFile(files[i].getAbsolutePath());
                    }
                }
            } else if (inputPath.isFile()) {
                reader.readTokensFromFile(inputPath.getAbsolutePath());
            }
        }
        printExecutionTime(start);
    }

    /**
     * Feeds every line of the input to a {@link PatternExtraction} and returns the
     * extracted patterns. A directory input is expected to contain sub-directories of
     * files; entries whose name starts with a dot are skipped on both levels. Any other
     * input is opened as a single file.
     *
     * Exceptions are reported via a stack trace and end the extraction; whatever was
     * extracted up to the last completely processed line is still returned.
     */
    private static List<String> extractPatterns(String input) {
        PatternExtraction extractor = new PatternExtraction();
        List<String> patterns = new ArrayList<String>();
        try {
            File inputPath = new File(input);
            if (inputPath.isDirectory()) {
                for (File dir : listFilesOrEmpty(inputPath)) {
                    if (dir.getName().startsWith(".")) {
                        continue;
                    }
                    // a plain file next to the sub-directories has no listing; it is
                    // skipped instead of aborting the whole run with an NPE
                    File[] files = listFilesOrEmpty(dir);
                    for (int j = 0; j < files.length; j++) {
                        System.out.println("file nr. " + j);
                        if (!files[j].getName().startsWith(".")) {
                            MerkorFile current = new MerkorFile(files[j].getAbsolutePath());
                            for (String line : current) {
                                extractor.processLine(line);
                                // NOTE(review): refreshed after every line so that partial
                                // results survive an exception; whether a single call after
                                // the loops would be equivalent depends on PatternExtraction
                                patterns = extractor.getExtractedPatternsAsStrings();
                            }
                        }
                    }
                }
            } else {
                MerkorFile file = new MerkorFile(input);
                for (String line : file) {
                    extractor.processLine(line);
                    patterns = extractor.getExtractedPatternsAsStrings();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return patterns;
    }

    /**
     * Passes every file in the relation results directory whose base name (the part
     * before the first dot) maps to a positive relation id on to
     * {@link Relations2DBStatements}; other files are reported on standard output.
     */
    private static void relationsToDbStatements() {
        Relations2DBStatements lexReader = new Relations2DBStatements();

        File[] fileList = FileCommunicatorReading.getFileList(RELATION_RESULTS_DIR);
        if (null == fileList) {
            // defensive: cannot tell from here whether getFileList may return null
            return;
        }

        for (File file : fileList) {
            String fName = file.getName();
            int dot = fName.indexOf('.');
            // a name without any dot used to crash substring(0, -1); use the whole name
            String relation = dot < 0 ? fName : fName.substring(0, dot);
            int relId = LexicalRelationType.getTypeId(relation);

            if (relId > 0) {
                lexReader.processFile(RELATION_RESULTS_DIR + fName, relId);
            } else {
                System.out.println("no relId found for " + relation);
            }
        }
    }

    /**
     * Lists the entries of {@code dir}, mapping the {@code null} that
     * {@link File#listFiles()} returns for non-directories and I/O errors to an empty array.
     */
    private static File[] listFilesOrEmpty(File dir) {
        File[] files = dir.listFiles();
        return null == files ? new File[0] : files;
    }

    /** Prints the seconds elapsed since {@code startNanos} (a {@link System#nanoTime()} value). */
    private static void printExecutionTime(long startNanos) {
        double elapsedTime = (double) (System.nanoTime() - startNanos) / 1000000000.0;
        System.out.println("Execution time: " + elapsedTime + " seconds!");
    }

    /**
     * Command line entry point: builds the option set, parses {@code args}, runs the
     * requested actions and prints the messages they returned.
     *
     * @param args the raw command line arguments
     * @throws Exception if the UTF-8 output stream cannot be created or an action fails
     *         with an exception it does not handle itself
     */
    public static void main(String[] args) throws Exception {
        PrintStream out = new PrintStream(System.out, true, "UTF-8");

        CommandLineParser parser = new GnuParser();
        try {
            MerkorCommandLineOptions.createOptions();
            List<String> results = processCommandLine(parser.parse(MerkorCommandLineOptions.options, args));

            // these messages used to be dropped, so a missing option failed silently
            for (String message : results) {
                if (!NO_MESSAGE.equals(message)) {
                    out.println(message);
                }
            }
        } catch (ParseException e) {
            System.err.println("Parsing failed.  Reason: " + e.getMessage());
        }
    }
}