edu.isi.karma.rdf.OfflineRdfGenerator.java Source code

Introduction

Here is the source code for edu.isi.karma.rdf.OfflineRdfGenerator.java, the command-line entry point in the Karma data-integration project for applying an R2RML mapping model to a file source (CSV, JSON, XML, AVRO, JL) or a database/SQL source and publishing the result as N3 RDF and/or JSON-LD.
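
As a quick orientation, a typical invocation for a CSV source might look like the one below; the class and option names come from the source itself, while the jar name and file paths are only placeholders.

    java -cp karma-offline-shaded.jar edu.isi.karma.rdf.OfflineRdfGenerator \
        --sourcetype CSV \
        --filepath ./data/people.csv \
        --modelfilepath ./models/people-model.ttl \
        --sourcename people \
        --outputfile ./output/people.n3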

Source

/**
 * *****************************************************************************
 * Copyright 2012 University of Southern California
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 *
 * This code was developed by the Information Integration Group as part of the
 * Karma project at the Information Sciences Institute of the University of
 * Southern California. For more information, publications, and related
 * projects, please see: http://www.isi.edu/integration
 * ****************************************************************************
 */

package edu.isi.karma.rdf;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Modifier;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.reflections.Reflections;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.StmtIterator;

import edu.isi.karma.config.ModelingConfiguration;
import edu.isi.karma.config.ModelingConfigurationRegistry;
import edu.isi.karma.controller.update.UpdateContainer;
import edu.isi.karma.er.helper.PythonRepository;
import edu.isi.karma.er.helper.PythonRepositoryRegistry;
import edu.isi.karma.kr2rml.ContextIdentifier;
import edu.isi.karma.kr2rml.URIFormatter;
import edu.isi.karma.kr2rml.mapping.R2RMLMappingIdentifier;
import edu.isi.karma.kr2rml.mapping.WorksheetR2RMLJenaModelParser;
import edu.isi.karma.kr2rml.planning.UserSpecifiedRootStrategy;
import edu.isi.karma.kr2rml.writer.JSONKR2RMLRDFWriter;
import edu.isi.karma.kr2rml.writer.KR2RMLRDFWriter;
import edu.isi.karma.kr2rml.writer.N3KR2RMLRDFWriter;
import edu.isi.karma.metadata.KarmaMetadataManager;
import edu.isi.karma.metadata.PythonTransformationMetadata;
import edu.isi.karma.metadata.UserConfigMetadata;
import edu.isi.karma.metadata.UserPreferencesMetadata;
import edu.isi.karma.modeling.Namespaces;
import edu.isi.karma.modeling.Uris;
import edu.isi.karma.modeling.semantictypes.SemanticTypeUtil;
import edu.isi.karma.rdf.GenericRDFGenerator.InputType;
import edu.isi.karma.util.DBType;
import edu.isi.karma.util.EncodingDetector;
import edu.isi.karma.webserver.ContextParametersRegistry;
import edu.isi.karma.webserver.KarmaException;
import edu.isi.karma.webserver.ServletContextParameterMap;
import edu.isi.karma.webserver.ServletContextParameterMap.ContextParameter;

public class OfflineRdfGenerator {

    private static Logger logger = LoggerFactory.getLogger(OfflineRdfGenerator.class);

    private String inputType;
    private String inputEncoding;
    private String inputDelimiter;
    private String inputTextQualifier;
    private String inputHeaderStartIndex;
    private String inputDataStartIndex;

    private String modelFilePath;
    private String modelURLString;
    private String baseURI;
    private String outputFilePath;
    private String outputFileJSONPath;
    private String bloomFiltersFilePath;
    private List<KR2RMLRDFWriter> writers;
    private URL modelURL;
    private String dbtypeStr;
    private String username;
    private String password;
    private String hostname;
    private String encoding;
    private String sourceFilePath;
    private String dBorSIDName;
    private String tablename;
    private String topkrows;
    private String queryFile;
    private String portnumber;
    private String sMaxNumLines;
    private String sourceName;
    private String selectionName;
    private int port;
    private DBType dbType;
    private File inputFile;
    private int maxNumLines;
    private String rootTripleMap;
    private List<String> killTripleMap;
    private List<String> stopTripleMap;
    private List<String> POMToKill;
    private String contextFile;
    private String contextURLString;
    private URL contextURL;
    private ServletContextParameterMap contextParameters;

    public OfflineRdfGenerator(CommandLine cl) {

        this.writers = new LinkedList<>();
        parseCommandLineOptions(cl);
    }

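    /**
     * Entry point: parses the command-line options, runs the generator, and
     * logs how long RDF generation took.
     */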
    public static void main(String[] args) {

        Options options = createCommandLineOptions();
        CommandLine cl = CommandLineArgumentParser.parse(args, options, OfflineRdfGenerator.class.getSimpleName());
        if (cl == null) {
            return;
        }

        try {
            OfflineRdfGenerator generator = new OfflineRdfGenerator(cl);

            long start = System.currentTimeMillis();
            generator.generate();
            long end = System.currentTimeMillis();

            logger.info("Time to generate RDF: " + (float) (end - start) / (1000 * 60) + " mins");

        } catch (Exception e) {
            logger.error("Error occurred while generating RDF!", e);
        }
    }

    private void generate() throws Exception {
        if (validateCommandLineOptions()) {
            createModelURL();
            setupKarmaMetadata();
            generateRDF();
            closeWriters();
        }
    }

    private void generateRDF() throws Exception {
        /**
         * Generate RDF on the source type *
         */
        long l = System.currentTimeMillis();

        // Database table
        if (inputType.equals("DB") || inputType.equals("SQL")) {
            generateRdfFromDatabaseTable();
        } // File based worksheets such as JSON, XML, CSV
        else {
            generateRdfFromFile();
        }

        logger.info("done after {}", System.currentTimeMillis() - l);
        if (outputFilePath != null) {
            logger.info("RDF published at: " + outputFilePath);
        }
        if (outputFileJSONPath != null) {
            logger.info("JSON-LD published at: " + outputFileJSONPath);
        }
    }

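    /**
     * Bootstraps a headless Karma environment: registers a default context,
     * the user preferences/config/Python-transformation metadata and a Python
     * script repository, then disables semantic type training and the model
     * learner so that generation can run fully offline.
     */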
    private void setupKarmaMetadata() throws KarmaException {

        ContextParametersRegistry contextParametersRegistry = ContextParametersRegistry.getInstance();
        contextParameters = contextParametersRegistry.registerByKarmaHome(null);

        UpdateContainer uc = new UpdateContainer();
        KarmaMetadataManager userMetadataManager = new KarmaMetadataManager(contextParameters);
        userMetadataManager.register(new UserPreferencesMetadata(contextParameters), uc);
        userMetadataManager.register(new UserConfigMetadata(contextParameters), uc);
        userMetadataManager.register(new PythonTransformationMetadata(contextParameters), uc);
        PythonRepository pythonRepository = new PythonRepository(false,
                contextParameters.getParameterValue(ContextParameter.USER_PYTHON_SCRIPTS_DIRECTORY));
        PythonRepositoryRegistry.getInstance().register(pythonRepository);

        SemanticTypeUtil.setSemanticTypeTrainingStatus(false);
        ModelingConfiguration modelingConfiguration = ModelingConfigurationRegistry.getInstance()
                .register(contextParameters.getId());
        modelingConfiguration.setLearnerEnabled(false); // disable automatic learning

    }

    protected void parseCommandLineOptions(CommandLine cl) {
        inputType = (String) cl.getOptionValue("sourcetype");
        if (inputType != null) {
            // Normalize the case so the case-sensitive equals("DB")/equals("SQL")
            // checks used later agree with the case-insensitive validation.
            inputType = inputType.toUpperCase();
        }
        inputEncoding = (String) cl.getOptionValue("encoding");
        inputDelimiter = (String) cl.getOptionValue("delimiter");
        if (inputDelimiter != null) {
            if (inputDelimiter.equalsIgnoreCase("tab"))
                inputDelimiter = "\t";
            else if (inputDelimiter.equalsIgnoreCase("space"))
                inputDelimiter = " ";
        }
        inputTextQualifier = (String) cl.getOptionValue("textqualifier");
        inputHeaderStartIndex = (String) cl.getOptionValue("headerindex");
        inputDataStartIndex = (String) cl.getOptionValue("dataindex");

        modelFilePath = (String) cl.getOptionValue("modelfilepath");
        modelURLString = (String) cl.getOptionValue("modelurl");
        outputFilePath = (String) cl.getOptionValue("outputfile");
        outputFileJSONPath = (String) cl.getOptionValue("jsonoutputfile");
        baseURI = (String) cl.getOptionValue("baseuri");
        bloomFiltersFilePath = (String) cl.getOptionValue("outputbloomfilter");
        selectionName = (String) cl.getOptionValue("selection");
        rootTripleMap = (String) cl.getOptionValue("root");
        String killTripleMap = (String) cl.getOptionValue("killtriplemap");
        String stopTripleMap = (String) cl.getOptionValue("stoptriplemap");
        String POMToKill = (String) cl.getOptionValue("pomtokill");
        contextFile = (String) cl.getOptionValue("contextfile");
        contextURLString = (String) cl.getOptionValue("contexturl");
        if (rootTripleMap == null) {
            rootTripleMap = "";
        }
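        // killtriplemap, stoptriplemap and pomtokill are comma-separated local
        // names; expand each one into a full URI in the KARMA_DEV namespace.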
        if (killTripleMap == null) {
            this.killTripleMap = new ArrayList<>();
        } else {
            this.killTripleMap = new ArrayList<>(Arrays.asList(killTripleMap.split(",")));
            int size = this.killTripleMap.size();
            for (int i = 0; i < size; i++) {
                String t = this.killTripleMap.remove(0);
                this.killTripleMap.add(Namespaces.KARMA_DEV + t);
            }
        }
        if (stopTripleMap == null) {
            this.stopTripleMap = new ArrayList<>();
        } else {
            this.stopTripleMap = new ArrayList<>(Arrays.asList(stopTripleMap.split(",")));
            int size = this.stopTripleMap.size();
            for (int i = 0; i < size; i++) {
                String t = this.stopTripleMap.remove(0);
                this.stopTripleMap.add(Namespaces.KARMA_DEV + t);
            }
        }
        if (POMToKill == null) {
            this.POMToKill = new ArrayList<>();
        } else {
            this.POMToKill = new ArrayList<>(Arrays.asList(POMToKill.split(",")));
            int size = this.POMToKill.size();
            for (int i = 0; i < size; i++) {
                String t = this.POMToKill.remove(0);
                this.POMToKill.add(Namespaces.KARMA_DEV + t);
            }
        }
        parseDatabaseCommandLineOptions(cl);
        parseFileCommandLineOptions(cl);

    }

    protected void parseDatabaseCommandLineOptions(CommandLine cl) {

        dbtypeStr = (String) cl.getOptionValue("dbtype");
        hostname = (String) cl.getOptionValue("hostname");
        username = (String) cl.getOptionValue("username");
        password = (String) cl.getOptionValue("password");
        encoding = (String) cl.getOptionValue("encoding");
        dBorSIDName = (String) cl.getOptionValue("dbname");
        tablename = (String) cl.getOptionValue("tablename");
        topkrows = (String) cl.getOptionValue("topkrows");
        queryFile = (String) cl.getOptionValue("queryfile");
        portnumber = (String) cl.getOptionValue("portnumber");
    }

    protected void parseFileCommandLineOptions(CommandLine cl) {

        sourceFilePath = (String) cl.getOptionValue("filepath");
        sMaxNumLines = (String) cl.getOptionValue("maxNumLines");
        sourceName = (String) cl.getOptionValue("sourcename");
    }

    protected boolean validateCommandLineOptions() throws IOException {

        if ((modelURLString == null && modelFilePath == null)
                || (outputFilePath == null && outputFileJSONPath == null) || inputType == null) {
            logger.error("Mandatory value missing. Please provide argument values "
                    + "for sourcetype, (modelfilepath or modelurl) and (outputfile or jsonoutputfile).");
            return false;
        }

        if (!inputType.equalsIgnoreCase("DB") && !inputType.equalsIgnoreCase("CSV")
                && !inputType.equalsIgnoreCase("XML") && !inputType.equalsIgnoreCase("JSON")
                && !inputType.equalsIgnoreCase("SQL") && !inputType.equalsIgnoreCase("AVRO")
                && !inputType.equalsIgnoreCase("JL")) {
            logger.error("Invalid source type: " + inputType
                    + ". Please choose from: DB, SQL, CSV, XML, JSON, AVRO, JL.");
            return false;
        }
        return true;
    }

    private boolean validateFileCommandLineOptions() {
        inputFile = new File(sourceFilePath);
        if (!inputFile.exists()) {
            logger.error("File not found: " + inputFile.getAbsolutePath());
            return false;
        }
        if (encoding == null) {
            encoding = EncodingDetector.detect(inputFile);
        }

        maxNumLines = -1;
        if (sMaxNumLines != null) {
            maxNumLines = Integer.parseInt(sMaxNumLines);
        }

        if (sourceName == null) {
            logger.error("You need to supply a value for '--sourcename'");
            return false;
        }
        return true;
    }

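    /**
     * Resolves the model (and optional context) location into a URL. When no
     * base URI is supplied on the command line, it is read from the base-URI
     * property of the R2RML mapping resource in the model itself.
     */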
    private void createModelURL() throws IOException {
        /**
         * VALIDATE THE OPTIONS *
         */
        if (modelFilePath != null) {
            File modelFile = new File(modelFilePath);
            if (!modelFile.exists()) {
                throw new IOException("File not found: " + modelFile.getAbsolutePath());
            }
            modelURL = modelFile.toURI().toURL();
        } else {
            modelURL = new URL(modelURLString);
        }
        if (contextFile != null) {
            File tmp = new File(contextFile);
            if (!tmp.exists()) {
                throw new IOException("File not found: " + tmp.getAbsolutePath());
            }
            contextURL = tmp.toURI().toURL();
        } else if (contextURLString != null) {
            contextURL = new URL(contextURLString);
        }
        if (baseURI != null && !baseURI.trim().isEmpty())
            return;
        try {
            R2RMLMappingIdentifier modelIdentifier = new R2RMLMappingIdentifier(modelURL.toString(), modelURL,
                    null);
            Model model = WorksheetR2RMLJenaModelParser.loadSourceModelIntoJenaModel(modelIdentifier);
            Property rdfTypeProp = model.getProperty(Uris.RDF_TYPE_URI);
            Property baseURIProp = model.getProperty(Uris.KM_HAS_BASEURI);
            RDFNode node = model.getResource(Uris.KM_R2RML_MAPPING_URI);
            ResIterator res = model.listResourcesWithProperty(rdfTypeProp, node);
            List<Resource> resList = res.toList();
            for (Resource r : resList) {
                if (r.hasProperty(baseURIProp)) {
                    baseURI = r.getProperty(baseURIProp).asTriple().getObject().toString();
                    baseURI = baseURI.replace("\"", "");
                }
            }
        } catch (IOException e) {
            // If the model cannot be read here, leave baseURI unset and let
            // RDF generation proceed without it.
            logger.warn("Unable to read the base URI from the R2RML model", e);
        }
    }

    private void generateRdfFromDatabaseTable() throws Exception {
        if (!validateDatabaseCommandLineOptions()) {
            logger.error("Unable to generate RDF from database table!");
            return;
        }

        DatabaseTableRDFGenerator dbRdfGen = new DatabaseTableRDFGenerator(dbType, hostname, port, username,
                password, dBorSIDName, encoding, selectionName, contextParameters);
        ContextIdentifier contextId = null;
        if (contextURL != null) {

            contextId = new ContextIdentifier(contextURL.getQuery(), contextURL, null);
        }
        if (inputType.equals("DB")) {
            R2RMLMappingIdentifier id = new R2RMLMappingIdentifier(tablename, modelURL, null);
            createWriters();
            dbRdfGen.generateRDFFromTable(tablename, topkrows, writers, id, contextId, baseURI);
        } else {
            String query = loadQueryFromFile();
            R2RMLMappingIdentifier id = new R2RMLMappingIdentifier(modelURL.toString(), modelURL, null);
            createWriters();
            dbRdfGen.generateRDFFromSQL(query, writers, id, contextId, baseURI);
        }

    }

    private boolean validateDatabaseCommandLineOptions() {
        if (encoding == null)
            encoding = "UTF-8";
        port = 0;
        try {
            port = Integer.parseInt(portnumber);
        } catch (Throwable t) {
            logger.error("Error occurred while parsing the value for portnumber. Provided value: " + portnumber);
            return false;
        }

        // Validate the arguments
        if (dbtypeStr == null || dbtypeStr.equals("") || hostname == null || hostname.equals("") || username == null
                || username.equals("") || password == null || password.equals("") || dBorSIDName == null
                || dBorSIDName.equals("") || (inputType.equals("DB") && (tablename == null || tablename.equals("")))
                || (inputType.equals("SQL") && (queryFile == null || queryFile.equals("")))) {
            if (inputType.equals("DB"))
                logger.error("A mandatory value is missing for fetching data from "
                        + "a database. Please provide argument values for dbtype, hostname, "
                        + "username, password, portnumber, dbname and tablename.");
            else
                logger.error("A mandatory value is missing for fetching data from "
                        + "a database. Please provide argument values for dbtype, hostname, "
                        + "username, password, portnumber, dbname and queryfile.");
            return false;
        }

        try {
            dbType = DBType.valueOf(dbtypeStr);
        } catch (IllegalArgumentException e) {
            // Guard against both an exception and a null result for unknown types.
            dbType = null;
        }
        if (dbType == null) {
            logger.error("Unidentified database type. Valid values: Oracle, MySQL, SQLServer, PostGIS");
            return false;
        }
        return true;
    }

    private String loadQueryFromFile() throws IOException {
        File file = new File(queryFile);
        String queryFileEncoding = EncodingDetector.detect(file);
        String query = EncodingDetector.getString(file, queryFileEncoding);
        return query;
    }

    protected void closeWriters() {
        for (KR2RMLRDFWriter writer : writers) {
            writer.flush();
            writer.close();
        }
    }

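    /**
     * Creates the requested output writers: N3 and/or JSON-LD, plus an
     * optional bloom filter writer.
     */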
    protected void createWriters() throws Exception {
        createN3Writer();
        createBloomFilterWriter();
    }

    protected void createN3Writer() throws UnsupportedEncodingException, FileNotFoundException {

        if (outputFilePath != null) {
            OutputStreamWriter fw = new OutputStreamWriter(new FileOutputStream(outputFilePath), "UTF-8");
            BufferedWriter bw = new BufferedWriter(fw);
            PrintWriter pw = new PrintWriter(bw);
            N3KR2RMLRDFWriter n3Writer = new N3KR2RMLRDFWriter(new URIFormatter(), pw);

            if (baseURI != null) {
                n3Writer.setBaseURI(baseURI);
            }
            writers.add(n3Writer);
        }
        if (outputFileJSONPath != null) {
            JSONKR2RMLRDFWriter jsonWriter = new JSONKR2RMLRDFWriter(new PrintWriter(outputFileJSONPath), baseURI);
            writers.add(jsonWriter);
        }
    }

    protected void createBloomFilterWriter() throws Exception {
        if (bloomFiltersFilePath != null && !bloomFiltersFilePath.trim().isEmpty()) {
            PrintWriter bloomfilterpw = new PrintWriter(new File(bloomFiltersFilePath));
            logger.info("Writing bloom filters to: " + bloomFiltersFilePath);
            writers.add(createBloomFilterWriter(bloomfilterpw, true, baseURI));
        }

    }

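    /**
     * Looks up BloomFilterKR2RMLRDFWriter via reflection; the class is only
     * available when Karma is built with bloom filter support (-Pbloom).
     */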
    private KR2RMLRDFWriter createBloomFilterWriter(PrintWriter bloomfilterpw, Boolean isRDF, String baseURI)
            throws Exception {

        Reflections reflections = new Reflections("edu.isi.karma.kr2rml.writer");

        Set<Class<? extends KR2RMLRDFWriter>> subTypes = reflections.getSubTypesOf(KR2RMLRDFWriter.class);

        for (Class<? extends KR2RMLRDFWriter> subType : subTypes) {
            if (!Modifier.isAbstract(subType.getModifiers()) && !subType.isInterface()
                    && subType.getSimpleName().equals("BloomFilterKR2RMLRDFWriter"))
                try {
                    KR2RMLRDFWriter writer = subType.newInstance();
                    writer.setWriter(bloomfilterpw);
                    Properties p = new Properties();
                    p.setProperty("is.rdf", isRDF.toString());
                    p.setProperty("base.uri", baseURI);
                    writer.initialize(p);
                    return writer;
                } catch (Exception e) {
                    bloomfilterpw.close();
                    throw new Exception("Unable to instantiate bloom filter writer", e);
                }
        }

        bloomfilterpw.close();
        throw new Exception("Bloom filter writing support not enabled.  Please recompile with -Pbloom");
    }

    private void generateRdfFromFile() throws Exception {
        if (!validateFileCommandLineOptions()) {
            logger.error("Unable to generate RDF from file because of invalid configuration");
            return;
        }
        R2RMLMappingIdentifier id = new R2RMLMappingIdentifier(sourceName, modelURL, null);

        createWriters();
        GenericRDFGenerator rdfGenerator = new GenericRDFGenerator(selectionName);
        rdfGenerator.addModel(id);

        InputType inputType = null;
        if (this.inputType.equalsIgnoreCase("CSV"))
            inputType = InputType.CSV;
        else if (this.inputType.equalsIgnoreCase("JSON"))
            inputType = InputType.JSON;
        else if (this.inputType.equalsIgnoreCase("XML"))
            inputType = InputType.XML;
        else if (this.inputType.equalsIgnoreCase("AVRO"))
            inputType = InputType.AVRO;
        else if (this.inputType.equalsIgnoreCase("JL"))
            inputType = InputType.JL;
        Model model = rdfGenerator.getModelParser(sourceName).getModel();
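        // If a root node id was supplied, resolve it to the triple map whose
        // rr:subjectMap points at that node and use it as the root strategy.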
        if (rootTripleMap != null && !rootTripleMap.isEmpty()) {
            StmtIterator itr = model.listStatements(null, model.getProperty(Uris.KM_NODE_ID_URI), rootTripleMap);
            Resource subject = null;
            while (itr.hasNext()) {
                subject = itr.next().getSubject();
            }
            if (subject != null) {
                itr = model.listStatements(null, model.getProperty(Uris.RR_SUBJECTMAP_URI), subject);
                while (itr.hasNext()) {
                    rootTripleMap = itr.next().getSubject().toString();
                }
            }
        }
        RDFGeneratorRequest request = new RDFGeneratorRequest(sourceName, inputFile.getName());
        request.setInputFile(inputFile);
        request.setDataType(inputType);
        if (inputEncoding != null)
            request.setEncoding(inputEncoding);
        if (inputDelimiter != null)
            request.setDelimiter(this.inputDelimiter);
        if (inputTextQualifier != null)
            request.setTextQualifier(inputTextQualifier);
        if (inputHeaderStartIndex != null)
            request.setHeaderStartIndex(Integer.parseInt(inputHeaderStartIndex));
        if (inputDataStartIndex != null)
            request.setDataStartIndex(Integer.parseInt(inputDataStartIndex));

        request.setMaxNumLines(maxNumLines);
        request.setAddProvenance(false);
        request.addWriters(writers);
        request.setPOMToKill(POMToKill);
        request.setTripleMapToKill(killTripleMap);
        request.setTripleMapToStop(stopTripleMap);
        request.setStrategy(new UserSpecifiedRootStrategy(rootTripleMap));
        request.setContextParameters(contextParameters);
        if (contextURL != null) {
            ContextIdentifier contextId = new ContextIdentifier(contextURL.getQuery(), contextURL, null);
            rdfGenerator.addContext(contextId);
            request.setContextName(contextURL.getQuery());
        }
        rdfGenerator.generateRDF(request);
    }

    private static Options createCommandLineOptions() {

        Options options = new Options();

        options.addOption(new Option("sourcetype", "sourcetype", true,
                "type of source. Valid values: DB, SQL, CSV, JSON, XML, AVRO, JL"));
        options.addOption(new Option("delimiter", "delimiter", true, "column delimiter for CSV file"));
        options.addOption(new Option("encoding", "encoding", true, "source encoding"));
        options.addOption(new Option("textqualifier", "textQualifier", true, "text qualifier for CSV file"));
        options.addOption(new Option("headerindex", "headerindex", true, "header index for CSV file"));
        options.addOption(new Option("dataindex", "dataindex", true, "data start index for CSV file"));
        options.addOption(new Option("filepath", "filepath", true, "location of the input file"));
        options.addOption(new Option("modelfilepath", "modelfilepath", true, "location of the model file"));
        options.addOption(new Option("modelurl", "modelurl", true, "location of the model"));
        options.addOption(new Option("sourcename", "sourcename", true, "name of the source in the model to use"));
        options.addOption(new Option("outputfile", "outputfile", true, "location of the output file"));
        options.addOption(new Option("dbtype", "dbtype", true,
                "database type. Valid values: Oracle, MySQL, SQLServer, PostGIS"));
        options.addOption(new Option("hostname", "hostname", true, "hostname for database connection"));
        options.addOption(new Option("username", "username", true, "username for database connection"));
        options.addOption(new Option("password", "password", true, "password for database connection"));
        options.addOption(new Option("portnumber", "portnumber", true, "portnumber for database connection"));
        options.addOption(new Option("dbname", "dbname", true, "database or SID name for database connection"));
        options.addOption(new Option("tablename", "tablename", true, "name of the database table"));
        options.addOption(
                new Option("topkrows", "topkrows", true, "number of top k rows to select from the table"));
        options.addOption(new Option("queryfile", "queryfile", true, "query file for loading data"));
        options.addOption(new Option("outputbloomfilter", "bloomfiltersfile", true,
                "location of the output file for bloom filters"));
        options.addOption(new Option("baseuri", "baseuri", true, "specifies the base URI for the generated RDF"));
        options.addOption(new Option("selection", "selection", true, "specifies selection name"));
        options.addOption(new Option("root", "root", true, "specifies the root node for RDF generation"));
        options.addOption(new Option("killtriplemap", "killtriplemap", true,
                "comma-separated list of triple maps to kill"));
        options.addOption(new Option("stoptriplemap", "stoptriplemap", true,
                "comma-separated list of triple maps to stop"));
        options.addOption(new Option("pomtokill", "pomtokill", true,
                "comma-separated list of predicate object maps (POMs) to kill"));
        options.addOption(new Option("jsonoutputfile", "jsonoutputfile", true,
                "location of the JSON-LD output file"));
        options.addOption(new Option("contextfile", "contextfile", true, "location of the global context file"));
        options.addOption(new Option("contexturl", "contexturl", true, "specifies global context url"));
        options.addOption(new Option("help", "help", false, "print this message"));

        return options;
    }
}