esiptestbed.mudrod.main.MudrodEngine.java Source code

Java tutorial

Introduction

Here is the source code for esiptestbed.mudrod.main.MudrodEngine.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License"); you 
 * may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package esiptestbed.mudrod.main;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Properties;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import esiptestbed.mudrod.discoveryengine.DiscoveryEngineAbstract;
import esiptestbed.mudrod.discoveryengine.MetadataDiscoveryEngine;
import esiptestbed.mudrod.discoveryengine.OntologyDiscoveryEngine;
import esiptestbed.mudrod.discoveryengine.RecommendEngine;
import esiptestbed.mudrod.discoveryengine.WeblogDiscoveryEngine;
import esiptestbed.mudrod.driver.ESDriver;
import esiptestbed.mudrod.driver.SparkDriver;
import esiptestbed.mudrod.integration.LinkageIntegration;

/**
 * Main entry point for running the Mudrod system. Invocation of this class is
 * tightly linked to the primary Mudrod configuration which can be located at
 * <a href=
 * "https://github.com/mudrod/mudrod/blob/master/core/src/main/resources/config.xml">config.xml</a>.
 */
public class MudrodEngine {

    private static final Logger LOG = LoggerFactory.getLogger(MudrodEngine.class);

    // Command line option names; the workflow ones double as workflow identifiers.
    private static final String LOG_INGEST = "logIngest";
    private static final String FULL_INGEST = "fullIngest";
    private static final String PROCESSING = "processingWithPreResults";
    private static final String SESSION_RECON = "sessionReconstruction";
    private static final String VOCAB_SIM_FROM_LOG = "vocabSimFromLog";
    private static final String ADD_META_ONTO = "addSimFromMetadataAndOnto";
    private static final String LOG_DIR = "logDir";

    /**
     * Workflow options in the order in which they win when several of them are
     * given on the same command line.
     */
    private static final String[] WORKFLOW_PRECEDENCE = { LOG_INGEST, FULL_INGEST, PROCESSING, SESSION_RECON,
            VOCAB_SIM_FROM_LOG, ADD_META_ONTO };

    private final Properties props = new Properties();
    private ESDriver es = null;
    private SparkDriver spark = null;

    /**
     * Public constructor for this class. The configuration is empty and no
     * driver is set until the caller provides them.
     */
    public MudrodEngine() {
        // default constructor
    }

    /**
     * Start the {@link esiptestbed.mudrod.driver.ESDriver}. Should only be called
     * after call to {@link esiptestbed.mudrod.main.MudrodEngine#loadConfig()}.
     * The new driver is returned but not stored on this engine; use
     * {@link #setESDriver(ESDriver)} for that.
     * 
     * @return a new {@link esiptestbed.mudrod.driver.ESDriver} built from the
     *         current configuration
     */
    public ESDriver startESDriver() {
        return new ESDriver(props);
    }

    /**
     * Start the {@link esiptestbed.mudrod.driver.SparkDriver}. Should only be
     * called after call to
     * {@link esiptestbed.mudrod.main.MudrodEngine#loadConfig()}. The new driver
     * is returned but not stored on this engine; use
     * {@link #setSparkDriver(SparkDriver)} for that.
     * 
     * @return a new {@link esiptestbed.mudrod.driver.SparkDriver}
     */
    public SparkDriver startSparkDriver() {
        return new SparkDriver();
    }

    /**
     * Retrieve the Mudrod configuration as a Properties Map containing K, V of
     * type String.
     * 
     * @return the live (not copied) {@link java.util.Properties} object held by
     *         this engine
     */
    public Properties getConfig() {
        return props;
    }

    /**
     * Retrieve the Mudrod {@link esiptestbed.mudrod.driver.ESDriver}
     * 
     * @return the {@link esiptestbed.mudrod.driver.ESDriver} instance, or
     *         {@code null} if none has been set.
     */
    public ESDriver getESDriver() {
        return this.es;
    }

    /**
     * Set the Elasticsearch driver for MUDROD
     * 
     * @param es
     *          an ES driver instance
     */
    public void setESDriver(ESDriver es) {
        this.es = es;
    }

    /**
     * Load the configuration provided at <a href=
     * "https://github.com/mudrod/mudrod/blob/master/core/src/main/resources/config.xml">config.xml</a>.
     * Every {@code para} child of the root element is stored under the value of
     * its {@code name} attribute, with the trimmed element text as value.
     * Problems (missing resource, unparsable XML) are logged rather than thrown,
     * in which case the returned properties contain whatever was loaded so far.
     * 
     * @return the {@link java.util.Properties} object of this engine, populated
     *         with the entries that could be read.
     */
    public Properties loadConfig() {
        // try-with-resources guarantees the classpath stream is closed again
        try (InputStream configStream = MudrodEngine.class.getClassLoader().getResourceAsStream("config.xml")) {
            if (configStream == null) {
                LOG.error("Unable to locate 'config.xml' on the classpath, no configuration has been loaded!");
                return getConfig();
            }

            Document document = new SAXBuilder().build(configStream);
            List<Element> paraList = document.getRootElement().getChildren("para");
            for (Element paraNode : paraList) {
                String name = paraNode.getAttributeValue("name");
                if (name == null) {
                    // Properties rejects null keys, so skip instead of failing the whole load
                    LOG.warn("Skipping 'para' element without 'name' attribute in 'config.xml'.");
                    continue;
                }
                props.put(name, paraNode.getTextTrim());
            }
        } catch (JDOMException | IOException e) {
            LOG.error("Exception whilst retreiving or processing XML contained within 'config.xml'!", e);
        }
        return getConfig();
    }

    /**
     * Preprocess and process various
     * {@link esiptestbed.mudrod.discoveryengine.DiscoveryEngineAbstract}
     * implementations for weblog, ontology and metadata, linkage discovery and
     * integration, followed by preprocessing and processing of the
     * {@link esiptestbed.mudrod.discoveryengine.RecommendEngine}.
     */
    public void startFullIngest() {
        DiscoveryEngineAbstract wd = new WeblogDiscoveryEngine(props, es, spark);
        wd.preprocess();
        wd.process();

        runOntologyAndMetadataDiscovery();

        LinkageIntegration li = new LinkageIntegration(props, es, spark);
        li.execute();

        DiscoveryEngineAbstract recom = new RecommendEngine(props, es, spark);
        recom.preprocess();
        recom.process();
    }

    /**
     * Begin ingesting logs with the
     * {@link esiptestbed.mudrod.discoveryengine.WeblogDiscoveryEngine}
     */
    public void logIngest() {
        WeblogDiscoveryEngine wd = new WeblogDiscoveryEngine(props, es, spark);
        wd.logIngest();
    }

    /**
     * Reconstructing user sessions based on raw logs.
     */
    public void sessionRestruction() {
        WeblogDiscoveryEngine wd = new WeblogDiscoveryEngine(props, es, spark);
        wd.sessionRestruct();
    }

    /**
     * Calculating vocab similarity based on reconstructed sessions.
     */
    public void vocabSimFromLog() {
        WeblogDiscoveryEngine wd = new WeblogDiscoveryEngine(props, es, spark);
        wd.process();
    }

    /**
     * Adding ontology and metadata results to vocab similarity results from web
     * logs.
     */
    public void addMetaAndOntologySim() {
        runOntologyAndMetadataDiscovery();
        LOG.info("*****************Ontology and metadata similarity have "
                + "been added successfully******************");
    }

    /**
     * Run the workflow on top of already existing weblog preprocessing results:
     * the weblog engine is only processed (not preprocessed), ontology and
     * metadata engines are preprocessed and processed, linkage integration is
     * executed and finally the
     * {@link esiptestbed.mudrod.discoveryengine.RecommendEngine} is processed
     * (not preprocessed).
     */
    public void startProcessing() {
        DiscoveryEngineAbstract wd = new WeblogDiscoveryEngine(props, es, spark);
        wd.process();

        runOntologyAndMetadataDiscovery();

        LinkageIntegration li = new LinkageIntegration(props, es, spark);
        li.execute();

        DiscoveryEngineAbstract recom = new RecommendEngine(props, es, spark);
        recom.process();
    }

    /**
     * Begin ingesting logs with the
     * {@link esiptestbed.mudrod.discoveryengine.WeblogDiscoveryEngine}. Same as
     * {@link #logIngest()}.
     */
    public void startLogIngest() {
        logIngest();
    }

    /**
     * Close the connection to the {@link esiptestbed.mudrod.driver.ESDriver}
     * instance. Does nothing when no driver has been set.
     */
    public void end() {
        if (es != null) {
            es.close();
        }
    }

    /**
     * Main program invocation. Requires the {@code logDir} option denoting the
     * location (on disk) of the data to be processed, plus one option selecting
     * the workflow to run. Usage help is printed if the command line cannot be
     * parsed or no workflow is selected. Failures of the workflow itself are
     * logged, and the Elasticsearch driver is closed in either case.
     * 
     * @param args
     *          {@link java.lang.String} array containing correct parameters.
     */
    public static void main(String[] args) {
        Options options = buildOptions();

        CommandLineParser parser = new DefaultParser();
        CommandLine line;
        try {
            line = parser.parse(options, args);
        } catch (ParseException e) {
            printUsage(options);
            LOG.error("Error inputting command line!", e);
            return;
        }

        String processingType = resolveProcessingType(line);
        if (processingType == null) {
            // nothing to run: tell the user instead of starting the drivers for a no-op
            printUsage(options);
            if (!line.hasOption("help")) {
                LOG.error("No ingest method provided on the command line!");
            }
            return;
        }

        String dataDir = normalizeDataDir(line.getOptionValue(LOG_DIR));

        MudrodEngine me = new MudrodEngine();
        try {
            me.loadConfig();
            me.props.put(LOG_DIR, dataDir);
            me.es = me.startESDriver();
            me.spark = me.startSparkDriver();
            loadFullConfig(me, dataDir);
            me.runWorkflow(processingType);
        } catch (Exception e) {
            // top level boundary: report the failure, the finally block still cleans up
            LOG.error("Exception whilst running Mudrod workflow '{}'!", processingType, e);
        } finally {
            me.end();
        }
    }

    /**
     * Preprocess and process the ontology engine, then the metadata engine.
     * Shared by the workflows that need both discovery steps.
     */
    private void runOntologyAndMetadataDiscovery() {
        DiscoveryEngineAbstract od = new OntologyDiscoveryEngine(props, es, spark);
        od.preprocess();
        od.process();

        DiscoveryEngineAbstract md = new MetadataDiscoveryEngine(props, es, spark);
        md.preprocess();
        md.process();
    }

    /**
     * Dispatch to the workflow method matching the given command line option
     * name. Unknown names are ignored.
     */
    private void runWorkflow(String processingType) {
        switch (processingType) {
        case LOG_INGEST:
            logIngest();
            break;
        case PROCESSING:
            startProcessing();
            break;
        case SESSION_RECON:
            sessionRestruction();
            break;
        case VOCAB_SIM_FROM_LOG:
            vocabSimFromLog();
            break;
        case ADD_META_ONTO:
            addMetaAndOntologySim();
            break;
        case FULL_INGEST:
            startFullIngest();
            break;
        default:
            break;
        }
    }

    /**
     * Build the command line options understood by {@link #main(String[])}.
     */
    private static Options buildOptions() {
        Options options = new Options();

        // boolean options
        options.addOption(new Option("h", "help", false, "show this help message"));
        // import raw web log into Elasticsearch
        options.addOption(new Option("l", LOG_INGEST, false, "begin log ingest without any processing only"));
        // preprocessing + processing
        options.addOption(new Option("f", FULL_INGEST, false, "begin full ingest Mudrod workflow"));
        // processing only, assuming that preprocessing results is in logDir
        options.addOption(new Option("p", PROCESSING, false, "begin processing with preprocessing results"));
        // preprocessing web log, assuming web log has already been imported
        options.addOption(new Option("s", SESSION_RECON, false, "begin session reconstruction"));
        // calculate vocab similarity from session reconstruction results
        options.addOption(new Option("v", VOCAB_SIM_FROM_LOG, false,
                "begin similarity calulation from web log Mudrod workflow"));
        // add metadata and ontology preprocessing and processing results into web
        // log vocab similarity
        options.addOption(new Option("a", ADD_META_ONTO, false, "begin adding metadata and ontology results"));

        // argument options
        options.addOption(Option.builder(LOG_DIR).required(true).numberOfArgs(1).hasArg(true)
                .desc("the log directory to be processed by Mudrod").argName(LOG_DIR).build());

        return options;
    }

    /**
     * Print the usage help for {@link #main(String[])}.
     */
    private static void printUsage(Options options) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(
                "MudrodEngine: 'logDir' argument is mandatory. " + "User must also provide an ingest method.",
                options, true);
    }

    /**
     * Find the workflow requested on the command line. When several workflow
     * options are present the first one in {@link #WORKFLOW_PRECEDENCE} wins.
     * 
     * @return the option name of the selected workflow or {@code null} if none
     *         was given
     */
    private static String resolveProcessingType(CommandLine line) {
        for (String workflow : WORKFLOW_PRECEDENCE) {
            if (line.hasOption(workflow)) {
                return workflow;
            }
        }
        return null;
    }

    /**
     * Convert backslashes to forward slashes and make sure the directory ends
     * with a single trailing separator so file names can simply be appended.
     */
    private static String normalizeDataDir(String rawDir) {
        String dataDir = rawDir.replace("\\", "/");
        return dataDir.endsWith("/") ? dataDir : dataDir + "/";
    }

    /**
     * Add the locations of all input and intermediate files, derived from the
     * given data directory, to the configuration of the engine.
     * 
     * @param me
     *          the engine whose configuration is extended
     * @param dataDir
     *          data directory, expected to end with '/'
     */
    private static void loadFullConfig(MudrodEngine me, String dataDir) {
        me.props.put("ontologyInputDir", dataDir + "SWEET_ocean/");
        me.props.put("oceanTriples", dataDir + "Ocean_triples.csv");
        me.props.put("userHistoryMatrix", dataDir + "UserHistoryMatrix.csv");
        me.props.put("clickstreamMatrix", dataDir + "ClickstreamMatrix.csv");
        me.props.put("metadataMatrix", dataDir + "MetadataMatrix.csv");
        me.props.put("clickstreamSVDMatrix_tmp", dataDir + "clickstreamSVDMatrix_tmp.csv");
        me.props.put("metadataSVDMatrix_tmp", dataDir + "metadataSVDMatrix_tmp.csv");
        me.props.put("raw_metadataPath", dataDir + "RawMetadata");

        me.props.put("metadataOBCode", dataDir + "MetadataOHCode");
        me.props.put("session_item_Matrix", dataDir + "metadata_session_coocurrence_matrix.csv");
        me.props.put("metadata_topic", dataDir + "metadata_topic");
        me.props.put("metadata_topic_matrix", dataDir + "metadata_topic_matrix.csv");
    }

    /**
     * Obtain the spark implementation.
     * @return the {@link esiptestbed.mudrod.driver.SparkDriver}, or
     *         {@code null} if none has been set
     */
    public SparkDriver getSparkDriver() {
        return this.spark;
    }

    /**
     * Set the {@link esiptestbed.mudrod.driver.SparkDriver}
     * @param sparkDriver a configured {@link esiptestbed.mudrod.driver.SparkDriver}
     */
    public void setSparkDriver(SparkDriver sparkDriver) {
        this.spark = sparkDriver;
    }
}