eu.crydee.alignment.aligner.VideoLecturesP.java Source code

Java tutorial

Introduction

Here is the source code for eu.crydee.alignment.aligner.VideoLecturesP.java

Source

/*
 * Copyright 2014 Hugo m09? Mougard.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package eu.crydee.alignment.aligner;

import eu.crydee.alignment.aligner.ae.AlignBestAE;
import eu.crydee.alignment.aligner.ae.AlignThresholdAE;
import eu.crydee.alignment.aligner.ae.CosineSimilarityAE;
import eu.crydee.alignment.aligner.ae.DFXPExtractorAE;
import eu.crydee.alignment.aligner.ae.IsfAE;
import eu.crydee.alignment.aligner.ae.MetricsOneVsOneC;
import eu.crydee.alignment.aligner.ae.TAFC;
import eu.crydee.alignment.aligner.ae.StopWordsAE;
import eu.crydee.alignment.aligner.ae.TEIExtractorAE;
import eu.crydee.alignment.aligner.ae.ViewCopierAE;
import eu.crydee.alignment.aligner.ae.WindowSentenceSplitterAE;
import eu.crydee.alignment.aligner.ae.XmiSerializerC;
import eu.crydee.alignment.aligner.cr.VideoLecturesCR;
import eu.crydee.alignment.aligner.ts.Sentence;
import eu.crydee.alignment.aligner.ts.Token;
import eu.crydee.alignment.metricslab.model.Complete;
import java.io.IOException;
import opennlp.uima.sentdetect.SentenceDetector;
import opennlp.uima.sentdetect.SentenceModelResourceImpl;
import opennlp.uima.tokenize.Tokenizer;
import opennlp.uima.tokenize.TokenizerModelResourceImpl;
import opennlp.uima.util.UimaUtil;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.factory.AggregateBuilder;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.ExternalResourceFactory.createExternalResourceDescription;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.resource.ResourceInitializationException;

/**
 * Command-line entry point that assembles and runs the VideoLectures
 * alignment pipeline.
 *
 * <p>
 * The pipeline reads documents through {@link VideoLecturesCR} from a folder
 * of TEI files and a folder of DFXP files, extracts a "talk" view and an
 * "article" view, splits them into sentences, computes similarities, runs
 * two aligners ({@link AlignBestAE} and {@link AlignThresholdAE}) on copies
 * of those views, and finally writes the CASes, the TAF files and a metrics
 * report to the locations defined in {@code Config}.
 *
 * @author Hugo m09? Mougard
 */
public class VideoLecturesP {

    private static final Logger logger = LogManager.getLogger(VideoLecturesP.class);

    /** Program name shown in the usage message. */
    private static final String COMMAND_NAME = "aligner";

    /**
     * Immutable holder for the values read from the command line.
     */
    private static final class Params {

        /** Value of the {@code -t/--tei} option. */
        private final String teiDirpath;

        /** Value of the {@code -d/--dfxp} option. */
        private final String dfxpDirpath;

        Params(String teiDirpath, String dfxpDirpath) {
            this.teiDirpath = teiDirpath;
            this.dfxpDirpath = dfxpDirpath;
        }
    }

    /**
     * Parses the command line, builds the collection reader and the aggregate
     * analysis engine, then runs the whole pipeline.
     *
     * @param args command-line arguments; {@code -t/--tei} and
     *             {@code -d/--dfxp} are required unless {@code -h} or
     *             {@code -v} is given
     * @throws ResourceInitializationException if the reader or an engine
     *                                         cannot be created
     * @throws UIMAException                   if the pipeline fails
     * @throws IOException                     if the pipeline fails on I/O
     * @throws ParseException                  if the command line is invalid
     *                                         (the usage message is printed
     *                                         before the exception is
     *                                         propagated)
     */
    public static void main(String[] args)
            throws ResourceInitializationException, UIMAException, IOException, ParseException {
        Params params = parseArguments(args);
        CollectionReader reader = buildReader(params);
        AggregateBuilder pipeline = buildPipeline();
        SimplePipeline.runPipeline(reader, pipeline.createAggregate());
    }

    /**
     * Creates the collection reader that feeds the TEI and DFXP folders into
     * their respective views.
     */
    private static CollectionReader buildReader(Params params) throws ResourceInitializationException {
        return createReader(VideoLecturesCR.class,
                VideoLecturesCR.PARAM_TEI_DIRPATH, params.teiDirpath,
                VideoLecturesCR.PARAM_DFXP_DIRPATH, params.dfxpDirpath,
                VideoLecturesCR.PARAM_VIEW_TEI, Config.videoLecturesTei,
                VideoLecturesCR.PARAM_VIEW_DFXP, Config.videoLecturesDfxp);
    }

    /**
     * Assembles every analysis engine of the pipeline, in execution order.
     */
    private static AggregateBuilder buildPipeline() throws ResourceInitializationException {
        AggregateBuilder builder = new AggregateBuilder();
        addExtractionAndPreprocessing(builder);
        addSimilarityAndAlignment(builder);
        addWritersAndMetrics(builder);
        return builder;
    }

    /**
     * Adds the engines that extract the talk and article views and annotate
     * them with sentences, tokens and stop words.
     */
    private static void addExtractionAndPreprocessing(AggregateBuilder builder)
            throws ResourceInitializationException {
        AnalysisEngineDescription dfxpExtractor = createEngineDescription(DFXPExtractorAE.class,
                DFXPExtractorAE.PARAM_VIEW_DFXP, Config.videoLecturesDfxp,
                DFXPExtractorAE.PARAM_VIEW_ELEMENTARY, Config.videoLecturesTalk);
        AnalysisEngineDescription teiExtractor = createEngineDescription(TEIExtractorAE.class,
                TEIExtractorAE.PARAM_VIEW_TEI, Config.videoLecturesTei,
                TEIExtractorAE.PARAM_VIEW_REGULAR, Config.videoLecturesArticle);
        AnalysisEngineDescription windowSentDetector = createEngineDescription(
                WindowSentenceSplitterAE.class,
                WindowSentenceSplitterAE.PARAM_WINDOW_SIZE, 15);
        AnalysisEngineDescription sentDetector = createEngineDescription(SentenceDetector.class,
                UimaUtil.MODEL_PARAMETER,
                createExternalResourceDescription(SentenceModelResourceImpl.class,
                        "file:org/apache/opennlp/en-sent.bin"),
                UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName());
        AnalysisEngineDescription tokenizer = createEngineDescription(Tokenizer.class,
                UimaUtil.MODEL_PARAMETER,
                createExternalResourceDescription(TokenizerModelResourceImpl.class,
                        "file:org/apache/opennlp/en-token.bin"),
                UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName(),
                UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName());
        AnalysisEngineDescription stopWords = createEngineDescription(StopWordsAE.class);

        builder.add(dfxpExtractor);
        builder.add(teiExtractor);
        // The talk view is split with the window-based splitter, whereas the
        // article view goes through the OpenNLP sentence detector and tokenizer.
        builder.add(windowSentDetector, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesTalk);
        builder.add(sentDetector, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesArticle);
        builder.add(tokenizer, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesArticle);
        // The same stop-word description is added twice, once per view.
        builder.add(stopWords, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesTalk);
        builder.add(stopWords, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesArticle);
    }

    /**
     * Adds the ISF and cosine similarity engines, then, for each of the two
     * alignment strategies, copies the talk and article views and runs the
     * aligner on the copies.
     */
    private static void addSimilarityAndAlignment(AggregateBuilder builder)
            throws ResourceInitializationException {
        AnalysisEngineDescription isf = createEngineDescription(IsfAE.class,
                IsfAE.PARAM_VIEW_ELEMENTARY, Config.videoLecturesTalk,
                IsfAE.PARAM_VIEW_REGULAR, Config.videoLecturesArticle);
        AnalysisEngineDescription cosineSimilarity = createEngineDescription(CosineSimilarityAE.class,
                CosineSimilarityAE.PARAM_VIEW_LEFT, Config.videoLecturesTalk,
                CosineSimilarityAE.PARAM_VIEW_RIGHT, Config.videoLecturesArticle);

        AnalysisEngineDescription copierTalkBest = createEngineDescription(ViewCopierAE.class,
                ViewCopierAE.PARAM_VIEW_SOURCE, Config.videoLecturesTalk,
                ViewCopierAE.PARAM_VIEW_DESTINATION, Config.videoLecturesTalkBest);
        AnalysisEngineDescription copierArticleBest = createEngineDescription(ViewCopierAE.class,
                ViewCopierAE.PARAM_VIEW_SOURCE, Config.videoLecturesArticle,
                ViewCopierAE.PARAM_VIEW_DESTINATION, Config.videoLecturesArticleBest);
        AnalysisEngineDescription alignerBest = createEngineDescription(AlignBestAE.class,
                AlignBestAE.PARAM_VIEW_LEFT, Config.videoLecturesTalkBest,
                AlignBestAE.PARAM_VIEW_RIGHT, Config.videoLecturesArticleBest);

        AnalysisEngineDescription copierTalkThreshold = createEngineDescription(ViewCopierAE.class,
                ViewCopierAE.PARAM_VIEW_SOURCE, Config.videoLecturesTalk,
                ViewCopierAE.PARAM_VIEW_DESTINATION, Config.videoLecturesTalkThreshold);
        AnalysisEngineDescription copierArticleThreshold = createEngineDescription(ViewCopierAE.class,
                ViewCopierAE.PARAM_VIEW_SOURCE, Config.videoLecturesArticle,
                ViewCopierAE.PARAM_VIEW_DESTINATION, Config.videoLecturesArticleThreshold);
        AnalysisEngineDescription alignerThreshold = createEngineDescription(AlignThresholdAE.class,
                AlignThresholdAE.PARAM_VIEW_LEFT, Config.videoLecturesTalkThreshold,
                AlignThresholdAE.PARAM_VIEW_RIGHT, Config.videoLecturesArticleThreshold);

        builder.add(isf);
        builder.add(cosineSimilarity);
        builder.add(copierTalkBest);
        builder.add(copierArticleBest);
        builder.add(alignerBest);
        builder.add(copierTalkThreshold);
        builder.add(copierArticleThreshold);
        builder.add(alignerThreshold);
    }

    /**
     * Adds the consumers: the XMI serializer, one TAF writer per alignment
     * strategy and the metrics report comparing the two TAF folders.
     */
    private static void addWritersAndMetrics(AggregateBuilder builder)
            throws ResourceInitializationException {
        AnalysisEngineDescription casWriter = createEngineDescription(XmiSerializerC.class,
                XmiSerializerC.PARAM_OUT_FOLDER, Config.videoLecturesCas);
        AnalysisEngineDescription tafWriterBest = createEngineDescription(TAFC.class,
                TAFC.PARAM_OUTPUT_FOLDER, Config.videoLecturesTafBest,
                TAFC.PARAM_VIEW_LEFT, Config.videoLecturesTalkBest,
                TAFC.PARAM_VIEW_RIGHT, Config.videoLecturesArticleBest);
        AnalysisEngineDescription tafWriterThreshold = createEngineDescription(TAFC.class,
                TAFC.PARAM_OUTPUT_FOLDER, Config.videoLecturesTafThreshold,
                TAFC.PARAM_VIEW_LEFT, Config.videoLecturesTalkThreshold,
                TAFC.PARAM_VIEW_RIGHT, Config.videoLecturesArticleThreshold);
        AnalysisEngineDescription oneVOneMetrics = createEngineDescription(MetricsOneVsOneC.class,
                MetricsOneVsOneC.PARAM_TAF_FOLDER_1, Config.videoLecturesTafBest,
                MetricsOneVsOneC.PARAM_TAF_FOLDER_2, Config.videoLecturesTafThreshold,
                MetricsOneVsOneC.PARAM_LEFT_ALGO_NAME, "Best candidate stupid alignment",
                MetricsOneVsOneC.PARAM_RIGHT_ALGO_NAME, "Thresholded stupid alignment",
                MetricsOneVsOneC.PARAM_HTML_OUTPUT_FILEPATH, Config.videoLecturesHtml,
                MetricsOneVsOneC.PARAM_METRICS_TO_SUMMARIZE, new String[] {
                        Complete.LEFT_ORDER_CONSERVATION_KEY,
                        Complete.SYMMETRY,
                        Complete.LEFT_COOCCURRENCE });

        builder.add(casWriter);
        builder.add(tafWriterBest);
        builder.add(tafWriterThreshold);
        builder.add(oneVOneMetrics);
    }

    /**
     * Parses the command line.
     *
     * <p>
     * A first lenient pass looks only for {@code -h/--help} and
     * {@code -v/--version}; either one prints its message and exits the JVM
     * with status 0. A second strict pass then requires {@code -t/--tei} and
     * {@code -d/--dfxp}.
     *
     * @param args raw command-line arguments
     * @return the TEI and DFXP folder paths given on the command line
     * @throws ParseException if the arguments are invalid; when the strict
     *                        pass fails, the error is logged and the usage
     *                        message is printed before the exception is
     *                        rethrown
     */
    private static Params parseArguments(String[] args) throws ParseException {
        Options shortCircuitOptions = new Options();
        shortCircuitOptions.addOption(OptionBuilder
                .withLongOpt("help")
                .withDescription("Print this message.")
                .create('h'));
        shortCircuitOptions.addOption(OptionBuilder
                .withLongOpt("version")
                .withDescription("Print the version.")
                .create('v'));

        Options options = new Options();
        options.addOption(OptionBuilder
                .isRequired()
                .withLongOpt("tei")
                .hasArg()
                .withArgName("folder-path")
                .withDescription("Path to the folder of the TEI body files.")
                .create('t'));
        options.addOption(OptionBuilder
                .isRequired()
                .withLongOpt("dfxp")
                .hasArg()
                .withArgName("folder-path")
                .withDescription("Path to the folder of the DFXP files.")
                .create('d'));

        CommandLineParser parser = new PosixParser();
        // Lenient pass (stopAtNonOption = true) so that asking for help or the
        // version does not fail on the missing required options.
        // NOTE(review): the parser presumably stops at the first token it does
        // not know, so -h/-v may only be honoured when they come before -t/-d
        // -- confirm against the Commons CLI version in use.
        CommandLine cmd = parser.parse(shortCircuitOptions, args, true);
        if (cmd.hasOption('h')) {
            printUsage(options);
            System.exit(0);
        }
        if (cmd.hasOption('v')) {
            System.out.println("aligner v1.0.0-SNAPSHOT");
            System.exit(0);
        }

        try {
            cmd = parser.parse(options, args);
        } catch (ParseException e) {
            // Tell the user what is expected instead of leaving them with a
            // bare stack trace; the exception itself is still propagated so
            // callers observe the same failure as before.
            logger.error("Invalid command line: {}", e.getMessage());
            printUsage(options);
            throw e;
        }
        return new Params(cmd.getOptionValue('t'), cmd.getOptionValue('d'));
    }

    /**
     * Prints the usage message for the given options on standard output.
     */
    private static void printUsage(Options options) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(COMMAND_NAME, options, true);
    }
}