// Java tutorial
/* * Copyright 2014 Hugo m09? Mougard. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package eu.crydee.alignment.aligner; import eu.crydee.alignment.aligner.ae.AlignBestAE; import eu.crydee.alignment.aligner.ae.AlignThresholdAE; import eu.crydee.alignment.aligner.ae.CosineSimilarityAE; import eu.crydee.alignment.aligner.ae.DFXPExtractorAE; import eu.crydee.alignment.aligner.ae.IsfAE; import eu.crydee.alignment.aligner.ae.MetricsOneVsOneC; import eu.crydee.alignment.aligner.ae.TAFC; import eu.crydee.alignment.aligner.ae.StopWordsAE; import eu.crydee.alignment.aligner.ae.TEIExtractorAE; import eu.crydee.alignment.aligner.ae.ViewCopierAE; import eu.crydee.alignment.aligner.ae.WindowSentenceSplitterAE; import eu.crydee.alignment.aligner.ae.XmiSerializerC; import eu.crydee.alignment.aligner.cr.VideoLecturesCR; import eu.crydee.alignment.aligner.ts.Sentence; import eu.crydee.alignment.aligner.ts.Token; import eu.crydee.alignment.metricslab.model.Complete; import java.io.IOException; import opennlp.uima.sentdetect.SentenceDetector; import opennlp.uima.sentdetect.SentenceModelResourceImpl; import opennlp.uima.tokenize.Tokenizer; import opennlp.uima.tokenize.TokenizerModelResourceImpl; import opennlp.uima.util.UimaUtil; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; 
import org.apache.commons.cli.PosixParser; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.uima.UIMAException; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.cas.CAS; import org.apache.uima.collection.CollectionReader; import org.apache.uima.fit.factory.AggregateBuilder; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.ExternalResourceFactory.createExternalResourceDescription; import org.apache.uima.fit.pipeline.SimplePipeline; import org.apache.uima.resource.ResourceInitializationException; /** * * @author Hugo m09? Mougard */ public class VideoLecturesP { private static final Logger logger = LogManager.getLogger(VideoLecturesP.class); private static class Params { final public String TEI_DIRPATH, DFXP_DIRPATH; public Params(String teiDirpath, String dfxpDirpath) { TEI_DIRPATH = teiDirpath; DFXP_DIRPATH = dfxpDirpath; } } public static void main(String[] args) throws ResourceInitializationException, UIMAException, IOException, ParseException { Params params = parseArguments(args); CollectionReader cr = createReader(VideoLecturesCR.class, VideoLecturesCR.PARAM_TEI_DIRPATH, params.TEI_DIRPATH, VideoLecturesCR.PARAM_DFXP_DIRPATH, params.DFXP_DIRPATH, VideoLecturesCR.PARAM_VIEW_TEI, Config.videoLecturesTei, VideoLecturesCR.PARAM_VIEW_DFXP, Config.videoLecturesDfxp); AnalysisEngineDescription dfxpExtractor = createEngineDescription(DFXPExtractorAE.class, DFXPExtractorAE.PARAM_VIEW_DFXP, Config.videoLecturesDfxp, DFXPExtractorAE.PARAM_VIEW_ELEMENTARY, Config.videoLecturesTalk); AnalysisEngineDescription teiExtractor = createEngineDescription(TEIExtractorAE.class, TEIExtractorAE.PARAM_VIEW_TEI, Config.videoLecturesTei, TEIExtractorAE.PARAM_VIEW_REGULAR, Config.videoLecturesArticle); AnalysisEngineDescription 
sentDetector = createEngineDescription(SentenceDetector.class, UimaUtil.MODEL_PARAMETER, createExternalResourceDescription(SentenceModelResourceImpl.class, "file:org/apache/opennlp/en-sent.bin"), UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName()); AnalysisEngineDescription windowSentDetector = createEngineDescription(WindowSentenceSplitterAE.class, WindowSentenceSplitterAE.PARAM_WINDOW_SIZE, 15); AnalysisEngineDescription tokenizer = createEngineDescription(Tokenizer.class, UimaUtil.MODEL_PARAMETER, createExternalResourceDescription(TokenizerModelResourceImpl.class, "file:org/apache/opennlp/en-token.bin"), UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName(), UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName()); AnalysisEngineDescription stopWords = createEngineDescription(StopWordsAE.class); AnalysisEngineDescription isf = createEngineDescription(IsfAE.class, IsfAE.PARAM_VIEW_ELEMENTARY, Config.videoLecturesTalk, IsfAE.PARAM_VIEW_REGULAR, Config.videoLecturesArticle); AnalysisEngineDescription cosineSimilarity = createEngineDescription(CosineSimilarityAE.class, CosineSimilarityAE.PARAM_VIEW_LEFT, Config.videoLecturesTalk, CosineSimilarityAE.PARAM_VIEW_RIGHT, Config.videoLecturesArticle); AnalysisEngineDescription copierTalkBest = createEngineDescription(ViewCopierAE.class, ViewCopierAE.PARAM_VIEW_SOURCE, Config.videoLecturesTalk, ViewCopierAE.PARAM_VIEW_DESTINATION, Config.videoLecturesTalkBest); AnalysisEngineDescription copierArticleBest = createEngineDescription(ViewCopierAE.class, ViewCopierAE.PARAM_VIEW_SOURCE, Config.videoLecturesArticle, ViewCopierAE.PARAM_VIEW_DESTINATION, Config.videoLecturesArticleBest); AnalysisEngineDescription copierTalkThreshold = createEngineDescription(ViewCopierAE.class, ViewCopierAE.PARAM_VIEW_SOURCE, Config.videoLecturesTalk, ViewCopierAE.PARAM_VIEW_DESTINATION, Config.videoLecturesTalkThreshold); AnalysisEngineDescription copierArticleThreshold = createEngineDescription(ViewCopierAE.class, 
ViewCopierAE.PARAM_VIEW_SOURCE, Config.videoLecturesArticle, ViewCopierAE.PARAM_VIEW_DESTINATION, Config.videoLecturesArticleThreshold); AnalysisEngineDescription alignerBest = createEngineDescription(AlignBestAE.class, AlignBestAE.PARAM_VIEW_LEFT, Config.videoLecturesTalkBest, AlignBestAE.PARAM_VIEW_RIGHT, Config.videoLecturesArticleBest); AnalysisEngineDescription alignerThreshold = createEngineDescription(AlignThresholdAE.class, AlignThresholdAE.PARAM_VIEW_LEFT, Config.videoLecturesTalkThreshold, AlignThresholdAE.PARAM_VIEW_RIGHT, Config.videoLecturesArticleThreshold); AnalysisEngineDescription casWriter = createEngineDescription(XmiSerializerC.class, XmiSerializerC.PARAM_OUT_FOLDER, Config.videoLecturesCas); AnalysisEngineDescription tafWriterBest = createEngineDescription(TAFC.class, TAFC.PARAM_OUTPUT_FOLDER, Config.videoLecturesTafBest, TAFC.PARAM_VIEW_LEFT, Config.videoLecturesTalkBest, TAFC.PARAM_VIEW_RIGHT, Config.videoLecturesArticleBest); AnalysisEngineDescription tafWriterThreshold = createEngineDescription(TAFC.class, TAFC.PARAM_OUTPUT_FOLDER, Config.videoLecturesTafThreshold, TAFC.PARAM_VIEW_LEFT, Config.videoLecturesTalkThreshold, TAFC.PARAM_VIEW_RIGHT, Config.videoLecturesArticleThreshold); AnalysisEngineDescription oneVOneMetrics = createEngineDescription(MetricsOneVsOneC.class, MetricsOneVsOneC.PARAM_TAF_FOLDER_1, Config.videoLecturesTafBest, MetricsOneVsOneC.PARAM_TAF_FOLDER_2, Config.videoLecturesTafThreshold, MetricsOneVsOneC.PARAM_LEFT_ALGO_NAME, "Best candidate stupid alignment", MetricsOneVsOneC.PARAM_RIGHT_ALGO_NAME, "Thresholded stupid alignment", MetricsOneVsOneC.PARAM_HTML_OUTPUT_FILEPATH, Config.videoLecturesHtml, MetricsOneVsOneC.PARAM_METRICS_TO_SUMMARIZE, new String[] { Complete.LEFT_ORDER_CONSERVATION_KEY, Complete.SYMMETRY, Complete.LEFT_COOCCURRENCE }); AggregateBuilder b = new AggregateBuilder(); b.add(dfxpExtractor); b.add(teiExtractor); b.add(windowSentDetector, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesTalk); 
b.add(sentDetector, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesArticle); b.add(tokenizer, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesArticle); b.add(stopWords, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesTalk); b.add(stopWords, CAS.NAME_DEFAULT_SOFA, Config.videoLecturesArticle); b.add(isf); b.add(cosineSimilarity); b.add(copierTalkBest); b.add(copierArticleBest); b.add(alignerBest); b.add(copierTalkThreshold); b.add(copierArticleThreshold); b.add(alignerThreshold); b.add(casWriter); b.add(tafWriterBest); b.add(tafWriterThreshold); b.add(oneVOneMetrics); SimplePipeline.runPipeline(cr, b.createAggregate()); } static private Params parseArguments(String[] args) throws ParseException { Options shortCircuitOptions = new Options(); shortCircuitOptions .addOption(OptionBuilder.withLongOpt("help").withDescription("Print this message.").create('h')); shortCircuitOptions .addOption(OptionBuilder.withLongOpt("version").withDescription("Print the version.").create('v')); Options options = new Options(); options.addOption(OptionBuilder.isRequired().withLongOpt("tei").hasArg().withArgName("folder-path") .withDescription("Path to the folder of the TEI body files.").create('t')); options.addOption(OptionBuilder.isRequired().withLongOpt("dfxp").hasArg().withArgName("folder-path") .withDescription("Path to the folder of the DFXP files.").create('d')); CommandLineParser parser = new PosixParser(); CommandLine cmd = parser.parse(shortCircuitOptions, args, true); if (cmd.hasOption('h')) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("aligner", options, true); System.exit(0); } if (cmd.hasOption('v')) { System.out.println("aligner v1.0.0-SNAPSHOT"); System.exit(0); } cmd = parser.parse(options, args); return new Params(cmd.getOptionValue('t'), cmd.getOptionValue('d')); } }