// Java tutorial
/* * Copyright 2014 Hugo m09? Mougard. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package eu.crydee.alignment.aligner.cr; import com.google.common.collect.Sets; import eu.crydee.alignment.aligner.ts.Document; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.collection.CollectionException; import org.apache.uima.fit.component.JCasCollectionReader_ImplBase; import org.apache.uima.fit.component.ViewCreatorAnnotator; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.util.Progress; import org.apache.uima.util.ProgressImpl; /** * * @author Hugo m09? 
Mougard
 */
// Collection reader that pairs each "<id>.tei.xml" file of the TEI directory
// with the "<id>.dfxp" file of the DFXP directory and emits one CAS per pair.
// Each CAS gets two views (named by the PARAM_VIEW_* parameters), one holding
// the raw text of each file, plus a Document annotation carrying the id.
public class VideoLecturesCR extends JCasCollectionReader_ImplBase {

    private static final Logger logger
            = LogManager.getLogger(VideoLecturesCR.class);

    /** File name suffix identifying a TEI file; the rest of the name is the id. */
    private static final String TEI_SUFFIX = ".tei.xml";

    /** File name suffix identifying a DFXP file; the rest of the name is the id. */
    private static final String DFXP_SUFFIX = ".dfxp";

    /** Name of the parameter holding the path of the directory of TEI files. */
    public static final String PARAM_TEI_DIRPATH = "P1";
    @ConfigurationParameter(name = PARAM_TEI_DIRPATH, mandatory = true)
    private String teiDirpath;

    /** Name of the parameter holding the path of the directory of DFXP files. */
    public static final String PARAM_DFXP_DIRPATH = "P2";
    @ConfigurationParameter(name = PARAM_DFXP_DIRPATH, mandatory = true)
    private String dfxpDirpath;

    /** Name of the parameter holding the name of the view receiving the TEI text. */
    public static final String PARAM_VIEW_TEI = "P3";
    @ConfigurationParameter(name = PARAM_VIEW_TEI, mandatory = true)
    private String teiName;

    /** Name of the parameter holding the name of the view receiving the DFXP text. */
    public static final String PARAM_VIEW_DFXP = "P4";
    @ConfigurationParameter(name = PARAM_VIEW_DFXP, mandatory = true)
    private String dfxpName;

    /** Number of TEI/DFXP pairs already emitted by {@link #getNext(JCas)}. */
    private int currentIndex;

    /** Number of TEI/DFXP pairs found by {@link #initialize(UimaContext)}. */
    private int total;

    /** Ids (file names without suffix) that remain to be read. */
    private Iterator<String> ids;

    /**
     * Lists both directories and keeps the ids for which a TEI file and a
     * DFXP file both exist.
     *
     * @param context the UIMA context, forwarded to the superclass.
     * @throws ResourceInitializationException if either configured path
     * cannot be listed as a directory; every failing path is reported.
     */
    @Override
    public void initialize(UimaContext context)
            throws ResourceInitializationException {
        super.initialize(context);
        // File#list returns null when the path is not a listable directory.
        String[] dfxps = new File(dfxpDirpath).list(),
                teis = new File(teiDirpath).list();
        List<String> errs = new ArrayList<>();
        // Independent checks: both problems are reported when both paths are
        // wrong, rather than only the first one.
        if (teis == null) {
            errs.add("The TEI directory path doesn't resolve to a directory.");
        }
        if (dfxps == null) {
            errs.add("The DFXP directory path doesn't resolve to a directory.");
        }
        if (!errs.isEmpty()) {
            String message = String.join("\n", errs);
            logger.error(message);
            // Carry the reason in the exception as well as in the log.
            throw new ResourceInitializationException(
                    new IllegalArgumentException(message));
        }
        Set<String> dfxpsSet = Sets.newHashSet(dfxps);
        List<String> pairedIds = Arrays.stream(teis)
                // Only real TEI files qualify; strip the suffix at the end of
                // the name only, never in the middle of it.
                .filter(s -> s.endsWith(TEI_SUFFIX))
                .map(s -> s.substring(0, s.length() - TEI_SUFFIX.length()))
                .filter(s -> dfxpsSet.contains(s + DFXP_SUFFIX))
                // File#list guarantees no order; sort for reproducible runs.
                .sorted()
                .collect(Collectors.toList());
        total = pairedIds.size();
        ids = pairedIds.iterator();
        currentIndex = 0;
    }

    /**
     * Fills the given CAS with the next TEI/DFXP pair.
     *
     * The TEI and DFXP views receive the content of the corresponding files
     * (read as UTF-8), the default view receives a placeholder text, and a
     * {@code Document} annotation named after the id and spanning the whole
     * text is indexed in each of the three.
     *
     * @param jcas the CAS to populate.
     * @throws IOException if one of the two files cannot be read.
     * @throws CollectionException if one of the views cannot be created.
     */
    @Override
    public void getNext(JCas jcas) throws IOException, CollectionException {
        JCas teiV, dfxpV;
        try {
            teiV = ViewCreatorAnnotator.createViewSafely(jcas, teiName);
            dfxpV = ViewCreatorAnnotator.createViewSafely(jcas, dfxpName);
        } catch (AnalysisEngineProcessException ex) {
            throw new CollectionException(ex);
        }
        jcas.setDocumentLanguage("en");
        teiV.setDocumentLanguage("en");
        dfxpV.setDocumentLanguage("en");
        String id = ids.next();
        teiV.setDocumentText(FileUtils.readFileToString(
                new File(teiDirpath, id + TEI_SUFFIX),
                StandardCharsets.UTF_8));
        dfxpV.setDocumentText(FileUtils.readFileToString(
                new File(dfxpDirpath, id + DFXP_SUFFIX),
                StandardCharsets.UTF_8));
        jcas.setDocumentText("The default CAS stays empty in this pipeline.");
        for (JCas j : new JCas[] { teiV, dfxpV, jcas }) {
            // Annotation end offsets are exclusive in UIMA, so the end is the
            // text length itself (length - 1 would drop the last character
            // and yield -1 for an empty text).
            Document document
                    = new Document(j, 0, j.getDocumentText().length());
            document.setName(id);
            document.addToIndexes();
        }
        ++currentIndex;
    }

    /**
     * @return {@code true} while at least one TEI/DFXP pair remains.
     */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        return ids.hasNext();
    }

    /**
     * @return a single progress entry counting emitted pairs out of the
     * number of pairs found at initialization.
     */
    @Override
    public Progress[] getProgress() {
        return new Progress[] {
            new ProgressImpl(currentIndex, total, Progress.ENTITIES)
        };
    }
}