eu.crydee.alignment.aligner.cr.VideoLecturesCR.java Source code

Java tutorial

Introduction

Here is the source code for eu.crydee.alignment.aligner.cr.VideoLecturesCR.java

Source

/*
 * Copyright 2014 Hugo "m09" Mougard.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package eu.crydee.alignment.aligner.cr;

import com.google.common.collect.Sets;
import eu.crydee.alignment.aligner.ts.Document;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.component.ViewCreatorAnnotator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/**
 * Collection reader that emits one CAS per document id for which both a
 * {@code <id>.tei.xml} file (in the TEI directory) and a {@code <id>.dfxp}
 * file (in the DFXP directory) exist.
 *
 * <p>
 * For each id, the content of the TEI file is stored in the view named by
 * {@link #PARAM_VIEW_TEI} and the content of the DFXP file in the view named
 * by {@link #PARAM_VIEW_DFXP}. The default view only receives a placeholder
 * text. Every one of those three views gets a {@link Document} annotation
 * named after the id and spanning the whole view text.
 *
 * @author Hugo "m09" Mougard
 */
public class VideoLecturesCR extends JCasCollectionReader_ImplBase {

    private static final Logger logger = LogManager.getLogger(VideoLecturesCR.class);

    /** File name suffix identifying a TEI file. */
    private static final String TEI_SUFFIX = ".tei.xml";

    /** File name suffix identifying a DFXP file. */
    private static final String DFXP_SUFFIX = ".dfxp";

    /** Path of the directory containing the {@code *.tei.xml} files. */
    public static final String PARAM_TEI_DIRPATH = "P1";
    @ConfigurationParameter(name = PARAM_TEI_DIRPATH, mandatory = true)
    private String teiDirpath;

    /** Path of the directory containing the {@code *.dfxp} files. */
    public static final String PARAM_DFXP_DIRPATH = "P2";
    @ConfigurationParameter(name = PARAM_DFXP_DIRPATH, mandatory = true)
    private String dfxpDirpath;

    /** Name of the view that receives the TEI file content. */
    public static final String PARAM_VIEW_TEI = "P3";
    @ConfigurationParameter(name = PARAM_VIEW_TEI, mandatory = true)
    private String teiName;

    /** Name of the view that receives the DFXP file content. */
    public static final String PARAM_VIEW_DFXP = "P4";
    @ConfigurationParameter(name = PARAM_VIEW_DFXP, mandatory = true)
    private String dfxpName;

    /** Number of documents already emitted by {@link #getNext(JCas)}. */
    private int currentIndex;

    /** Number of ids having both a TEI and a DFXP file. */
    private int total;

    /** Remaining ids to emit. */
    private Iterator<String> ids;

    /**
     * Lists both directories and keeps the ids that have a TEI file as well
     * as a DFXP file.
     *
     * @param context the UIMA context, forwarded to the superclass.
     * @throws ResourceInitializationException if either configured path does
     *         not resolve to a listable directory.
     */
    @Override
    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        String[] teis = new File(teiDirpath).list();
        String[] dfxps = new File(dfxpDirpath).list();
        // Check both directories independently so that every problem is
        // reported at once instead of only the first one.
        List<String> errs = new ArrayList<>();
        if (teis == null) {
            errs.add("The TEI directory path doesn't resolve to a directory.");
        }
        if (dfxps == null) {
            errs.add("The DFXP directory path doesn't resolve to a directory.");
        }
        if (!errs.isEmpty()) {
            String message = String.join("\n", errs);
            logger.error(message);
            // Carry the reason in the exception too, not only in the log.
            throw new ResourceInitializationException(new IllegalArgumentException(message));
        }
        Set<String> dfxpsSet = Sets.newHashSet(dfxps);
        // Only genuine "*.tei.xml" names are considered, and only the trailing
        // suffix is stripped, so that the id always maps back to a real file.
        List<String> pairedIds = Arrays.stream(teis)
                .filter(name -> name.endsWith(TEI_SUFFIX))
                .map(name -> name.substring(0, name.length() - TEI_SUFFIX.length()))
                .filter(id -> dfxpsSet.contains(id + DFXP_SUFFIX))
                .collect(Collectors.toList());
        total = pairedIds.size();
        ids = pairedIds.iterator();
        currentIndex = 0;
    }

    /**
     * Fills the given CAS with the next TEI/DFXP pair.
     *
     * @param jcas the CAS to populate.
     * @throws IOException if one of the two files cannot be read.
     * @throws CollectionException if one of the views cannot be created.
     */
    @Override
    public void getNext(JCas jcas) throws IOException, CollectionException {
        JCas teiV, dfxpV;
        try {
            teiV = ViewCreatorAnnotator.createViewSafely(jcas, teiName);
            dfxpV = ViewCreatorAnnotator.createViewSafely(jcas, dfxpName);
        } catch (AnalysisEngineProcessException ex) {
            throw new CollectionException(ex);
        }
        jcas.setDocumentLanguage("en");
        teiV.setDocumentLanguage("en");
        dfxpV.setDocumentLanguage("en");
        String id = ids.next();
        teiV.setDocumentText(
                FileUtils.readFileToString(new File(teiDirpath, id + TEI_SUFFIX), StandardCharsets.UTF_8));
        dfxpV.setDocumentText(
                FileUtils.readFileToString(new File(dfxpDirpath, id + DFXP_SUFFIX), StandardCharsets.UTF_8));
        jcas.setDocumentText("The default CAS stays empty in this pipeline.");
        for (JCas j : new JCas[] { teiV, dfxpV, jcas }) {
            // Annotation end offsets are exclusive: the text length covers the
            // whole document and stays valid (0) for an empty one.
            Document document = new Document(j, 0, j.getDocumentText().length());
            document.setName(id);
            document.addToIndexes();
        }
        ++currentIndex;
    }

    /**
     * @return {@code true} while at least one id remains to be emitted.
     */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        return ids.hasNext();
    }

    /**
     * @return a single progress entry counting emitted documents out of the
     *         number of ids found during initialization.
     */
    @Override
    public Progress[] getProgress() {
        return new Progress[] { new ProgressImpl(currentIndex, total, Progress.ENTITIES) };
    }
}