edu.eurac.commul.pepperModules.mmax2.SaltExtendedFileGenerator.java Source code

Java tutorial

Introduction

Here is the source code for edu.eurac.commul.pepperModules.mmax2.SaltExtendedFileGenerator.java

Source

/**
 * Copyright 2009 Humboldt University of Berlin, INRIA.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *
 */
package edu.eurac.commul.pepperModules.mmax2;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.corpus_tools.pepper.modules.exceptions.PepperModuleException;

import edu.eurac.commul.annotations.mmax2.mmax2wrapper.FileGenerator;
import edu.eurac.commul.annotations.mmax2.mmax2wrapper.MMAX2WrapperException;
import edu.eurac.commul.pepperModules.mmax2.SaltExtendedCorpusFactory.SaltExtendedCorpus;
import edu.eurac.commul.pepperModules.mmax2.SaltExtendedDocumentFactory.SaltExtendedDocument;
import edu.eurac.commul.pepperModules.mmax2.SaltExtendedMarkableFactory.SaltExtendedMarkable;
import edu.eurac.commul.pepperModules.mmax2.SaltExtendedMarkableFactory.SaltExtendedMarkableContainer;

/**
 * Writes an MMAX2 corpus enhanced with Salt information to files.
 * <p>
 * In addition to what {@link FileGenerator} outputs, one "SaltInfo" XML file is written per
 * document into the SaltInfo folder of the corpus. Each file holds one entry per
 * {@link SaltExtendedMarkable} of the document.
 *
 * @author Lionel Nicolas
 */
public class SaltExtendedFileGenerator extends FileGenerator {

    /** Skeleton of a SaltInfo file; {@code @salt_infos@} is substituted with the joined entries. */
    private static final String MMAX2_SALT_INFO_GENERIC = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "\n" + "<"
            + SaltExtendedMmax2Infos.SALT_INFOS_NODE_NAME + ">" + "\n" + "   @salt_infos@" + "\n" + "</"
            + SaltExtendedMmax2Infos.SALT_INFOS_NODE_NAME + ">";

    /** Skeleton of a single SaltInfo entry; every {@code @...@} token is substituted per markable. */
    private static final String MMAX2_SALT_INFO_ENTRY_GENERIC = "   <" + SaltExtendedMmax2Infos.SALT_INFO_NODE_NAME
            + " " + SaltExtendedMmax2Infos.SALT_INFO_ID_ATTR_NAME + "=\"@markable_id@\" "
            + SaltExtendedMmax2Infos.SALT_INFO_SID_ATTR_NAME + "=\"@salt_id@\" "
            + SaltExtendedMmax2Infos.SALT_INFO_STYPE_ATTR_NAME + "=\"@salt_type@\" "
            + SaltExtendedMmax2Infos.SALT_INFO_SNAME_ATTR_NAME + "=\"@salt_name@\" @additional@/>";

    /**
     * Placeholder names that must never appear (wrapped in '@') inside a value, because the templates
     * are filled by successive textual substitution. Covers every placeholder used by the two
     * templates of this class; "salt_layer" is kept from the previous list.
     */
    private static final String[] PROHIBITED_ARGUMENTS = { "markable_id", "salt_id", "salt_layer", "salt_type",
            "salt_name", "additional", "salt_infos" };

    /** Matches any reserved placeholder occurring anywhere in a string. Must be declared after the list. */
    private static final java.util.regex.Pattern PROHIBITED_PATTERN = java.util.regex.Pattern
            .compile("@(" + StringUtils.join(PROHIBITED_ARGUMENTS, "|") + ")@");

    /**
     * Outputs the whole SaltExtended corpus: delegates to {@link FileGenerator#outputCorpus}, makes
     * sure the SaltInfo folder exists and writes one SaltInfo file per document of the corpus.
     *
     * @param corpus The corpus to output
     * @throws MMAX2WrapperException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws PepperModuleException if the SaltInfo folder cannot be created
     */
    public static void createCorpus(SaltExtendedCorpus corpus)
            throws IOException, ParserConfigurationException, MMAX2WrapperException {
        FileGenerator.outputCorpus(corpus);
        ensureSaltInfoDirectory(corpus);

        for (SaltExtendedDocument document : corpus.getSaltExtendedDocuments()) {
            createSaltInfoFile(document);
        }
    }

    /**
     * Prepares the corpus for document-by-document output: delegates to
     * {@link FileGenerator#initializeCorpus} and makes sure the SaltInfo folder exists.
     *
     * @param corpus The corpus to initialize
     * @throws IOException
     * @throws MMAX2WrapperException
     * @throws PepperModuleException if the SaltInfo folder cannot be created
     */
    public static void initializeCorpus(SaltExtendedCorpus corpus) throws IOException, MMAX2WrapperException {
        FileGenerator.initializeCorpus(corpus);
        ensureSaltInfoDirectory(corpus);
    }

    /**
     * Outputs a single document: delegates to {@link FileGenerator#outputDocument} and then writes
     * the SaltInfo file of the document.
     *
     * @param corpus The corpus the document belongs to
     * @param document The document to output
     * @throws MMAX2WrapperException
     * @throws IOException
     */
    public static void outputDocument(SaltExtendedCorpus corpus, SaltExtendedDocument document)
            throws MMAX2WrapperException, IOException {
        FileGenerator.outputDocument(corpus, document);
        createSaltInfoFile(document);
    }

    /**
     * Finishes the output of the corpus by delegating to {@link FileGenerator#finalizeCorpus}.
     *
     * @param corpus The corpus to finalize
     * @throws MMAX2WrapperException
     * @throws IOException
     */
    public static void finalizeCorpus(SaltExtendedCorpus corpus) throws MMAX2WrapperException, IOException {
        FileGenerator.finalizeCorpus(corpus);
    }

    // Makes sure the SaltInfo folder of the corpus exists, creating it (and missing parents) if needed
    private static void ensureSaltInfoDirectory(SaltExtendedCorpus corpus) {
        File saltInfosDirectory = new File(corpus.getCorpusPath() + File.separator + corpus.getSaltInfoPath());
        // File.mkdirs() also returns false when the directory is already there, which is not a failure.
        if (!saltInfosDirectory.isDirectory() && !saltInfosDirectory.mkdirs()) {
            throw new PepperModuleException(
                    "Cannot create folder for SaltInfo '" + saltInfosDirectory.getAbsolutePath() + "'");
        }
    }

    // Writes the SaltInfo file of the document into the SaltInfo folder of its corpus
    private static void createSaltInfoFile(SaltExtendedDocument document)
            throws IOException, MMAX2WrapperException {
        // OutputXmlFile is not declared in this class; presumably inherited from FileGenerator.
        OutputXmlFile(
                document.getFactory().getCorpus().getCorpusPath() + File.separator
                        + document.getFactory().getCorpus().getSaltInfoPath() + File.separator
                        + document.getDocumentId() + SaltExtendedMmax2Infos.SALT_INFO_FILE_ENDING,
                createSaltInfoFileString(document));
    }

    // Generates the entire content of the SaltInfo file corresponding to the SaltExtendedDocument document
    private static String createSaltInfoFileString(SaltExtendedDocument document) throws MMAX2WrapperException {
        ArrayList<String> entries = new ArrayList<String>();
        for (SaltExtendedMarkable markable : document.getAllSaltExtendedMarkables()) {
            entries.add(createSaltInfoEntry(markable));
        }
        String body = EscapeStringSimple(StringUtils.join(entries, LINE_SEPARATOR));

        // Literal replace(): with replaceAll() any '$' or '\' in the data would be read as a
        // group reference / escape and either throw or silently drop characters.
        return MMAX2_SALT_INFO_GENERIC.replace("@salt_infos@", body);
    }

    // Generates an entry of the SaltInfo file for the SaltExtendedMarkable markable
    private static String createSaltInfoEntry(SaltExtendedMarkable markable) throws MMAX2WrapperException {
        // Literal replace() for the same reason as in createSaltInfoFileString.
        String entry = MMAX2_SALT_INFO_ENTRY_GENERIC;
        entry = entry.replace("@markable_id@", EscapeString(markable.getId()));
        entry = entry.replace("@salt_id@", EscapeString(markable.getSId()));
        entry = entry.replace("@salt_type@", EscapeString(markable.getSType()));
        entry = entry.replace("@salt_name@", EscapeString(markable.getSName()));

        String additional = "";
        // Constant-first comparison so that a markable without a type is simply not a container.
        if (SaltExtendedMmax2Infos.SALT_INFO_TYPE_SCONTAINER.equals(markable.getSType())) {
            SaltExtendedMarkableContainer containerMarkable = (SaltExtendedMarkableContainer) markable;
            // Both attribute values must be closed with a quote, otherwise the entry is not well-formed XML.
            additional = SaltExtendedMmax2Infos.SALT_INFO_CONTAINED_ID_ATTR_NAME + "=\""
                    + EscapeString(containerMarkable.getContainedId()) + "\" "
                    + SaltExtendedMmax2Infos.SALT_INFO_CONTAINED_SCHEME_ATTR_NAME + "=\""
                    + EscapeString(containerMarkable.getContainedSchemeName()) + "\"";
        }

        return entry.replace("@additional@", additional);
    }

    /**
     * XML-escapes a value and then runs it through {@link #EscapeStringSimple(String)}.
     * A {@code null} value is turned into the text "null".
     *
     * @param original The raw value
     * @return The escaped value
     * @throws MMAX2WrapperException
     * @throws PepperModuleException if the value contains a reserved placeholder
     */
    protected static String EscapeString(String original) throws MMAX2WrapperException {
        return EscapeStringSimple(StringEscapeUtils.escapeXml(String.valueOf(original)));
    }

    /**
     * Rejects values containing one of the placeholders reserved by the SaltInfo templates, then
     * hands the value over to {@link FileGenerator#EscapeStringSimple} for its own processing.
     * A {@code null} value is turned into the text "null".
     *
     * @param original The value to check
     * @return Whatever {@link FileGenerator#EscapeStringSimple} returns for the value
     * @throws MMAX2WrapperException
     * @throws PepperModuleException if the value contains a reserved placeholder
     */
    protected static String EscapeStringSimple(String original) throws MMAX2WrapperException {
        String copy = String.valueOf(original);
        // find(), not String.matches(): a placeholder anywhere in the value would be substituted
        // by a later template replacement, not only when it makes up the whole value.
        if (PROHIBITED_PATTERN.matcher(copy).find()) {
            throw new PepperModuleException("'" + copy + "' contains one of the following reserved @argument@ ("
                    + StringUtils.join(PROHIBITED_ARGUMENTS, ", ") + ")");
        }
        return FileGenerator.EscapeStringSimple(copy);// if good at this level then check upstairs
    }

}