// Java tutorial
/** * Copyright 2009 Humboldt-Universitt zu Berlin, INRIA. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * */ package org.corpus_tools.pepper.impl; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; import org.apache.commons.io.FileUtils; import org.corpus_tools.pepper.core.ModuleControllerImpl; import org.corpus_tools.pepper.modules.PepperImporter; import org.corpus_tools.pepper.modules.PepperMapper; import org.corpus_tools.pepper.modules.PepperModule; import org.corpus_tools.pepper.modules.PepperModuleProperties; import org.corpus_tools.pepper.modules.exceptions.PepperModuleException; import org.corpus_tools.salt.SaltFactory; import org.corpus_tools.salt.common.SCorpus; import org.corpus_tools.salt.common.SCorpusGraph; import org.corpus_tools.salt.common.SDocument; import org.corpus_tools.salt.core.SAnnotation; import org.corpus_tools.salt.core.SAnnotationContainer; import org.corpus_tools.salt.core.SLayer; import org.corpus_tools.salt.core.SMetaAnnotation; import org.corpus_tools.salt.core.SNode; import org.corpus_tools.salt.core.SRelation; import org.corpus_tools.salt.graph.IdentifiableElement; import org.corpus_tools.salt.graph.Identifier; import org.corpus_tools.salt.graph.Label; import org.corpus_tools.salt.graph.Relation; import org.corpus_tools.salt.util.SaltUtil; import org.eclipse.emf.common.util.URI; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * <p> * This class is a helper class to perform actions before or after a mapping was * done. A mapping could be the mapping of a single corpus or document or even * the mapping of an entire corpus structure. * </p> * <p> * This could be helpful, for instance to make some processing after the mapping * e.g. adding all created nodes and relations to a layer. To trigger an action * for a specific Pepper module a set of customization properties is available. * Customization properties triggering a post processing starts with * {@value PepperModuleProperties#PREFIX_PEPPER_AFTER}. Such an action could be * the enhancement of a corpus with metadata (following the property file * syntax) to enhance corpora in formats, which do not support metadata, see * {@link #readMeta(Identifier)}. Or the printing of a corpus structure, see * {@link #reportCorpusStructure(SNode, String, boolean)}. * </p> * <p> * Objects of this class are used by * <ol> * <li>{@link PepperModuleImpl} to perform actions before or after a single * document or corpus was processed and</li> * <li>used by {@link ModuleControllerImpl} to perform actions before or after * the entire corpus graph was processed.</li> * </ol> * </p> * <p> * This is an overview on the possible actions: * <ul> * <li>single corpus or document * <ul> * <li>before * <ul> * <li>{@link #addSLayers(SDocument, String)}</li> * <li>{@link #readMeta(Identifier)}</li> * </ul> * </li> * <li>after * <ul> * <li>{@link #addSLayers(SDocument, String)}</li> * </ul> * </li> * </ul> * </li> * <li>entire document structure * <ul> * <li>before * <ul> * <li>{@link #reportCorpusStructure(SNode, String, boolean)}</li> * </ul> * </li> * <li>after * <ul> * <li>{@link #copyResources(String)}</li> * </ul> * </li> * </ul> * </li> * </ul> * </p> * * @author florian * @since 3.0.0 * */ public class BeforeAfterAction { /** * Initializes this object and in case an action should be performed, it * sets the 
internal {@link PepperModule} which does the action, and the * internal {@link SCorpusGraph} on which the action should be performed. **/ public BeforeAfterAction(PepperModule pepperModule) { this.pepperModule = pepperModule; logger = LoggerFactory.getLogger(getPepperModule().getName()); } private PepperModule pepperModule; public PepperModule getPepperModule() { return pepperModule; } private Logger logger = LoggerFactory.getLogger("Pepper"); /** * Invokes an actions, after the mapping of an entire corpus structure was * done. Customization properties triggering a pre processing starts with * {@value PepperModuleProperties#PREFIX_PEPPER_AFTER}. This method is * called before invocation of {@link PepperModule#start()}. * * @throws PepperModuleException */ public void before(SCorpusGraph corpusGraph) throws PepperModuleException { if (getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_REPORT_CORPUSGRAPH) != null) { boolean isReport = Boolean.parseBoolean(getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_REPORT_CORPUSGRAPH).getValue().toString()); if (isReport && corpusGraph != null) { List<SNode> roots = corpusGraph.getRoots(); if (roots != null) { StringBuilder str = new StringBuilder(); str.append("corpus structure imported by "); str.append(getPepperModule().getName()); for (SNode root : roots) { str.append(":\n"); str.append(getPepperModule().getSaltProject().getCorpusGraphs() .indexOf(((SCorpus) root).getGraph())); str.append("\n"); str.append(reportCorpusStructure(corpusGraph, root, "", true)); } logger.info(str.toString()); } } } } /** * Invokes an actions, after the mapping of an entire corpus structure was * done. Customization properties triggering a post processing starts with * {@value PepperModuleProperties#PREFIX_PEPPER_AFTER}. This method is * called after invocation of {@link {@link PepperModule#start()} . 
* * @throws PepperModuleException */ public void after(SCorpusGraph corpusGraph) throws PepperModuleException { if (getPepperModule().getProperties().getProperty(PepperModuleProperties.PROP_AFTER_COPY_RES) != null) { // copies resources as files from source to target String resString = (String) getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_COPY_RES).getValue(); copyResources(resString); } } /** * Returns the corpus structure as an ascii tree. * * @param corpusGraph * the corpus structure to be printed * @param node * root node to start from * @param prefix * @param isTail * @return */ protected String reportCorpusStructure(SCorpusGraph corpusGraph, SNode node, String prefix, boolean isTail) { StringBuilder retStr = new StringBuilder(); retStr.append(prefix); retStr.append(((isTail ? " " : " ") + node.getName())); retStr.append("\n"); List<SRelation<SNode, SNode>> outRelations = corpusGraph.getOutRelations(node.getId()); int i = 0; for (Relation out : outRelations) { if (i < outRelations.size() - 1) { retStr.append(prefix); retStr.append(reportCorpusStructure(corpusGraph, (SNode) out.getTarget(), prefix + (isTail ? " " : " "), false)); } else { retStr.append(reportCorpusStructure(corpusGraph, (SNode) out.getTarget(), prefix + (isTail ? " " : " "), true)); } i++; } return (retStr.toString()); } /** * Reads customization property * {@link PepperModuleProperties#PROP_AFTER_COPY_RES} and copies the listed * resources to the named target folder. 
*/ public void copyResources(String resString) { if ((resString != null) && (!resString.isEmpty())) { String[] resources = resString.split(";"); if (resources.length > 0) { for (String resource : resources) { resource = resource.trim(); String[] parts = resource.split("->"); if (parts.length == 2) { String sourceStr = parts[0]; String targetStr = parts[1]; sourceStr = sourceStr.trim(); targetStr = targetStr.trim(); // check if source and target is given boolean copyOk = true; if ((sourceStr == null) || (sourceStr.isEmpty())) { logger.warn("Cannot copy resources for '" + getPepperModule().getName() + "' because no source file was given in property value '" + resource + "'. "); copyOk = false; } if ((targetStr == null) || (targetStr.isEmpty())) { logger.warn("Cannot copy resources for '" + getPepperModule().getName() + "' because no target file was given in property value '" + resource + "'. "); copyOk = false; } if (copyOk) { File source = new File(sourceStr); File target = new File(targetStr); // in case of source or target aren't absolute // resolve them against current Job's base directory String baseDir = getPepperModule().getModuleController().getJob().getBaseDir() .toFileString(); if (!baseDir.endsWith("/")) { baseDir = baseDir + "/"; } if (!source.isAbsolute()) { source = new File(baseDir + sourceStr); } if (!source.exists()) { logger.warn("Cannot copy resources for '" + getPepperModule().getName() + "' because source does not exist '" + source.getAbsolutePath() + "'. Check the property value '" + resource + "'. "); } else { // only copy if source exists if (!target.isAbsolute()) { target = new File(baseDir + targetStr); } if (!target.exists()) { if (!target.mkdirs()) { logger.warn("Cannot create folder {}. 
", target); } } try { if (source.isDirectory()) { targetStr = target.getAbsolutePath(); if (!targetStr.endsWith("/")) { targetStr = targetStr + "/"; } target = new File(targetStr + source.getName()); FileUtils.copyDirectory(source, target); logger.trace("Copied resource from '" + source.getAbsolutePath() + "' to '" + target.getAbsolutePath() + "'."); } else { targetStr = target.getCanonicalPath(); if (!targetStr.endsWith("/")) { targetStr = targetStr + "/"; } target = new File(targetStr + source.getName()); FileUtils.copyFile(source, target); logger.trace("Copied resource from '" + source.getAbsolutePath() + "' to '" + target.getAbsolutePath() + "'."); } } catch (IOException e) { logger.warn("Cannot copy resources for '" + getPepperModule().getName() + "' because of '" + e.getMessage() + "'. Check the property value '" + resource + "'. "); } } } } } } } } /** * Invokes actions, before the mapping of a corpus or document was started. * This could be helpful, for instance to make some preparations for the * mapping. To trigger this pre processing for a specific Pepper module a * set of customization properties is available. Customization properties * triggering a pre processing starts with * {@value PepperModuleProperties#PREFIX_PEPPER_BEFORE}. This method is * called by the method {@link #map()}, before * {@link PepperMapper#mapSDocument()} was called. 
* * @param id * id of either {@link SDocument} or {@link SCorpus} object to be * prepared * @throws PepperModuleException */ public void before(Identifier id) throws PepperModuleException { if (getPepperModule().getProperties() != null) { if (getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_BEFORE_ADD_SLAYER) != null) { // add slayers after processing if ((id != null) && (id.getIdentifiableElement() != null)) { if (id.getIdentifiableElement() instanceof SDocument) { SDocument sDoc = (SDocument) id.getIdentifiableElement(); // add layers String layers = (String) getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_BEFORE_ADD_SLAYER).getValue(); addSLayers(sDoc, layers); } else if (id.getIdentifiableElement() instanceof SCorpus) { } } } if ((getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_BEFORE_READ_META) != null) && (getPepperModule().getProperties().getProperty(PepperModuleProperties.PROP_BEFORE_READ_META) .getValue() != null)) { // read meta data readMeta(id); } } } /** * Invokes actions, after the mapping of a corpus or document is done. This * could be helpful, for instance to make some processing after the mapping * e.g. adding all created nodes and relations to a layer. To trigger this * post processing for a specific Pepper module a set of customization * properties is available. Customization properties triggering a post * processing starts with * {@value PepperModuleProperties#PREFIX_PEPPER_AFTER}. This method is * called by the method {@link #map()}, after * {@link PepperMapper#mapSDocument()} was called. 
* * @param id * id of either {@link SDocument} or {@link SCorpus} object to be * post processed * @throws PepperModuleException */ public void after(Identifier id) throws PepperModuleException { if (getPepperModule().getProperties() != null) { if ((id != null) && (id.getIdentifiableElement() != null)) { if (getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_ADD_SLAYER) != null) { if (id.getIdentifiableElement() instanceof SDocument) { SDocument sDoc = (SDocument) id.getIdentifiableElement(); // add slayers after processing String layers = (String) getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_ADD_SLAYER).getValue(); addSLayers(sDoc, layers); } } if (getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_RENAME_ANNOTATIONS).getValue() != null) { if (id.getIdentifiableElement() instanceof SDocument && ((SDocument) id.getIdentifiableElement()).getDocumentGraph() != null) { renameAnnotations(id, (String) getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_RENAME_ANNOTATIONS).getValue()); } } if (getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_REMOVE_ANNOTATIONS).getValue() != null) { //remove is also done by rename if (id.getIdentifiableElement() instanceof SDocument && ((SDocument) id.getIdentifiableElement()).getDocumentGraph() != null) { renameAnnotations(id, (String) getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_REMOVE_ANNOTATIONS).getValue()); } } if ((Boolean) getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_AFTER_TOKENIZE).getValue()) { //remove is also done by rename if (id.getIdentifiableElement() instanceof SDocument && ((SDocument) id.getIdentifiableElement()).getDocumentGraph() != null) { ((SDocument) id.getIdentifiableElement()).getDocumentGraph().tokenize(); } } } } } // 
**************************************************************************************** // *** functions for before() and after() /** * Adds the passed layer to all nodes and objects in the passed * {@link SDocument}. * * @param sDoc * @param layers */ public void addSLayers(SDocument sDoc, String layers) { if ((layers != null) && (!layers.isEmpty())) { String[] layerArray = layers.split(";"); if (layerArray.length > 0) { for (String layer : layerArray) { layer = layer.trim(); // create SLayer and add to document-structure List<SLayer> sLayers = sDoc.getDocumentGraph().getLayerByName(layer); SLayer sLayer = null; if ((sLayers != null) && (sLayers.size() > 0)) { sLayer = sLayers.get(0); } if (sLayer == null) { sLayer = SaltFactory.createSLayer(); sLayer.setName(layer); sDoc.getDocumentGraph().addLayer(sLayer); } // add all nodes to new layer for (SNode sNode : sDoc.getDocumentGraph().getNodes()) { sNode.addLayer(sLayer); } // add all relations to new layer for (SRelation sRel : sDoc.getDocumentGraph().getRelations()) { sRel.addLayer(sLayer); } } } } } /** * Loads meta data form a meta data file and adds them to the object * corresponding to the passed {@link Identifier}. The meta data file is * localized in the directory in case of the URI corresponding to passed * {@link Identifier} is a directory or (in case the corresponding URI * addresses a file) in the same directory as the resource corresponding to * the passed {@link Identifier}. The meta data file must have the ending * passed in {@link PepperModuleProperties#PROP_BEFORE_READ_META}. 
* * @param id * identifying the current object */ public void readMeta(Identifier id) { if (getPepperModule() instanceof PepperImporter) { URI resourceURI = ((PepperImporter) getPepperModule()).getIdentifier2ResourceTable().get(id); Object endingObj = getPepperModule().getProperties() .getProperty(PepperModuleProperties.PROP_BEFORE_READ_META).getValue(); if (endingObj != null) { String ending = endingObj.toString().trim(); if (resourceURI != null) { File resource = new File(resourceURI.toFileString()); File metaFile = null; if (resource.isDirectory()) { // resource is directory, search for meta data file // (all files having customized ending) File[] files = resource.listFiles(); if (files != null) { for (File file : files) { if (file.getName().equalsIgnoreCase( ((SNode) id.getIdentifiableElement()).getPath().lastSegment() + "." + ending)) { metaFile = file; break; } } } } else { // resource is a file, search for meta data file // (file having the same name as current corpus or // document and having customized ending) String[] parts = resource.getName().split("[.]"); if (parts != null) { metaFile = new File(resource.getAbsolutePath().substring(0, resource.getAbsolutePath().lastIndexOf(".")) + "." + ending); if (!metaFile.exists()) { metaFile = null; } } } if (metaFile != null) { Properties props = new Properties(); try (FileInputStream str = new FileInputStream(metaFile)) { props.load(str); } catch (IOException e) { logger.warn("Tried to load meta data file '" + metaFile.getAbsolutePath() + "', but a problem occured: " + e.getMessage() + ". 
", e); } for (Object key : props.keySet()) { IdentifiableElement container = id.getIdentifiableElement(); if ((container != null) && (container instanceof SAnnotationContainer)) { if (!((SAnnotationContainer) container).containsLabel(key.toString())) { ((SAnnotationContainer) container).createMetaAnnotation(null, key.toString(), props.getProperty(key.toString())); } else { logger.warn("Cannot add meta annotation '" + key.toString() + "', because it already exist on object '" + id.getId() + "' please check file '" + metaFile.getAbsolutePath() + "'. "); } } } } } } } } /** * Renames all annotations matching the search template to the new * namespace, name or value. To rename an annotation, use the following * syntax: * "old_namespace::old_name=old_value := new_namespace::new_name=new_value", * determining the name is mandatory whereas the namespace and value are * optional. For instance a pos annotation can be renamed as follows: * "salt::pos:=part-of-speech". A list of renamings must be separated with * ";". * * @param id * identifying the current object */ public void renameAnnotations(Identifier id, String renameTemplate) { if (id != null && id.getIdentifiableElement() != null) { try { Map<String[], String[]> renamingMap = new HashMap<>(); // split all single renaming strings String[] renamings = renameTemplate.split(";"); for (String renaming : renamings) { String[] parts = renaming.split(":="); if (parts.length == 2) { renamingMap.put(SaltUtil.unmarshalAnnotation(parts[0]).iterator().next(), SaltUtil.unmarshalAnnotation(parts[1]).iterator().next()); } else if (parts.length == 1) { renamingMap.put(SaltUtil.unmarshalAnnotation(parts[0]).iterator().next(), null); } } SDocument document = (SDocument) id.getIdentifiableElement(); // rename all annotations of nodes Iterator<SAnnotationContainer> it = (Iterator<SAnnotationContainer>) (Iterator<? 
extends SAnnotationContainer>) document .getDocumentGraph().getNodes().iterator(); rename(it, renamingMap); // rename all annotations of relations it = (Iterator<SAnnotationContainer>) (Iterator<? extends SAnnotationContainer>) document .getDocumentGraph().getRelations().iterator(); rename(it, renamingMap); } catch (RuntimeException e) { e.printStackTrace(); logger.warn("Cannot rename labels in object '{}', because of a nested exeption '{}'. ", id, e.getMessage()); } } } private void rename(Iterator<SAnnotationContainer> it, Map<String[], String[]> renamingMap) { while (it.hasNext()) { SAnnotationContainer node = it.next(); for (Map.Entry<String[], String[]> entry : renamingMap.entrySet()) { Label label = node.getLabel(entry.getKey()[0], entry.getKey()[1]); if (label != null) { if (entry.getValue() == null) { //remove label node.removeLabel(label.getQName()); } else if (label.getQName() .equals(SaltUtil.createQName(entry.getValue()[0], entry.getValue()[1]))) { // if only value is different label.setValue(entry.getValue()[2]); } else { // namespace or name are different --> remove label and // create a new one node.removeLabel(label.getQName()); if (label instanceof SAnnotation) { if (entry.getValue()[2] == null) { // copy annotation value node.createAnnotation(entry.getValue()[0], entry.getValue()[1], label.getValue()); } else { // use new annotation value node.createAnnotation(entry.getValue()[0], entry.getValue()[1], entry.getValue()[2]); } } else if (label instanceof SMetaAnnotation) { if (entry.getValue()[2] == null) { // copy annotation value node.createMetaAnnotation(entry.getValue()[0], entry.getValue()[1], label.getValue()); } else { // use new annotation value node.createMetaAnnotation(entry.getValue()[0], entry.getValue()[1], entry.getValue()[2]); } } } } } } } // *** functions for before() and after() // **************************************************************************************** }