org.apache.uima.ruta.descriptor.RutaDescriptorBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.uima.ruta.descriptor.RutaDescriptorBuilder.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.ruta.descriptor;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.uima.ResourceSpecifierFactory;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.impl.ResourceManager_impl;
import org.apache.uima.resource.metadata.Capability;
import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.Import;
import org.apache.uima.resource.metadata.TypeDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.Import_impl;
import org.apache.uima.ruta.UIMAConstants;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.XMLSerializer;
import org.apache.uima.util.XMLizable;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class RutaDescriptorBuilder {

    /** Shared UIMA factory used by this class to create empty type systems and capabilities. */
    private static ResourceSpecifierFactory uimaFactory = UIMAFramework.getResourceSpecifierFactory();

    /** Location of the type system descriptor that every generated type system starts from. */
    private final URL defaultTypeSystem;

    /** Location of the analysis engine descriptor that is parsed and then customized per script. */
    private final URL defaultAnalysisEngine;

    /**
     * Creates a builder that derives its descriptors from the two given template descriptors.
     * The URLs are only stored here; they are parsed when a descriptor is actually created.
     *
     * @param defaultTypeSystem
     *            URL of the type system descriptor used as the basis of generated type systems
     * @param defaultAnalysisEngine
     *            URL of the analysis engine descriptor used as the template for generated engines
     * @throws InvalidXMLException
     *             declared for callers; not raised by the code in this constructor
     * @throws IOException
     *             declared for callers; not raised by the code in this constructor
     */
    public RutaDescriptorBuilder(URL defaultTypeSystem, URL defaultAnalysisEngine)
            throws InvalidXMLException, IOException {
        this.defaultAnalysisEngine = defaultAnalysisEngine;
        this.defaultTypeSystem = defaultTypeSystem;
    }

    /**
     * Builds the type system description for the script described by {@code desc}.
     * <p>
     * The method proceeds in four steps:
     * <ol>
     * <li>the default type system is imported (by name, absolute location or relative location,
     * depending on {@code options}) or, if no relative location can be computed, embedded;</li>
     * <li>every type system imported by the script is located on {@code enginePaths} or, failing
     * that, on the classpath; classpath hits are embedded, the others are imported;</li>
     * <li>the type systems of imported scripts are located the same way and imported;</li>
     * <li>the types and features declared by the script are added, unless a type of that name
     * already exists or its parent is one of the hard-coded final UIMA types.</li>
     * </ol>
     * Whenever a relative import location cannot be computed or does not point to an existing
     * file, the import falls back to an import by name.
     *
     * @param desc
     *            information extracted from the script (imports, declared types and features)
     * @param typeSystemOutput
     *            path of the file the type system will be written to; used as the base of relative
     *            imports and as source URL of the result; may be {@code null}
     * @param options
     *            build options (class loader, import strategy, type system suffix)
     * @param enginePaths
     *            directories searched for descriptors, also set as the data path of the resource
     *            manager; may be {@code null}
     * @return the assembled type system description
     * @throws InvalidXMLException
     *             if a descriptor cannot be parsed or its imports cannot be resolved
     * @throws ResourceInitializationException
     *             if the CAS for the default type system cannot be created or merging fails
     * @throws IOException
     *             if a descriptor cannot be read; a {@link FileNotFoundException} is thrown when an
     *             imported type system or script type system cannot be located
     * @throws URISyntaxException
     *             if the URL of a located descriptor cannot be converted to a URI
     */
    public TypeSystemDescription createTypeSystemDescription(RutaDescriptorInformation desc,
            String typeSystemOutput, RutaBuildOptions options, String[] enginePaths)
            throws InvalidXMLException, ResourceInitializationException, IOException, URISyntaxException {

        ResourceManager rm = UIMAFramework.newDefaultResourceManager();
        if (options.getClassLoader() != null) {
            rm = new ResourceManager_impl(options.getClassLoader());
        }
        if (enginePaths != null) {
            StringBuilder dataPath = new StringBuilder();
            for (String enginePath : enginePaths) {
                dataPath.append(enginePath).append(File.pathSeparator);
            }
            rm.setDataPath(dataPath.toString());
        }

        // Seed the short name -> qualified name map with the types of the default type system.
        Map<String, String> typeNameMap = new HashMap<String, String>();
        TypeSystemDescription initialTypeSystem = UIMAFramework.getXMLParser()
                .parseTypeSystemDescription(new XMLInputSource(defaultTypeSystem));
        CAS cas = CasCreationUtils.createCas(initialTypeSystem, null, new FsIndexDescription[0]);
        fillTypeNameMap(typeNameMap, cas.getTypeSystem());
        cas.release();

        // Type systems that must be embedded completely vs. those that can be referenced.
        List<TypeSystemDescription> toInclude = new ArrayList<TypeSystemDescription>();
        List<Import> importList = new ArrayList<Import>();

        // Step 1: the default type system.
        Import_impl import_impl = new Import_impl();
        if (options.isImportByName()) {
            import_impl.setName(initialTypeSystem.getName());
        } else if (options.isResolveImports()) {
            import_impl.setLocation(initialTypeSystem.getSourceUrlString());
        } else {
            URI uri = null;
            try {
                uri = defaultTypeSystem.toURI();
            } catch (URISyntaxException ignored) {
                // best effort: without a URI no relative location exists, so embed it below
            }
            String relativeLocation = uri == null ? null : getRelativeLocation(uri, typeSystemOutput);
            if (relativeLocation != null) {
                import_impl.setLocation(relativeLocation);
            } else {
                toInclude.add(initialTypeSystem);
            }
        }
        if (import_impl.getLocation() != null || import_impl.getName() != null) {
            importList.add(import_impl);
        }

        // Step 2: type systems imported by the script.
        for (String eachName : desc.getImportedTypeSystems()) {
            String locate = RutaEngine.locate(eachName, enginePaths, ".xml");
            URL url = null;
            boolean include = false;
            if (locate != null) {
                url = new File(locate).toURI().toURL();
            }
            if (url == null) {
                // not found on the descriptor paths: look it up on the classpath instead
                url = checkImportExistence(eachName, ".xml", options.getClassLoader());
                include = true;
                if (url == null) {
                    throw new FileNotFoundException(
                            "Build process can't find " + eachName + " in " + desc.getScriptName());
                }
            }
            TypeSystemDescription each = getTypeSystemDescriptor(url, options, rm);
            if (each == null) {
                throw new FileNotFoundException(
                        "Build process can't find " + eachName + " in " + desc.getScriptName());
            }
            fillTypeNameMap(typeNameMap, each);
            if (include) {
                // need to include the complete type system because an import is not possible
                each.resolveImports(rm);
                toInclude.add(each);
                continue;
            }
            import_impl = new Import_impl();
            if (options.isImportByName()) {
                import_impl.setName(eachName);
            } else if (options.isResolveImports()) {
                import_impl.setLocation(each.getSourceUrlString());
            } else {
                String relativeLocation = getRelativeLocation(url.toURI(), typeSystemOutput);
                boolean ableToFindFile = false;
                // A null location means no relative path exists (no output file, different
                // partitions, ...). It must not reach new File(parent, child), which rejects a
                // null child with a NullPointerException.
                if (relativeLocation != null) {
                    File parentFile = new File(typeSystemOutput).getParentFile();
                    ableToFindFile = new File(parentFile, relativeLocation).exists();
                }
                if (ableToFindFile) {
                    import_impl.setLocation(relativeLocation);
                } else {
                    // hotfix for different partitions making trouble for the relative path
                    import_impl.setName(eachName);
                }
            }
            importList.add(import_impl);
        }

        // Step 3: type systems of imported scripts.
        for (String eachName : desc.getImportedScripts()) {
            String locate = RutaEngine.locate(eachName, enginePaths, options.getTypeSystemSuffix() + ".xml");
            URL url = null;
            if (locate != null) {
                url = new File(locate).toURI().toURL();
            }
            if (url == null) {
                url = checkImportExistence(eachName, options.getTypeSystemSuffix() + ".xml",
                        options.getClassLoader());
                if (url == null) {
                    throw new FileNotFoundException("Build process can't find " + eachName
                            + options.getTypeSystemSuffix() + ".xml" + " in " + desc.getScriptName());
                }
            }
            TypeSystemDescription each = getTypeSystemDescriptor(url, options, rm);
            if (each == null) {
                throw new FileNotFoundException(
                        "Build process can't find " + eachName + " in " + desc.getScriptName());
            }
            fillTypeNameMap(typeNameMap, each);
            import_impl = new Import_impl();
            if (options.isImportByName()) {
                import_impl.setName(eachName + options.getTypeSystemSuffix());
            } else if (options.isResolveImports()) {
                import_impl.setLocation(each.getSourceUrlString());
            } else {
                String relativeLocation = getRelativeLocation(url.toURI(), typeSystemOutput);
                if (relativeLocation != null) {
                    import_impl.setLocation(relativeLocation);
                } else {
                    // never emit an import without name and location; fall back to the name
                    import_impl.setName(eachName + options.getTypeSystemSuffix());
                }
            }
            importList.add(import_impl);
        }

        TypeSystemDescription typeSystemDescription = CasCreationUtils.mergeTypeSystems(toInclude, rm);
        if (!importList.isEmpty()) {
            typeSystemDescription.setImports(importList.toArray(new Import[0]));
        }
        if (options.isResolveImports()) {
            typeSystemDescription.resolveImports(rm);
        }

        // TODO hotfixes: where do I get the final types??
        Set<String> finalTypes = new HashSet<String>(Arrays.asList(new String[] { "uima.cas.Boolean",
                "uima.cas.Byte", "uima.cas.Short", "uima.cas.Integer", "uima.cas.Long", "uima.cas.Float",
                "uima.cas.Double", "uima.cas.BooleanArray", "uima.cas.ByteArray", "uima.cas.ShortArray",
                "uima.cas.IntegerArray", "uima.cas.LongArray", "uima.cas.FloatArray", "uima.cas.DoubleArray",
                "uima.cas.StringArray", "uima.cas.FSArray" }));

        // Step 4: types declared by the script. Short names and triples are parallel sequences.
        int typeIndex = 0;
        for (String eachType : desc.getTypeShortNames()) {
            StringTriple typeTriple = desc.getTypeTriples().get(typeIndex);
            // Advance immediately: the skips below must not leave the index pointing at the
            // same triple, otherwise all following types would be paired with the wrong triple.
            typeIndex++;
            typeTriple = resolveType(typeTriple, typeNameMap, desc.getScriptName());
            if (typeSystemDescription.getType(typeTriple.getName()) != null) {
                continue;
            }
            if (finalTypes.contains(typeTriple.getParent())) {
                continue;
            }
            TypeDescription newType = typeSystemDescription.addType(typeTriple.getName(),
                    typeTriple.getDescription(), typeTriple.getParent());

            Collection<StringTriple> collection = desc.getFeatures().get(eachType);
            if (collection != null) {
                for (StringTriple eachFeature : collection) {
                    eachFeature = resolveFeature(eachFeature, typeNameMap);
                    newType.addFeature(eachFeature.getName(), eachFeature.getDescription(),
                            eachFeature.getParent());
                }
            }
        }

        // Collect the first type of each name ...
        Set<String> names = new HashSet<String>();
        Collection<TypeDescription> types = new HashSet<TypeDescription>();
        for (TypeDescription each : typeSystemDescription.getTypes()) {
            if (names.add(each.getName())) {
                types.add(each);
            }
        }

        // NOTE(review): ... and then add all present types again. This re-adds anything the name
        // filter skipped, so only types that are equal as objects are collapsed by the set.
        // Kept as in the original; confirm whether the name filter was meant to be final.
        TypeDescription[] presentTypes = typeSystemDescription.getTypes();
        types.addAll(Arrays.asList(presentTypes));
        typeSystemDescription.setTypes(types.toArray(new TypeDescription[0]));

        typeSystemDescription
                .setName(desc.getPackageString() + "." + desc.getScriptName() + options.getTypeSystemSuffix());
        if (typeSystemOutput != null) {
            File typeSystemFile = getFile(typeSystemOutput);
            typeSystemDescription.setSourceUrl(typeSystemFile.toURI().toURL());
        }
        return typeSystemDescription;
    }

    /**
     * Creates the analysis engine description for the script described by {@code desc}.
     * <p>
     * A single import referring to the script's type system is prepared here: by name (taken
     * from {@code typeSystemDescription}) when the options request imports by name, otherwise by
     * the location of {@code typeSystemOutput} relative to {@code engineOutput}. The import and a
     * fresh, empty type system description are then handed to {@code configureEngine}, which
     * fills in the rest of the descriptor.
     *
     * @param desc
     *            information extracted from the script
     * @param typeSystemDescription
     *            type system of the script; only its name is read, and only for by-name imports;
     *            may be {@code null}
     * @param typeSystemOutput
     *            path of the type system descriptor file; may be {@code null}
     * @param engineOutput
     *            path of the analysis engine descriptor file
     * @param options
     *            build options
     * @param scriptPaths
     *            script paths stored in the engine configuration; may be {@code null}
     * @param enginePaths
     *            descriptor paths stored in the engine configuration; may be {@code null}
     * @param resourcePaths
     *            additional resource paths stored in the engine configuration; may be {@code null}
     * @return the configured analysis engine description
     * @throws InvalidXMLException
     *             if the default analysis engine descriptor cannot be parsed
     * @throws IOException
     *             if the default analysis engine descriptor cannot be read
     */
    public AnalysisEngineDescription createAnalysisEngineDescription(RutaDescriptorInformation desc,
            TypeSystemDescription typeSystemDescription, String typeSystemOutput, String engineOutput,
            RutaBuildOptions options, String[] scriptPaths, String[] enginePaths, String[] resourcePaths)
            throws InvalidXMLException, IOException {
        TypeSystemDescription engineTypeSystem = uimaFactory.createTypeSystemDescription();
        Import_impl typeSystemImport = new Import_impl();

        if (!options.isImportByName()) {
            if (typeSystemOutput != null) {
                URI typeSystemUri = new File(typeSystemOutput).toURI();
                typeSystemImport.setLocation(getRelativeLocation(typeSystemUri, engineOutput));
            }
        } else if (typeSystemDescription != null) {
            typeSystemImport.setName(typeSystemDescription.getName());
        }

        return configureEngine(desc, engineOutput, options, scriptPaths, enginePaths, resourcePaths,
                typeSystemImport, engineTypeSystem);
    }

    /**
     * Creates the type system and the analysis engine description of a script and writes both
     * to disk. The type system descriptor is written first, then the engine descriptor. No
     * resource paths are passed to the engine description.
     *
     * @param desc
     *            information extracted from the script
     * @param typeSystemOutput
     *            path of the type system descriptor file to write
     * @param engineOutput
     *            path of the analysis engine descriptor file to write
     * @param options
     *            build options
     * @param scriptPaths
     *            script paths stored in the engine configuration; may be {@code null}
     * @param enginePaths
     *            descriptor paths used for lookups and stored in the engine configuration; may be
     *            {@code null}
     * @throws SAXException
     *             if serializing a descriptor to XML fails
     * @throws InvalidXMLException
     *             if a descriptor cannot be parsed
     * @throws IOException
     *             if a descriptor cannot be read or written
     * @throws ResourceInitializationException
     *             if the type system cannot be created
     * @throws URISyntaxException
     *             if the URL of a located descriptor cannot be converted to a URI
     */
    public void build(RutaDescriptorInformation desc, String typeSystemOutput, String engineOutput,
            RutaBuildOptions options, String[] scriptPaths, String[] enginePaths) throws SAXException,
            InvalidXMLException, IOException, ResourceInitializationException, URISyntaxException {

        TypeSystemDescription typeSystem = createTypeSystemDescription(desc, typeSystemOutput, options,
                enginePaths);
        AnalysisEngineDescription engine = createAnalysisEngineDescription(desc, typeSystem,
                typeSystemOutput, engineOutput, options, scriptPaths, enginePaths, null);

        toFile(typeSystem, getFile(typeSystemOutput));
        toFile(engine, getFile(engineOutput));
    }

    /**
     * Registers every type of the given type system in the map, keyed by its short name and
     * mapped to its full name. An existing entry for the same short name is overwritten.
     *
     * @param typeNameMap
     *            map from short type name to full type name; modified in place
     * @param typeSystem
     *            type system whose types are registered
     */
    private void fillTypeNameMap(Map<String, String> typeNameMap, TypeSystem typeSystem) {
        for (Iterator<Type> types = typeSystem.getTypeIterator(); types.hasNext();) {
            Type current = types.next();
            typeNameMap.put(current.getShortName(), current.getName());
        }
    }

    /**
     * Registers every type of the given type system description in the map, keyed by its short
     * name (the part of the name after the last '.') and mapped to its full name. An existing
     * entry for the same short name is overwritten.
     *
     * @param typeNameMap
     *            map from short type name to full type name; modified in place
     * @param desc
     *            type system description whose types are registered
     * @return {@code true} if at least one short name was already mapped before this call and
     *         its entry has been replaced, {@code false} otherwise
     */
    private boolean fillTypeNameMap(Map<String, String> typeNameMap, TypeSystemDescription desc) {
        boolean contained = false;
        for (TypeDescription each : desc.getTypes()) {
            String name = each.getName();
            // lastIndexOf yields -1 for names without a '.', so the whole name is used then
            String shortName = name.substring(name.lastIndexOf('.') + 1);
            if (typeNameMap.put(shortName, name) != null) {
                contained = true;
            }
        }
        return contained;
    }

    /**
     * Returns a copy of the feature triple whose third component (read via {@code getParent()};
     * presumably the feature's range type - confirm against StringTriple) is resolved: Ruta
     * keywords are translated first, and a name without a '.' that is known in {@code types} is
     * replaced by its mapped full name. Name and description are copied unchanged.
     *
     * @param eachFeature
     *            feature triple as declared in the script
     * @param types
     *            map from short type name to full type name
     * @return a new triple with the resolved third component
     */
    private StringTriple resolveFeature(StringTriple eachFeature, Map<String, String> types) {
        String featureName = eachFeature.getName();
        String resolved = translate(eachFeature.getParent());
        boolean unqualified = !resolved.contains(".");
        if (unqualified && types.containsKey(resolved)) {
            resolved = types.get(resolved);
        }
        return new StringTriple(featureName, eachFeature.getDescription(), resolved);
    }

    /**
     * Resolves the name and the parent of a declared type to full type names.
     * <p>
     * A missing parent defaults to {@code uima.tcas.Annotation}. Both names are translated from
     * Ruta keywords; a parent without a '.' that is known in {@code types} is replaced by its
     * mapped name. A type name without a '.' is replaced by its mapped name if known, otherwise
     * it is prefixed with {@code packageName}. The resulting name is registered in {@code types}
     * under its short name.
     *
     * @param typeTriple
     *            type triple (name, description, parent) as declared in the script
     * @param types
     *            map from short type name to full type name; updated with the resolved type
     * @param packageName
     *            prefix used for type names that are neither qualified nor already known
     * @return a new triple with the resolved name and parent and the original description
     */
    private StringTriple resolveType(StringTriple typeTriple, Map<String, String> types, String packageName) {
        String declaredParent = typeTriple.getParent();
        String declaredName = typeTriple.getName();

        String parent = translate(declaredParent != null ? declaredParent : "uima.tcas.Annotation");
        String name = translate(declaredName);

        if (!parent.contains(".") && types.containsKey(parent)) {
            parent = types.get(parent);
        }
        if (!name.contains(".")) {
            name = types.containsKey(name) ? types.get(name) : packageName + "." + name;
        }

        String shortName = name.substring(name.lastIndexOf(".") + 1);
        types.put(shortName, name);
        return new StringTriple(name, typeTriple.getDescription(), parent);
    }

    /**
     * Maps the Ruta keywords {@code Annotation}, {@code STRING}, {@code INT}, {@code DOUBLE},
     * {@code FLOAT}, {@code BOOLEAN} and {@code TYPE} to the corresponding UIMA type names.
     * {@code TYPE} is mapped to the string type, like {@code STRING}.
     *
     * @param name
     *            name to translate; may be {@code null}
     * @return the UIMA type name for a keyword, the unchanged name otherwise, or {@code null} if
     *         {@code name} is {@code null}
     */
    private String translate(String name) {
        if (name == null) {
            return null;
        }
        switch (name) {
        case "Annotation":
            return "uima.tcas.Annotation";
        case "STRING":
        case "TYPE":
            return UIMAConstants.TYPE_STRING;
        case "INT":
            return UIMAConstants.TYPE_INTEGER;
        case "DOUBLE":
            return UIMAConstants.TYPE_DOUBLE;
        case "FLOAT":
            return UIMAConstants.TYPE_FLOAT;
        case "BOOLEAN":
            return UIMAConstants.TYPE_BOOLEAN;
        default:
            return name;
        }
    }

    /**
     * Parses the default analysis engine descriptor and customizes it for one script.
     * <p>
     * The engine's type system is set to {@code aets}, which receives {@code import_impl} as its
     * only import if that import has a name or a location. An import with neither is dropped, in
     * line with the check made for the default type system import, because such an import could
     * not be resolved. If the script declares types, a capability listing them as input and
     * output types is appended. Finally the configuration parameters for main script, script
     * paths, descriptor paths, resource paths, additional scripts, engines, uimaFIT engines and
     * extensions are set.
     *
     * @param desc
     *            information extracted from the script
     * @param engineOutput
     *            path of the engine descriptor file; used as source URL of the description
     * @param option
     *            build options (language extensions and engine loaders)
     * @param scriptPaths
     *            value for the script paths parameter; skipped if {@code null}
     * @param descriptorPaths
     *            value for the descriptor paths parameter; skipped if {@code null}
     * @param resourcePaths
     *            resource paths added to those already configured in the template; may be
     *            {@code null}
     * @param import_impl
     *            import referring to the script's type system
     * @param aets
     *            type system description attached to the engine
     * @return the configured analysis engine description
     * @throws InvalidXMLException
     *             if the default analysis engine descriptor cannot be parsed
     * @throws IOException
     *             if the default analysis engine descriptor cannot be read
     */
    private AnalysisEngineDescription configureEngine(RutaDescriptorInformation desc, String engineOutput,
            RutaBuildOptions option, String[] scriptPaths, String[] descriptorPaths, String[] resourcePaths,
            Import_impl import_impl, TypeSystemDescription aets) throws InvalidXMLException, IOException {

        AnalysisEngineDescription analysisEngineDescription = UIMAFramework.getXMLParser()
                .parseAnalysisEngineDescription(new XMLInputSource(defaultAnalysisEngine));

        // Only attach an import that actually refers to something.
        if (import_impl != null && (import_impl.getName() != null || import_impl.getLocation() != null)) {
            aets.setImports(new Import[] { import_impl });
        }
        analysisEngineDescription.getAnalysisEngineMetaData().setTypeSystem(aets);

        File file = getFile(engineOutput);
        analysisEngineDescription.setSourceUrl(file.toURI().toURL());

        if (!desc.getTypeShortNames().isEmpty()) {
            Capability capability = uimaFactory.createCapability();
            for (StringTriple typeTriple : desc.getTypeTriples()) {
                capability.addInputType(typeTriple.getName(), false);
                capability.addOutputType(typeTriple.getName(), false);
            }
            // append the new capability to those defined by the template descriptor
            Capability[] capabilities = analysisEngineDescription.getAnalysisEngineMetaData().getCapabilities();
            Capability[] newArray = new Capability[capabilities.length + 1];
            System.arraycopy(capabilities, 0, newArray, 0, capabilities.length);
            newArray[capabilities.length] = capability;
            analysisEngineDescription.getAnalysisEngineMetaData().setCapabilities(newArray);
        }

        ConfigurationParameterSettings settings = analysisEngineDescription.getAnalysisEngineMetaData()
                .getConfigurationParameterSettings();

        // The main script is addressed by its qualified name if the script has a package.
        String mainScript = desc.getScriptName();
        if (!StringUtils.isBlank(desc.getPackageString())) {
            mainScript = desc.getPackageString().concat(".").concat(mainScript);
        }
        settings.setParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, mainScript);
        if (scriptPaths != null) {
            settings.setParameterValue(RutaEngine.PARAM_SCRIPT_PATHS, scriptPaths);
        }
        if (descriptorPaths != null) {
            settings.setParameterValue(RutaEngine.PARAM_DESCRIPTOR_PATHS, descriptorPaths);
        }

        // Merge the resource paths of the template with the given ones, without duplicates.
        String[] parameterValue = (String[]) settings.getParameterValue(RutaEngine.PARAM_RESOURCE_PATHS);
        Set<String> resourceLocations = new HashSet<String>();
        if (parameterValue != null && parameterValue.length != 0) {
            resourceLocations.addAll(Arrays.asList(parameterValue));
        }
        if (resourcePaths != null) {
            resourceLocations.addAll(Arrays.asList(resourcePaths));
        }
        settings.setParameterValue(RutaEngine.PARAM_RESOURCE_PATHS, resourceLocations.toArray(new String[0]));

        String[] additionalScriptsArray = desc.getImportedScripts().toArray(new String[] {});
        settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_SCRIPTS, additionalScriptsArray);

        String[] additionalEnginesArray = desc.getImportedEngines().toArray(new String[] {});
        settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINES, additionalEnginesArray);

        String[] additionalUimafitEnginesArray = desc.getImportedUimafitEngines().toArray(new String[] {});
        settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_UIMAFIT_ENGINES, additionalUimafitEnginesArray);

        configureExtensions(analysisEngineDescription, option);
        return analysisEngineDescription;
    }

    /**
     * Computes the location of {@code target} relative to {@code base}, using '/' as separator.
     * If {@code base} is not an existing directory, its parent is used as the starting point.
     *
     * @param target
     *            URI of the file the relative location should point to
     * @param base
     *            path of the referencing file or directory; may be {@code null}
     * @return the relative location, or {@code null} if {@code base} is {@code null}, if
     *         {@code target} cannot be converted to a path, or if no relative path can be computed
     */
    private String getRelativeLocation(URI target, String base) {
        if (base == null) {
            return null;
        }
        Path start = Paths.get(base);
        if (!start.toFile().isDirectory()) {
            start = start.getParent();
        }
        Path relative;
        try {
            // both steps are best effort: any failure simply means "no relative location"
            Path destination = Paths.get(target);
            relative = start.relativize(destination);
        } catch (Exception e) {
            return null;
        }
        // HOTFIX: avoid windows paths. No generic solution to access a portable string found yet
        // for Path
        return relative.toString().replace('\\', '/');
    }

    /**
     * Appends the language extensions and engine loaders of the build options to the values
     * already configured in the engine description and writes both lists back to the
     * corresponding configuration parameters.
     *
     * @param analysisEngineDescription
     *            engine description whose configuration parameter settings are updated
     * @param options
     *            build options providing the language extensions and engine loaders
     */
    private void configureExtensions(AnalysisEngineDescription analysisEngineDescription,
            RutaBuildOptions options) {
        ConfigurationParameterSettings settings = analysisEngineDescription.getAnalysisEngineMetaData()
                .getConfigurationParameterSettings();

        List<String> languageExtensions = options.getLanguageExtensions();
        String[] configuredExtensions = (String[]) settings
                .getParameterValue(RutaEngine.PARAM_ADDITIONAL_EXTENSIONS);
        String[] configuredLoaders = (String[]) settings
                .getParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINE_LOADERS);

        // configured values first, then the ones contributed by the build options
        List<String> allExtensions = new ArrayList<String>();
        if (configuredExtensions != null) {
            allExtensions.addAll(Arrays.asList(configuredExtensions));
        }
        allExtensions.addAll(languageExtensions);

        List<String> allLoaders = new ArrayList<String>();
        if (configuredLoaders != null) {
            allLoaders.addAll(Arrays.asList(configuredLoaders));
        }
        allLoaders.addAll(options.getEngineLoaders());

        settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_EXTENSIONS,
                allExtensions.toArray(new String[0]));
        settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINE_LOADERS,
                allLoaders.toArray(new String[0]));
    }

    /**
     * Parses the type system descriptor at the given URL and, if the options request it,
     * resolves its imports with the given resource manager.
     *
     * @param url
     *            location of the type system descriptor
     * @param option
     *            build options; only the resolve-imports flag is read
     * @param rm
     *            resource manager used for resolving imports
     * @return the parsed type system description
     * @throws InvalidXMLException
     *             if the descriptor cannot be parsed or its imports cannot be resolved
     * @throws IOException
     *             if the descriptor cannot be read
     */
    private TypeSystemDescription getTypeSystemDescriptor(URL url, RutaBuildOptions option, ResourceManager rm)
            throws InvalidXMLException, IOException {
        TypeSystemDescription parsed = UIMAFramework.getXMLParser()
                .parseTypeSystemDescription(new XMLInputSource(url));
        if (!option.isResolveImports()) {
            return parsed;
        }
        parsed.resolveImports(rm);
        return parsed;
    }

    /**
     * Wraps the given location in a {@link File}. No file system access takes place.
     *
     * @param location
     *            path of the file
     * @return a file object for the path
     */
    private File getFile(String location) {
        File file = new File(location);
        return file;
    }

    /**
     * Serializes the given descriptor as XML into the destination file, creating missing parent
     * directories first. The output stream is closed in all cases, so the content is flushed
     * and the file handle released even if serialization fails.
     *
     * @param desc
     *            descriptor to serialize
     * @param destination
     *            file to write; an existing file is overwritten
     * @throws SAXException
     *             if serializing the descriptor fails
     * @throws IOException
     *             if the file cannot be opened (a {@link FileNotFoundException}) or closing the
     *             stream fails
     */
    private void toFile(XMLizable desc, File destination) throws SAXException, IOException {
        // a destination given as a bare file name has no parent directory to create
        File parent = destination.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
        try (OutputStream out = new FileOutputStream(destination)) {
            XMLSerializer sax = new XMLSerializer(out);
            ContentHandler ch = sax.getContentHandler();
            ch.startDocument();
            desc.toXML(ch);
            ch.endDocument();
        }
    }

    /**
     * Looks up a resource on the classpath. The candidate name is turned into a path by
     * replacing every '.' with '/' and appending the extension; the lookup uses the
     * {@code classpath*:} prefix with a resolver created for the given class loader.
     *
     * @param candidate
     *            dot-separated name of the resource, without extension
     * @param extension
     *            extension appended to the path, including the leading dot
     * @param classloader
     *            class loader handed to the resource resolver
     * @return the URL of the first matching resource, or {@code null} if there is none
     * @throws IOException
     *             if the lookup or the URL resolution fails
     */
    public static URL checkImportExistence(String candidate, String extension, ClassLoader classloader)
            throws IOException {
        String resourcePath = candidate.replace('.', '/') + extension;
        PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(classloader);
        Resource[] matches = resolver.getResources("classpath*:" + resourcePath);
        if (matches != null && matches.length > 0) {
            return matches[0].getURL();
        }
        return null;
    }
}