org.anc.lapps.nlp4j.NLP4JCustomTrain.java Source code

Java tutorial

Introduction

Here is the source code for org.anc.lapps.nlp4j.NLP4JCustomTrain.java

Source

package org.anc.lapps.nlp4j;

import edu.emory.mathcs.nlp.bin.NLPTrain;
import edu.emory.mathcs.nlp.component.template.feature.Field;
import edu.emory.mathcs.nlp.component.template.feature.Relation;
import edu.emory.mathcs.nlp.component.template.feature.Source;
import edu.emory.mathcs.nlp.component.template.util.NLPMode;
import org.apache.commons.lang3.EnumUtils;
import org.lappsgrid.api.ProcessingService;
import org.lappsgrid.discriminator.Discriminators;
import org.lappsgrid.metadata.IOSpecification;
import org.lappsgrid.metadata.ServiceMetadata;
import org.lappsgrid.serialization.Data;
import org.lappsgrid.serialization.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.PatternSyntaxException;

/**
 * @author Alexandru Mahmoud
 */

public class NLP4JCustomTrain implements ProcessingService {
    /**
     * The Json String required by getMetadata()
     */
    private String metadata;
    private Logger logger;

    public NLP4JCustomTrain() {
        metadata = generateMetadata();
    }

    private String generateMetadata() {
        ServiceMetadata metadata = new ServiceMetadata();
        metadata.setName(this.getClass().getName());
        metadata.setDescription("The Train function from EmoryNLP's NLP4J project.");
        metadata.setVersion(Version.getVersion());
        metadata.setVendor("http://www.lappsgrid.org");
        metadata.setLicense(Discriminators.Uri.APACHE2);

        IOSpecification requires = new IOSpecification();
        requires.addFormat(Discriminators.Uri.TSV);
        requires.setEncoding("UTF-8");

        IOSpecification produces = new IOSpecification();
        produces.addFormat(Discriminators.Uri.LAPPS);
        produces.setEncoding("UTF-8");

        metadata.setRequires(requires);
        metadata.setProduces(produces);

        Data<ServiceMetadata> data = new Data<>();
        data.setDiscriminator(Discriminators.Uri.META);
        data.setPayload(metadata);
        return data.asPrettyJson();
    }

    @Override
    /**
     * Returns a JSON string containing metadata describing the service. The
     * JSON <em>must</em> conform to the json-schema at
     * <a href="http://vocab.lappsgrid.org/schema/service-schema.json">http://vocab.lappsgrid.org/schema/service-schema.json</a>
     * (processing services) or
     * <a href="http://vocab.lappsgrid.org/schema/datasource-schema.json">http://vocab.lappsgrid.org/schema/datasource-schema.json</a>
     * (datasources).
     */
    public String getMetadata() {
        return metadata;
    }

    /**
     * Entry point for a Lappsgrid service.
     * <p>
     * Each service on the Lappsgrid will accept {@code org.lappsgrid.serialization.Data} object
     * and return a {@code Data} object with a {@code org.lappsgrid.serialization.lif.Container}
     * payload.
     * <p>
     * Errors and exceptions that occur during processing should be wrapped in a {@code Data}
     * object with the discriminator set to http://vocab.lappsgrid.org/ns/error
     * <p>
     * See <a href="https://lapp.github.io/org.lappsgrid.serialization/index.html?org/lappsgrid/serialization/Data.html>org.lappsgrid.serialization.Data</a><br />
     * See <a href="https://lapp.github.io/org.lappsgrid.serialization/index.html?org/lappsgrid/serialization/lif/Container.html>org.lappsgrid.serialization.lif.Container</a><br />
     *
     * @param input A JSON string representing a Data object
     * @return A JSON string containing a Data object with a Container payload.
     */
    @Override
    public String execute(String input) {

        logger = LoggerFactory.getLogger(NLP4JCustomTrain.class);

        // Parse the JSON string into a Data object, and extract its discriminator.
        Data<String> data = Serializer.parse(input, Data.class);
        String discriminator = data.getDiscriminator();

        // If the Input discriminator is ERROR, return the Data as is, since it's already a wrapped error.
        if (Discriminators.Uri.ERROR.equals(discriminator)) {
            return input;
        }

        // If the Input discriminator is not GET, return a wrapped Error with an appropriate message.
        else if (!Discriminators.Uri.GET.equals(discriminator)) {
            String errorData = generateError(
                    "Invalid discriminator.\nExpected " + Discriminators.Uri.GET + "\nFound " + discriminator);
            logger.error(errorData);
            return errorData;
        }

        // Output an error if no payload is given, since an input is required to run the program
        if (data.getPayload() == null) {
            String errorData = generateError("No input given.");
            logger.error(errorData);
            return errorData;
        }

        // Else (if a payload is given), process the input
        else {
            // Create temporary directories to hold input and output. This is needed because
            // the RankLib methods need directories for most of their processing, so the input
            // will be given within files in a directory, and the output will be read from files
            // in the output directory.
            Path outputDirPath = null;
            Path inputDirPath = null;
            try {
                outputDirPath = Files.createTempDirectory("output");
                outputDirPath.toFile().deleteOnExit();
                inputDirPath = Files.createTempDirectory("input");
                inputDirPath.toFile().deleteOnExit();
            }
            // Since we are only handling files created by the function, there should never be
            // a problem with these files. If there is, notify the user of the error.
            catch (IOException e) {
                String errorData = generateError("Error in creating temporary input/output directories.");
                logger.error(errorData);
                return errorData;
            }

            StringBuilder params = new StringBuilder("-c ");

            try {
                String configPath = makeConfigFile(inputDirPath, data);
                if (configPath.contains("ERROR")) {
                    if (configPath.contains("INDEX ERROR")) {
                        StringBuilder errorMsg = new StringBuilder(
                                "The given list of TSV indices and TSV fields did not match.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given indices: ").append(errorParts[1]);
                        errorMsg.append("\r\nGiven fields: ").append(errorParts[2]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else if (configPath.contains("AMBIGUITY ERROR")) {
                        StringBuilder errorMsg = new StringBuilder(
                                "Invalid field given for ambiguity classes.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else if (configPath.contains("CLUSTERS ERROR")) {
                        StringBuilder errorMsg = new StringBuilder("Invalid field given for word clusters.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else if (configPath.contains("NAMED ENTITY ERROR")) {
                        StringBuilder errorMsg = new StringBuilder(
                                "Invalid field given for named entity gazetteers.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else if (configPath.contains("EMBEDDINGS ERROR")) {
                        StringBuilder errorMsg = new StringBuilder("Invalid field given for word embeddings.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else if (configPath.contains("ALGORITHM ERROR")) {
                        StringBuilder errorMsg = new StringBuilder(
                                "Invalid name given for optimizer algorithm.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else if (configPath.contains("INVALID FEATURE SOURCE ERROR")) {
                        StringBuilder errorMsg = new StringBuilder("Invalid source given for feature.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);
                        errorMsg.append("\r\nFeature line number: ").append(errorParts[2]);
                        errorMsg.append("\r\nFeature number: f").append(errorParts[3]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else if (configPath.contains("INVALID FEATURE RELATION ERROR")) {
                        StringBuilder errorMsg = new StringBuilder("Invalid relation given for feature.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);
                        errorMsg.append("\r\nFeature line number: ").append(errorParts[2]);
                        errorMsg.append("\r\nFeature number: f").append(errorParts[3]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else if (configPath.contains("INVALID FEATURE FIELD ERROR")) {
                        StringBuilder errorMsg = new StringBuilder("Invalid field given for feature.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);
                        errorMsg.append("\r\nFeature line number: ").append(errorParts[2]);
                        errorMsg.append("\r\nFeature number: f").append(errorParts[3]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }

                    else {
                        StringBuilder errorMsg = new StringBuilder(
                                "Unknown error found in configuration parameters.\r\n");
                        errorMsg.append("String returned: ").append(configPath);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }
                }

                // Call the method that converts the parameters to the format that they would
                // be in when given from command-line.
                params.append(configPath);

                String convertedParams = convertParameters(data, outputDirPath, inputDirPath).replace("\\", "/");

                if (convertedParams.contains("ERROR")) {
                    if (convertedParams.contains("MODE ERROR")) {
                        StringBuilder errorMsg = new StringBuilder("Invalid mode parameter given.\r\n");
                        String[] errorParts;
                        errorParts = configPath.split(";");
                        errorMsg.append("Given: ").append(errorParts[1]);

                        String errorData = generateError(errorMsg.toString());
                        logger.error(errorData);
                        return errorData;
                    }
                }

                params.append(convertedParams);
            }
            // Since we are only handling files created by the function, there should never be
            // a problem with these files. If there is notify
            catch (IOException e) {
                String errorData = generateError("Error in handling of temporary files.");
                logger.error(errorData);
                return errorData;
            }
            String[] paramsArray;

            // Split the parameters into an array, which will be given as the args[] argument
            // to the main methods of RankLib.
            try {
                paramsArray = params.toString().split("\\s+");
            } catch (PatternSyntaxException ex) {
                String errorData = generateError("Error in parameter syntax.");
                logger.error(errorData);
                return errorData;
            }

            // Create a stream to hold the output from System.out.println. This is necessary
            // because when running, the program will print things from many RankLib classes and
            // methods. So the printed output will be "caught" and saved to output.
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            PrintStream ps = new PrintStream(baos);
            // Save the old System.out PrintStream, to reset at the end of the program.
            PrintStream oldPrintStream = System.out;
            // Set the special stream as the out stream
            System.setOut(ps);

            NLPTrain.main(paramsArray);

            // Set System.out back to the original PrintStream
            System.out.flush();
            System.setOut(oldPrintStream);

            // Make a Map to hold both the printed, and file outputs.
            Map<String, String> outputPayload = new HashMap<>();

            String finalPrint;

            if (data.getParameter("saveModel") != null) {
                StringBuilder toRemove = new StringBuilder(
                        "Name not implemented for OnlineComponent. Input name - ");
                toRemove.append(data.getParameter("saveModel")).append(".xz will be ignored.\r\n");
                finalPrint = baos.toString().replace(toRemove.toString(), "");
            }

            else {
                finalPrint = baos.toString();
            }
            // Add the printed text caught from the out stream to the payload
            // with the "Printed" key

            outputPayload.put("Printed", finalPrint);

            // Parse the Map to Json, then put it as a payload to a Data object with a LAPPS
            // discriminator and return it as the final output
            String outputJson = Serializer.toJson(outputPayload);
            Data<String> output = new Data<>(Discriminators.Uri.LAPPS, outputJson);
            return output.asPrettyJson();
        }

    }

    /** This method takes in the input data and returns its parameters as an array of strings,
     * representing the parameters as they would be written to run the jar files from command-line,
     * to be given as input to the main classes.
     *
     * @param data A Data object
     * @param outputDirPath A Path to the output directory
     * @param inputDirPath A Path to the input directory
     * @return A String representing the parameters of the Data object.
     */
    protected String convertParameters(Data<String> data, Path outputDirPath, Path inputDirPath)
            throws IOException {
        StringBuilder params = new StringBuilder();

        // Get the payload and convert it back into a HashMap to get all input content from it.
        String payloadJson = data.getPayload();
        Map<String, String> payload = Serializer.parse(payloadJson, HashMap.class);

        // This boolean for development will be set to true if at least one of the keys corresponds
        // to a development file. This is needed since this is an optional parameter that might take
        // multiple input files.
        boolean developBool = false;

        // These are the parameters for training that give input.
        // Since the input can include many train and development files, we process
        // all keys expecting their labels to include "train", "dev", or "config."
        for (String key : payload.keySet()) {
            // If the input file is a train file, we take its content and save it to a
            // temporary file in the input directory. The entire directory will be given
            // as the train directory path, and the extension ".trn" will be specified
            // to distinguish the train files.
            if (key.contains("train")) {
                String fileContent = payload.get(key);
                writeTempFile(key, inputDirPath, fileContent, ".trn");
            }

            // If the input file is a development file, we take its content and save it to a
            // temporary file in the input directory, after setting the boolean to true.
            // The entire directory will be given as the development directory path and the
            // extension ".dev" will be specified to distinguish the development files.
            else if (key.contains("develop")) {
                developBool = true;
                String fileContent = payload.get(key);
                writeTempFile(key, inputDirPath, fileContent, ".dev");
            }

            // TODO: Add prev for previously trained models: -p
        }

        // Add the train directory parameter with the input directory path as an argument,
        // and specify the "trn" extension for train files.
        params.append(" -t ").append(inputDirPath).append(" -te trn");

        // If the boolean is set to true, add the development directory parameter with the
        // input directory path as an argument, and specify the "dev" extension for train files.
        if (developBool) {
            params.append(" -d ").append(inputDirPath).append(" -de dev");
        }

        if (data.getParameter("mode") != null) {
            String givenMode = (String) data.getParameter("mode");

            if (!EnumUtils.isValidEnum(NLPMode.class, givenMode)) {
                StringBuilder errorMsg = new StringBuilder("MODE ERROR;");
                errorMsg.append(givenMode);
                return errorMsg.toString();
            }
            params.append(" -mode ").append(data.getParameter("mode"));
        }

        // "Name not implemented for Online component."
        if (data.getParameter("saveModel") != null) {
            params.append(" -m ").append(outputDirPath).append("/").append(data.getParameter("saveModel"))
                    .append(".xz");
        }

        if (data.getParameter("cv") != null) {
            params.append(" -cv ").append(data.getParameter("cv"));
        }

        // Return the resulting list of parameters to be processed as an array
        // and given as input to the NLP4J Train main method.
        return params.toString();
    }

    /** This method creates the appropriate configuration file in the given temporary input
     * directory, and returns the path to that file as a String.
     *
     * @param dir The path to input directory in which the configuration file should be created
     * @param inputData The input data from which to extract configuration details
     * @return A String representing the path to the created configuration file.
     */
    protected String makeConfigFile(Path dir, Data<String> inputData) throws IOException {
        // This will hold the text for the configuration file, which is in XML format.
        StringBuilder configTxt = new StringBuilder("<configuration>\r\n");

        // START OF TSV FORMAT
        // Create an array of strings that will hold the field indices, if they are specified
        String[] tsvIndices = null;
        String indicesString = "";

        // Split the indices by commas, and remove whitespaces to have an array representing
        // all indices for the corresponding fields
        if (inputData.getParameter("tsv-indices") != null) {
            indicesString = (String) inputData.getParameter("tsv-indices");
            tsvIndices = indicesString.split(",[ ]*");
        }

        if (inputData.getParameter("tsv-fields") != null) {
            String fieldsString = (String) inputData.getParameter("tsv-fields");
            // Split and remove white spaces
            String[] tsvFields = fieldsString.split(",[ ]*");
            configTxt.append("    <tsv>\r\n");

            // If the indices are specified, add each field to its respective index
            if (tsvIndices != null) {
                String index, field;
                for (int i = 0; i < tsvFields.length; i++) {
                    try {
                        index = tsvIndices[i];
                        field = tsvFields[i];
                    }

                    // If any field does not have a corresponding index, and the indices
                    // are specified, return a string specifying that there is an error,
                    // which will be handled by the execute function after returning.
                    // (We return a String here and then handle the error in execute in
                    // order to wrap the error appropriately before returning it to the user)
                    catch (IndexOutOfBoundsException e) {
                        StringBuilder errorMsg = new StringBuilder("INDEX ERROR;");
                        errorMsg.append(indicesString).append(";").append(fieldsString);
                        return errorMsg.toString();
                    }

                    configTxt.append("        <column index=\"").append(index);
                    configTxt.append("\" field=\"").append(field).append("\"/>\r\n");
                }
            }

            // If the indices are not specified, assume the fields are listed in the order
            // they appear in the tsv file, starting with index 0. In this case, go through the
            // fields and use the index of each field as its index in the configuration file.
            else {
                for (int i = 0; i < tsvFields.length; i++) {
                    String field = tsvFields[i];
                    configTxt.append("        <column index=\"").append(i);
                    configTxt.append("\" field=\"").append(field).append("\"/>\r\n");
                }
            }
            configTxt.append("    </tsv>\r\n\r\n");
        }
        // END OF TSV FORMAT

        // START OF LEXICA
        // This is the path for all lexica dependent files
        String lexicaPath = "src/main/resources/lexica/";

        // A boolean that will be set to true when the first lexical parameter is set.
        // This is needed because we don't know which, if any, of the components will be
        // specified, thus we don't know if the lexica header is needed.
        boolean lexicaSet = false;

        if (inputData.getParameter("ambiguity") != null) {
            // Three arrays holding the names, which users can choose from, and their
            // corresponding filenames and field names
            String[] ambiguityNames = { "simplified", "simplified-lowercase" };
            String[] ambiguityFiles = { "en-ambiguity-classes-simplified.xz",
                    "en-ambiguity-classes-simplified-lowercase.xz" };
            String[] ambiguityFields = { "word_form_simplified", "word_form_simplified_lowercase" };

            if (!lexicaSet) {
                lexicaSet = true;
                configTxt.append("    <lexica>\r\n");
            }

            String givenName = (String) inputData.getParameter("ambiguity");
            StringBuilder ambiguityTxt = null;
            int index = 0;

            // Loop through the possible names and complete the corresponding XML format for the
            // configuration file using the index that matches the given name
            while ((ambiguityTxt == null) && (index < ambiguityNames.length)) {
                if (ambiguityNames[index] == givenName) {
                    ambiguityTxt = new StringBuilder("        <ambiguity_classes field=\"");
                    ambiguityTxt.append(ambiguityFields[index]).append("\">").append(lexicaPath);
                    ambiguityTxt.append(ambiguityFiles[index]).append("</ambiguity_classes>\r\n");
                }
                index++;
            }

            // If no names matches, the user gave an unknown name, thus return a string mentioning the error
            // which will be handled in the execute method and properly wrapped as an error data object
            // to be returned to the user
            if (ambiguityTxt == null) {
                StringBuilder errorMsg = new StringBuilder("AMBIGUITY ERROR;");
                errorMsg.append(givenName);
                return errorMsg.toString();
            }

            else {
                configTxt.append(ambiguityTxt);
            }
        }

        if (inputData.getParameter("clusters") != null) {
            // Three arrays holding the names, which users can choose from, and their
            // corresponding filenames and field names
            String[] clustersNames = { "brown-simplified-lc", "brown-twit-lc" };
            String[] clustersFiles = { "en-brown-clusters-simplified-lowercase.xz",
                    "en-brown-clusters-twit-lowercase.xz" };
            String[] clustersFields = { "word_form_simplified_lowercase", "word_form_lowercase" };

            if (!lexicaSet) {
                lexicaSet = true;
                configTxt.append("    <lexica>\r\n");
            }

            String givenName = (String) inputData.getParameter("clusters");
            StringBuilder clustersTxt = null;
            int index = 0;

            // Loop through the possible names and complete the corresponding XML format for the
            // configuration file using the index that matches the given name
            while ((clustersTxt == null) && (index < clustersNames.length)) {
                if (clustersNames[index] == givenName) {
                    clustersTxt = new StringBuilder("        <word_clusters field=\"");
                    clustersTxt.append(clustersFields[index]).append("\">").append(lexicaPath);
                    clustersTxt.append(clustersFiles[index]).append("</word_clusters>\r\n");
                }
                index++;
            }

            // If no names matches, the user gave an unknown name, thus return a string mentioning the error
            // which will be handled in the execute method and properly wrapped as an error data object
            // to be returned to the user
            if (clustersTxt == null) {
                StringBuilder errorMsg = new StringBuilder("CLUSTERS ERROR;");
                errorMsg.append(givenName);
                return errorMsg.toString();
            }

            else {
                configTxt.append(clustersTxt);
            }
        }

        if (inputData.getParameter("gazetteers") != null) {
            // Three arrays holding the names, which users can choose from, and their
            // corresponding filenames and field names
            String[] namedEntityNames = { "simplified", "simplified-lowercase" };
            String[] namedEntityFiles = { "en-named-entity-gazetteers-simplified.xz",
                    "en-named-entity-gazetteers-simplified-lowercase.xz" };
            String[] namedEntityFields = { "word_form_simplified", "word_form_simplified_lowercase" };

            if (!lexicaSet) {
                lexicaSet = true;
                configTxt.append("    <lexica>\r\n");
            }

            String givenName = (String) inputData.getParameter("gazetteers");
            StringBuilder NETxt = null;
            int index = 0;

            // Loop through the possible names and complete the corresponding XML format for the
            // configuration file using the index that matches the given name
            while ((NETxt == null) && (index < namedEntityNames.length)) {
                if (namedEntityNames[index] == givenName) {
                    NETxt = new StringBuilder("        <named_entity_gazetteers field=\"");
                    NETxt.append(namedEntityFields[index]).append("\">").append(lexicaPath);
                    NETxt.append(namedEntityFiles[index]).append("</named_entity_gazetteers>\r\n");
                }
                index++;
            }

            // If no names matches, the user gave an unknown name, thus return a string mentioning the error
            // which will be handled in the execute method and properly wrapped as an error data object
            // to be returned to the user
            if (NETxt == null) {
                StringBuilder errorMsg = new StringBuilder("NAMED ENTITY ERROR;");
                errorMsg.append(givenName);
                return errorMsg.toString();
            }

            else {
                configTxt.append(NETxt);
            }
        }

        if (inputData.getParameter("embeddings") != null) {
            // Three arrays holding the names, which users can choose from, and their
            // corresponding filenames and field names
            String[] embeddingsNames = { "undigitalized" };
            String[] embeddingsFiles = { "en-word-embeddings-undigitalized.xz" };
            String[] embeddingsFields = { "word_form_undigitalized" };

            if (!lexicaSet) {
                lexicaSet = true;
                configTxt.append("    <lexica>\r\n");
            }

            String givenName = (String) inputData.getParameter("embeddings");
            StringBuilder embeddingsTxt = null;
            int index = 0;

            // Loop through the possible names and complete the corresponding XML format for the
            // configuration file using the index that matches the given name
            while ((embeddingsTxt == null) && (index < embeddingsNames.length)) {
                if (embeddingsNames[index] == givenName) {
                    embeddingsTxt = new StringBuilder("        <word_embeddings field=\"");
                    embeddingsTxt.append(embeddingsFields[index]).append("\">").append(lexicaPath);
                    embeddingsTxt.append(embeddingsFiles[index]).append("</word_embeddings>\r\n");
                }
                index++;
            }

            // If no names matches, the user gave an unknown name, thus return a string mentioning the error
            // which will be handled in the execute method and properly wrapped as an error data object
            // to be returned to the user
            if (embeddingsTxt == null) {
                StringBuilder errorMsg = new StringBuilder("EMBEDDINGS ERROR;");
                errorMsg.append(givenName);
                return errorMsg.toString();
            }

            else {
                configTxt.append(embeddingsTxt);
            }
        }

        // If no lexica was set, remove the heading added at the beginning
        // If lexica was set, end the lexica section in the XML file
        if (lexicaSet) {
            configTxt.append("    </lexica>\r\n\r\n");
        }
        // END OF LEXICA

        // START OF OPTIMIZER
        if (inputData.getParameter("algorithm") != null) {
            String givenName = (String) inputData.getParameter("algorithm");

            //ArrayList<String> algorithmNames = new ArrayList<>();
            //algorithmNames.add("perceptron");
            //algorithmNames.add("softmax-regression");
            //algorithmNames.add("adagrad");
            //algorithmNames.add("adagrad-mini-batch");
            //algorithmNames.add("adagrad-regression");
            //algorithmNames.add("adadelta-mini-batch");

            //if(algorithmNames.contains(givenName))

            if (!(givenName.equals("perceptron") || givenName.equals("softmax-regression")
                    || givenName.equals("adagrad") || givenName.equals("adagrad-mini-batch")
                    || givenName.equals("adagrad-regression") || givenName.equals("adadelta-mini-batch"))) {
                StringBuilder errorMsg = new StringBuilder("ALGORITHM ERROR;");
                errorMsg.append(givenName);
                return errorMsg.toString();
            }

            else {
                configTxt.append("    <optimizer>\r\n");
                configTxt.append("        <algorithm>").append(givenName).append("</algorithm>\r\n");
                if (inputData.getParameter("regularization") != null) {
                    String givenNumber = (String) inputData.getParameter("regularization");
                    configTxt.append("        <l1_regularization>").append(givenNumber)
                            .append("</l1_regularization>\r\n");
                }

                if (inputData.getParameter("rate") != null) {
                    String givenNumber = (String) inputData.getParameter("rate");
                    configTxt.append("        <learning_rate>").append(givenNumber).append("</learning_rate>\r\n");
                }

                if (inputData.getParameter("cutoff") != null) {
                    String givenNumber = (String) inputData.getParameter("cutoff");
                    configTxt.append("        <feature_cutoff>").append(givenNumber)
                            .append("</feature_cutoff>\r\n");
                }

                boolean lolsSet = false;

                if (inputData.getParameter("lols-fixed") != null) {
                    lolsSet = true;
                    String givenNumber = (String) inputData.getParameter("lols-fixed");
                    configTxt.append("        <lols fixed=\"").append(givenNumber).append("\"");
                }

                if (inputData.getParameter("lols-decaying") != null) {
                    String givenNumber = (String) inputData.getParameter("lols-decaying");

                    if (lolsSet) {
                        configTxt.append(" decaying=\"").append(givenNumber).append("\"/>\r\n");
                    }

                    // This is assuming that one can set decaying without fixed.
                    // TODO: Check if this is possible
                    else {
                        configTxt.append("        <lols decaying=\"").append(givenNumber).append("\"/>\r\n");
                    }
                }

                if (inputData.getParameter("max-epoch") != null) {
                    String givenNumber = (String) inputData.getParameter("max-epoch");
                    configTxt.append("        <max_epoch>").append(givenNumber).append("</max_epoch>\r\n");
                }

                if (inputData.getParameter("batch-size") != null) {
                    String givenNumber = (String) inputData.getParameter("batch-size");
                    configTxt.append("        <batch_size>").append(givenNumber).append("</batch_size>\r\n");
                }

                if (inputData.getParameter("bias") != null) {
                    String givenNumber = (String) inputData.getParameter("bias");
                    configTxt.append("        <bias>").append(givenNumber).append("</bias>\r\n");
                }

                configTxt.append("    </optimizer>\r\n\r\n");
            }
        }
        // END OF OPTIMIZER

        // START OF FEATURES
        // Before starting a counter and examining subsequent feature templates, check if the feature
        // indexed at 0 is specified, if so create the header for all feature templates then check
        // for further indices
        if ((inputData.getParameter("1-f0-source") != null) && (inputData.getParameter("1-f0-field") != null)) {

            configTxt.append("    <feature_template>\r\n");

            boolean featureFound = true;
            boolean lineFound = true;

            // Counter holding the index of the current feature.
            int f = 0; // NLP4J starts feature indexing at 0
            // Counter holding the index of the current feature line
            int i = 1; // Starts at 1 because we are checking for features within the first feature line

            // These Strings will hold the user variables
            String source, window, relation, field, value;

            // These Strings will hold the name of the parameter to be accessed
            // They will be changed to look for further features and further lines
            String sourceKey = "1-f0-source";
            String windowKey = "1-f0-window";
            String relationKey = "1-f0-relation";
            String fieldKey = "1-f0-field";
            String valueKey = "1-f0-value";

            // These StringBuilders will be used to create the keys when changing the feature number
            // or line being checked
            StringBuilder sourceBuilder, windowBuilder, relationBuilder, fieldBuilder, valueBuilder;

            while (lineFound) {

                while (featureFound) {

                    if ((inputData.getParameter(sourceKey) != null) && (inputData.getParameter(fieldKey) != null)) {
                        featureFound = true;

                        configTxt.append(" ");
                        if (f == 0) {
                            configTxt.append("       <feature ");
                        }

                        source = (String) inputData.getParameter(sourceKey);
                        if (!EnumUtils.isValidEnum(Source.class, source)) {
                            StringBuilder errorMsg = new StringBuilder("INVALID FEATURE SOURCE ERROR;");
                            errorMsg.append(source).append(";").append(i).append(";").append(f);
                            return errorMsg.toString();
                        }

                        configTxt.append("f").append(f);
                        configTxt.append("=\"").append(source);

                        if (inputData.getParameter(windowKey) != null) {
                            window = (String) inputData.getParameter(windowKey);
                            if (!((window.contains("-")) || (window.contains("+")))) {
                                configTxt.append("+");
                            }
                            configTxt.append(window);
                        }

                        if (inputData.getParameter(relationKey) != null) {
                            relation = (String) inputData.getParameter(relationKey);
                            if (!EnumUtils.isValidEnum(Relation.class, relation)) {
                                StringBuilder errorMsg = new StringBuilder("INVALID FEATURE RELATION ERROR;");
                                errorMsg.append(source).append(";").append(i).append(";").append(f);
                                return errorMsg.toString();
                            }
                            configTxt.append("_").append(relation);
                        }

                        field = (String) inputData.getParameter(fieldKey);
                        if (!EnumUtils.isValidEnum(Field.class, field)) {
                            StringBuilder errorMsg = new StringBuilder("INVALID FEATURE FIELD ERROR;");
                            errorMsg.append(source).append(";").append(i).append(";").append(f);
                            return errorMsg.toString();
                        }
                        configTxt.append(":").append(field);

                        if (inputData.getParameter(valueKey) != null) {
                            value = (String) inputData.getParameter(valueKey);
                            configTxt.append(":").append(value);
                        }

                        configTxt.append("\"");

                        f++;

                    }

                    else {
                        // If f is not 0, then there has been at least one feature on this line, which
                        // means this line wasn't empty
                        if (f != 0) {
                            lineFound = true;
                            featureFound = true;
                            configTxt.append("/>\r\n");
                            f = 0;
                            i++;
                        }

                        else {
                            featureFound = false;
                            lineFound = false;
                        }
                    }

                    sourceBuilder = new StringBuilder();
                    sourceBuilder.append(i).append("-f").append(f).append("-source");
                    sourceKey = sourceBuilder.toString();

                    fieldBuilder = new StringBuilder();
                    fieldBuilder.append(i).append("-f").append(f).append("-field");
                    fieldKey = fieldBuilder.toString();

                    windowBuilder = new StringBuilder();
                    windowBuilder.append(i).append("-f").append(f).append("-window");
                    windowKey = windowBuilder.toString();

                    relationBuilder = new StringBuilder();
                    relationBuilder.append(i).append("-f").append(f).append("-relation");
                    relationKey = relationBuilder.toString();

                    valueBuilder = new StringBuilder();
                    valueBuilder.append(i).append("-f").append(f).append("-value");
                    valueKey = valueBuilder.toString();
                }
            }
            configTxt.append("    </feature_template>\r\n\r\n");
        }
        // END OF FEATURES

        configTxt.append("</configuration>");

        Path filePath = writeTempFile("config", dir, configTxt.toString(), ".xml");

        return filePath.toString().replace("\\", "/");
    }

    /** This method takes an error message and returns it in a {@code Data}
     * object with the discriminator set to http://vocab.lappsgrid.org/ns/error
     *
     * @param message A string representing the error message
     * @return A JSON string containing a Data object with the message as a payload.
     */
    protected String generateError(String message) {
        Data<String> data = new Data<>();
        data.setDiscriminator(Discriminators.Uri.ERROR);
        data.setPayload(message);
        return data.asPrettyJson();
    }

    /** This method creates a temporary text file at a certain directory, and writes
     * the given content into the file. The file will also be set to delete on exit.
     *
     * @param fileName The prefix for the temporary file to be created
     * @param dirPath The path to the directory in which the file should be created
     * @param fileTxt The text to be written in the file
     * @param extension The extension to be given to the temporary file
     * @return A path to the temporary text file that was created
     */
    protected Path writeTempFile(String fileName, Path dirPath, String fileTxt, String extension)
            throws IOException {
        Path filePath = Files.createTempFile(dirPath, fileName, extension);
        File file = filePath.toFile();
        PrintWriter writer = new PrintWriter(file, "UTF-8");
        writer.print(fileTxt);
        writer.close();
        file.deleteOnExit();
        return filePath;
    }

}