org.opentox.jaqpot3.qsar.util.PMMLProcess.java Source code

Java tutorial

Introduction

Here is the source code for org.opentox.jaqpot3.qsar.util.PMMLProcess.java

Source

/*
 *
 * Jaqpot - version 3
 *
 * The JAQPOT-3 web services are OpenTox API-1.2 compliant web services. Jaqpot
 * is a web application that supports model training and data preprocessing algorithms
 * such as multiple linear regression, support vector machines, neural networks
 * (an in-house implementation based on an efficient algorithm), an implementation
 * of the leverage algorithm for domain of applicability estimation and various
 * data preprocessing algorithms like PLS and data cleanup.
 *
 * Copyright (C) 2009-2012 Pantelis Sopasakis & Charalampos Chomenides
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contact:
 * Pantelis Sopasakis
 * chvng@mail.ntua.gr
 * Address: Iroon Politechniou St. 9, Zografou, Athens Greece
 * tel. +30 210 7723236
 *
 */

package org.opentox.jaqpot3.qsar.util;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang.StringUtils;
import org.dmg.pmml.PMML;
import org.dmg.pmml.TransformationDictionary;
import org.jpmml.model.ImportFilter;
import org.jpmml.model.JAXBUtil;
import org.opentox.jaqpot3.exception.JaqpotException;
import org.opentox.jaqpot3.qsar.serializable.FastRbfNnModel;
import org.opentox.jaqpot3.qsar.serializable.PLSModel;
import org.opentox.jaqpot3.resources.collections.Algorithms;
import org.opentox.jaqpot3.util.Configuration;
import org.opentox.toxotis.core.component.Feature;
import org.opentox.toxotis.core.component.Model;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import weka.classifiers.functions.LinearRegression;
import weka.classifiers.functions.SVMreg;
import weka.classifiers.functions.supportVector.Kernel;
import weka.core.Instances;
import weka.filters.supervised.attribute.PLSFilter;

/**
 * Builds PMML (XML) representations of trained models.
 *
 * <p>{@link #generatePMML(Model)} selects a generator according to the model's
 * algorithm (MLR, PLS filter, SVM or fastRbfNn). Every generator assembles the
 * document as text: a common header, a data dictionary listing the model's
 * independent features, the transformation dictionary copied from the PMML
 * stored with the model (when there is one), the algorithm-specific model
 * element and finally the training-quality figures.
 *
 * <p>Feature and dataset URIs are XML-escaped before they are written into
 * attribute values, so that characters such as {@code &} or {@code "} cannot
 * break the generated document.
 *
 * @author Pantelis Sopasakis
 * @author Charalampos Chomenides
 */

public class PMMLProcess {

    private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PMMLProcess.class);

    /** Message logged and thrown when PMML generation fails for an unexpected reason. */
    private static final String GENERATION_FAILED = "Unexpected exception was caught while generating"
            + " the PMML representation of a trained model.";

    /** Regex fragment: unsigned decimal number with optional fraction and exponent. */
    private static final String NUMBER = "(?:0|[1-9]\\d*)(?:\\.\\d*)?(?:[eE][+\\-]?\\d+)?";

    /** A "coefficient * k[index]" term; group 1 is the coefficient, group 2 the index. */
    private static final Pattern SUPPORT_VECTOR_TERM = Pattern
            .compile("\\s*([+\\-]?" + NUMBER + ")\\s*\\*\\s*k\\[([0-9]*)\\]");

    /** A signed number, searched for once all "k[index]" terms have been removed. */
    private static final Pattern SUPPORT_VECTOR_OFFSET = Pattern.compile("\\s*([+\\-]\\s*?" + NUMBER + ")");

    /** A "coefficient * (normalized)" term; group 1 is the (optionally signed) coefficient. */
    private static final Pattern LINEAR_TERM = Pattern
            .compile("\\s*([+\\-]?\\s*" + NUMBER + ")\\s*\\*\\s*\\(normalized\\)");

    /** Same as {@link #LINEAR_TERM} but also consuming the rest of the line. */
    private static final Pattern LINEAR_TERM_LINE = Pattern.compile(LINEAR_TERM.pattern() + ".*");

    /** An optionally signed number, searched for once all "(normalized)" terms have been removed. */
    private static final Pattern LINEAR_OFFSET = Pattern.compile("\\s*([+\\-]?\\s*" + NUMBER + ")");

    /**
     * Generates the PMML representation of a trained model.
     *
     * @param model the trained model; its algorithm decides which generator is used
     * @return the PMML document as a string
     * @throws JaqpotException if the document cannot be generated
     * @throws UnsupportedOperationException if the model's algorithm is none of
     *         MLR, PLS filter, SVM or fastRbfNn
     */
    public static String generatePMML(Model model) throws JaqpotException {
        if (model.getAlgorithm().equals(Algorithms.mlr())) {
            return generateMLR(model);
        }
        if (model.getAlgorithm().equals(Algorithms.plsFilter())) {
            return generatePLS(model);
        }
        if (model.getAlgorithm().equals(Algorithms.svm())) {
            return generateSVM(model);
        }
        if (model.getAlgorithm().equals(Algorithms.fastRbfNn())) {
            return generateFastRbf(model);
        }
        throw new UnsupportedOperationException("PMML Representation for this model is not implemented yet.");
    }

    /**
     * Generates PMML for a multiple linear regression model.
     *
     * <p>The last element of the coefficient array is written as the intercept.
     * The leading elements are paired, in order, with the model's independent
     * features and then with the derived fields of the stored transformation
     * dictionary (if any).
     *
     * @param model a model whose serialized payload is a Weka {@code LinearRegression}
     * @return the PMML document
     * @throws JaqpotException if generation fails
     */
    private static String generateMLR(Model model) throws JaqpotException {
        LinearRegression wekaModel = (LinearRegression) model.getActualModel().getSerializableActualModel();
        byte[] pmmlFile = model.getActualModel().getPmml();

        String uuid = model.getUri().getId();
        String pmmlIntro = Configuration.getStringProperty("pmml.intro");
        StringBuilder pmml = new StringBuilder();
        try {
            final double[] coefficients = wekaModel.coefficients();

            // The MLR header carries no <DatasetID> element.
            openModelElement(pmml, pmmlIntro, uuid, "MLR Model", "algorithm/mlr");
            closeModelElement(pmml);
            appendDataDictionary(pmml, model);
            appendTransformationDictionary(pmml, pmmlFile);

            String dependentFeature = firstDependentFeatureUri(model);
            appendRegressionModelStart(pmml, uuid);
            pmml.append("<MiningSchema>\n");
            appendIndependentMiningFields(pmml, model);
            appendPredictedMiningField(pmml, dependentFeature);
            pmml.append("</MiningSchema>\n");

            pmml.append("<RegressionTable intercept=\"" + coefficients[coefficients.length - 1] + "\">\n");

            // k keeps running across both loops: derived fields continue where
            // the independent features stopped.
            int k = 0;
            for (k = 0; k < model.getIndependentFeatures().size(); k++) {
                appendNumericPredictor(pmml, model.getIndependentFeatures().get(k).getUri().toString(),
                        coefficients[k]);
            }
            if (hasContent(pmmlFile)) {
                TransformationDictionary trDir = loadPMMLObject(pmmlFile).getTransformationDictionary();
                if (trDir != null) {
                    int derivedCount = trDir.getDerivedFields().size();
                    for (int j = 0; j < derivedCount; j++, k++) {
                        appendNumericPredictor(pmml, trDir.getDerivedFields().get(j).getName().toString(),
                                coefficients[k]);
                    }
                }
            }
            pmml.append("</RegressionTable>\n");
            appendModelQuality(pmml, model);
            pmml.append("</RegressionModel>\n");
            pmml.append("</PMML>\n\n");
        } catch (UnsupportedEncodingException ex) {
            throw encodingFailure(ex);
        } catch (Exception ex) {
            throw generationFailure(ex);
        }
        return pmml.toString();
    }

    /**
     * Generates PMML for a support vector regression model.
     *
     * <p>The kernel name is read from the model parameter {@code kernel}. For the
     * {@code rbf} and {@code polynomial} kernels the support-vector terms are
     * parsed from the regressor's textual description; for the {@code linear}
     * kernel the attribute weights are parsed instead. Support vectors are
     * written in ascending order of the index found in their {@code k[index]}
     * term, and that index is used in the emitted vector ids.
     *
     * @param model a model whose serialized payload is a Weka {@code SVMreg}
     * @return the PMML document
     * @throws JaqpotException if generation fails
     */
    private static String generateSVM(Model model) throws JaqpotException {
        SVMreg regressor = (SVMreg) model.getActualModel().getSerializableActualModel();
        byte[] pmmlFile = model.getActualModel().getPmml();

        String uuid = model.getUri().getId();
        String pmmlIntro = Configuration.getStringProperty("pmml.intro");
        StringBuilder pmml = new StringBuilder();
        // Locale.ROOT keeps the comparison with "linear"/"rbf"/"polynomial"
        // independent of the JVM's default locale.
        String kernel = GenericUtils.getValueFromSet(model.getParameters(), "kernel").toLowerCase(Locale.ROOT);
        boolean rbfKernel = StringUtils.equals(kernel, "rbf");
        boolean polynomialKernel = StringUtils.equals(kernel, "polynomial");
        boolean linearKernel = StringUtils.equals(kernel, "linear");

        try {
            openModelElement(pmml, pmmlIntro, uuid, "SVM Model", "algorithm/svm");
            appendDatasetId(pmml, escapeXmlAttribute(model.getDataset().toString()));
            closeModelElement(pmml);
            appendDataDictionary(pmml, model);
            appendTransformationDictionary(pmml, pmmlFile);

            String dependentFeature = firstDependentFeatureUri(model);
            String svmRepresentation = linearKernel ? "Coefficients" : "SupportVectors";
            pmml.append("<SupportVectorMachineModel modelName=\"" + uuid + "\""
                    + " algorithmName=\"supportVectorMachine\" functionName=\"regression\" svmRepresentation=\""
                    + svmRepresentation + "\">\n");

            pmml.append("<MiningSchema>\n");
            appendIndependentMiningFields(pmml, model);
            appendPredictedMiningField(pmml, dependentFeature);
            pmml.append("</MiningSchema>\n");

            if (rbfKernel) {
                String gamma = GenericUtils.getValueFromSet(model.getParameters(), "gamma");
                pmml.append("<RadialBasisKernelType gamma=\"" + gamma
                        + "\" description=\"Radial basis kernel type\"/>\n");
            } else if (polynomialKernel) {
                String gamma = GenericUtils.getValueFromSet(model.getParameters(), "gamma");
                String degree = GenericUtils.getValueFromSet(model.getParameters(), "degree");
                pmml.append("<PolynomialKernelType coef0=\"0\" gamma=\"" + gamma + "\" degree=\"" + degree
                        + "\" description=\"Polynomial kernel type\"/>\n");
            } else if (linearKernel) {
                pmml.append("<LinearKernelType description=\"Linear kernel type\"/>\n");
            }

            if (rbfKernel || polynomialKernel) {
                String description = regressor.toString();
                // Sorted by the index parsed from "k[index]"; see getSupportVectors.
                Map<Integer, String> supportVectors = getSupportVectors(description);
                String offset = getSupportVectorsCoefficientB(description);

                pmml.append("<VectorDictionary numberOfVectors=\"" + supportVectors.size() + "\">\n");
                pmml.append("<VectorFields  numberOfFields=\"" + model.getIndependentFeatures().size() + "\">\n");
                for (Feature feature : model.getIndependentFeatures()) {
                    pmml.append("<FieldRef field=\"" + escapeXmlAttribute(feature.getUri().toString())
                            + "\" />\n");
                }
                pmml.append("</VectorFields>\n");
                // The indices are not necessarily 0..n-1, so the ids are taken
                // from the map keys rather than from a running counter.
                for (Integer index : supportVectors.keySet()) {
                    pmml.append("<VectorInstance id=\"k" + index + "\">\n");
                    pmml.append("</VectorInstance>\n");
                }
                pmml.append("</VectorDictionary>\n");

                pmml.append("<SupportVectorMachine targetCategory=\"no\" alternateTargetCategory=\"yes\">\n");
                pmml.append("<SupportVectors numberOfAttributes=\"" + model.getIndependentFeatures().size()
                        + "\" numberOfSupportVectors=\"" + supportVectors.size() + "\">\n");
                for (Integer index : supportVectors.keySet()) {
                    pmml.append("<SupportVector vectorId=\"k" + index + "\">\n");
                    pmml.append("</SupportVector>\n");
                }
                pmml.append("</SupportVectors>\n");

                pmml.append("<Coefficients absoluteValue=\"" + offset + "\" numberOfCoefficients=\""
                        + supportVectors.size() + "\">\n");
                // values() follows the same key order as the two loops above.
                for (String coefficient : supportVectors.values()) {
                    pmml.append("<Coefficient value=\"" + coefficient + "\"/>\n");
                }
                pmml.append("</Coefficients>\n");

                pmml.append("</SupportVectorMachine>\n");
            } else if (linearKernel) {
                String description = regressor.toString();
                List<String> linearCoeffs = getSVMLinearCoefficients(description);
                String offset = getSVMLinearCoefficientB(description);

                pmml.append("<SupportVectorMachine targetCategory=\"no\" alternateTargetCategory=\"yes\">\n");

                pmml.append("<Coefficients absoluteValue=\"" + offset + "\" numberOfCoefficients=\""
                        + linearCoeffs.size() + "\">\n");
                for (String coefficient : linearCoeffs) {
                    pmml.append("<Coefficient value=\"" + coefficient + "\"/>\n");
                }
                pmml.append("</Coefficients>\n");

                pmml.append("</SupportVectorMachine>\n");
            }
            appendModelQuality(pmml, model);
            pmml.append("</SupportVectorMachineModel>\n");
            pmml.append("</PMML>\n\n");
        } catch (UnsupportedEncodingException ex) {
            throw encodingFailure(ex);
        } catch (Exception ex) {
            throw generationFailure(ex);
        }
        return pmml.toString();
    }

    /**
     * Generates PMML for a PLS filter model.
     *
     * <p>The dataset URI in the header is URL-encoded with the encoding named by
     * the {@code jaqpot.urlEncoding} configuration property. The model element
     * lists the independent and predicted features followed by a fixed parameter
     * list; no regression table is written.
     *
     * @param model a model whose serialized payload is a {@code PLSModel}
     * @return the PMML document
     * @throws JaqpotException if generation fails or the configured encoding is unsupported
     */
    private static String generatePLS(Model model) throws JaqpotException {
        // Neither local is used below; both statements are retained so that a
        // payload of the wrong type still fails here, as it did before.
        PLSModel actual = (PLSModel) model.getActualModel().getSerializableActualModel();
        PLSFilter plsFilter = actual.getPls();
        byte[] pmmlFile = model.getActualModel().getPmml();

        String uuid = model.getUri().getId();
        String pmmlIntro = Configuration.getStringProperty("pmml.intro");
        StringBuilder pmml = new StringBuilder();
        try {
            openModelElement(pmml, pmmlIntro, uuid, "PLS Model", "algorithm/pls");
            appendDatasetId(pmml, URLEncoder.encode(model.getDataset().toString(),
                    Configuration.getStringProperty("jaqpot.urlEncoding")));
            closeModelElement(pmml);
            appendDataDictionary(pmml, model);
            appendTransformationDictionary(pmml, pmmlFile);

            appendRegressionModelStart(pmml, uuid);
            pmml.append("<MiningSchema>\n");
            appendIndependentMiningFields(pmml, model);
            for (Feature feature : model.getPredictedFeatures()) {
                appendPredictedMiningField(pmml, feature.getUri().toString());
            }
            pmml.append("</MiningSchema>\n");

            pmml.append("<ParameterList>");
            pmml.append("<Parameter name=\"algorithm\" label=\"Algorithm\"/>");
            pmml.append("<Parameter name=\"preprocessing\" label=\"Preprocessing\"/>");
            pmml.append("<Parameter name=\"doUpdateClass\" label=\"doUpdateClass\"/>");
            pmml.append("<Parameter name=\"numComponents\" label=\"Number of Components\"/>");
            // No </RegressionTable> here: this method never opens one, and an
            // unmatched closing tag makes the document ill-formed.
            pmml.append("</ParameterList>\n");
            appendModelQuality(pmml, model);
            pmml.append("</RegressionModel>\n");
            pmml.append("</PMML>\n\n");
        } catch (UnsupportedEncodingException ex) {
            throw encodingFailure(ex);
        } catch (Exception ex) {
            throw generationFailure(ex);
        }
        return pmml.toString();
    }

    /**
     * Generates PMML for a fast RBF neural network model.
     *
     * <p>One hidden neuron is written per node instance, with the matching
     * {@code sigma} entry as its width and the node's attribute values as its
     * connection weights. A single output neuron follows, with one connection
     * per linear-regression coefficient of the model.
     *
     * @param model a model whose serialized payload is a {@code FastRbfNnModel}
     * @return the PMML document
     * @throws JaqpotException if generation fails
     */
    private static String generateFastRbf(Model model) throws JaqpotException {
        FastRbfNnModel rbfModel = (FastRbfNnModel) model.getActualModel().getSerializableActualModel();
        byte[] pmmlFile = model.getActualModel().getPmml();

        Instances rbfNnNodes = rbfModel.getNodes();
        double[] sigma = rbfModel.getSigma();

        String uuid = model.getUri().getId();
        String pmmlIntro = Configuration.getStringProperty("pmml.intro");
        StringBuilder pmml = new StringBuilder();
        try {
            openModelElement(pmml, pmmlIntro, uuid, "fastRbfNn Model", "algorithm/fastRbfNn");
            appendDatasetId(pmml, escapeXmlAttribute(model.getDataset().toString()));
            closeModelElement(pmml);
            appendDataDictionary(pmml, model);
            appendTransformationDictionary(pmml, pmmlFile);

            String dependentFeature = firstDependentFeatureUri(model);
            pmml.append("<NeuralNetwork modelName=\"" + uuid + "\""
                    + " functionName=\"regression\" activationFunction=\"radialBasis\">\n");

            pmml.append("<MiningSchema>\n");
            appendIndependentMiningFields(pmml, model);
            appendPredictedMiningField(pmml, dependentFeature);
            pmml.append("</MiningSchema>\n");

            // No NeuralInputs element is written; hidden-layer connections refer
            // to the attribute index directly.
            int nodeCount = rbfNnNodes.numInstances();
            int numAttributes = rbfNnNodes.numAttributes();
            pmml.append("<NeuralLayer numberOfNeurons=\"" + nodeCount + "\">\n");
            for (int i = 0; i < nodeCount; i++) {
                pmml.append("<Neuron width=\"" + sigma[i] + "\" id=\"" + i + "\">\n");
                for (int j = 0; j < numAttributes; j++) {
                    pmml.append("<Con from=\"" + j + "\" weight=\"" + rbfNnNodes.instance(i).value(j) + "\"/>\n");
                }
                pmml.append("</Neuron>\n");
            }
            pmml.append("</NeuralLayer>\n");

            int neuralOutputId = nodeCount;
            pmml.append("<NeuralLayer numberOfNeurons=\"1\">\n");
            pmml.append("<Neuron id=\"" + neuralOutputId + "\">\n");

            // NOTE(review): hidden neurons above are numbered 0..nodeCount-1, but
            // the connections below refer to numAttributes + i. The two schemes
            // only agree when numAttributes is 0 - confirm the intended ids.
            double[] rbfCoeffs = rbfModel.getLrCoefficients();
            for (int i = 0; i < rbfCoeffs.length; i++) {
                int sourceId = numAttributes + i;
                pmml.append("<Con from=\"" + sourceId + "\" weight=\"" + rbfCoeffs[i] + "\"/>\n");
            }
            pmml.append("</Neuron>\n");
            pmml.append("</NeuralLayer>\n");
            pmml.append("<NeuralOutputs numberOfOutputs=\"1\">");
            pmml.append("<NeuralOutput outputNeuron=\"" + neuralOutputId + "\">");
            pmml.append("<DerivedField optype=\"continuous\" dataType=\"double\">");
            pmml.append("</DerivedField>");
            pmml.append("</NeuralOutput>");
            pmml.append("</NeuralOutputs>");
            appendModelQuality(pmml, model);
            pmml.append("</NeuralNetwork>\n");
            pmml.append("</PMML>\n\n");
        } catch (UnsupportedEncodingException ex) {
            throw encodingFailure(ex);
        } catch (Exception ex) {
            throw generationFailure(ex);
        }
        return pmml.toString();
    }

    /**
     * Unmarshals a PMML document into its object model.
     *
     * @param pmml the PMML document as bytes
     * @return the unmarshalled {@code PMML} object
     * @throws JaqpotException if the bytes cannot be read or unmarshalled
     */
    public static PMML loadPMMLObject(byte[] pmml) throws JaqpotException {
        try {
            InputStream is = new ByteArrayInputStream(pmml);
            SAXSource transformedSource = ImportFilter.apply(new InputSource(is));
            return JAXBUtil.unmarshalPMML(transformedSource);
        } catch (Exception ex) {
            String message = "Exception while loading PMML to object";
            throw new JaqpotException(message, ex);
        }
    }

    /**
     * Extracts the {@code /PMML/TransformationDictionary} element of a PMML
     * document and serializes it without an XML declaration.
     *
     * @param xml the PMML document as text
     * @return the serialized element, or an empty string if the document has none
     * @throws JaqpotException if the document cannot be parsed or serialized
     */
    private static String getNodeFromXML(String xml) throws JaqpotException {
        try {
            DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            // Parse from characters rather than bytes so that the result does
            // not depend on the platform's default charset.
            Document doc = docBuilder.parse(new InputSource(new StringReader(xml)));

            // XPath selecting the <TransformationDictionary> child of the root <PMML> element
            XPath xpath = XPathFactory.newInstance().newXPath();
            String expression = "/PMML/TransformationDictionary";
            Node node = (Node) xpath.compile(expression).evaluate(doc, XPathConstants.NODE);
            if (node == null) {
                return "";
            }

            StringWriter serialized = new StringWriter();
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            transformer.transform(new DOMSource(node), new StreamResult(serialized));
            return serialized.toString();
        } catch (Exception ex) {
            throw generationFailure(ex);
        }
    }

    /**
     * Returns the transformation dictionary of a stored PMML document as an XML fragment.
     *
     * <p>The dictionary is copied into an otherwise empty {@code PMML} object,
     * marshalled, and the resulting element is extracted with
     * {@link #getNodeFromXML(String)}.
     *
     * @param pmmlFile the stored PMML document
     * @return the serialized {@code TransformationDictionary} element, possibly empty
     * @throws JaqpotException if loading, marshalling or extraction fails
     * @throws IOException declared for the writer's {@code close()}
     */
    private static String getTransformationDictionaryXML(byte[] pmmlFile) throws JaqpotException, IOException {
        PMML pmmlObject = loadPMMLObject(pmmlFile);
        PMML newPmmlObject = new PMML();
        newPmmlObject.setTransformationDictionary(pmmlObject.getTransformationDictionary());
        StringWriter outWriter = new StringWriter();
        try {
            JAXBUtil.marshalPMML(newPmmlObject, new StreamResult(outWriter));
            return getNodeFromXML(outWriter.toString());
        } catch (JaqpotException ex) {
            // Already logged and wrapped by getNodeFromXML; do not wrap it again.
            throw ex;
        } catch (Exception ex) {
            throw generationFailure(ex);
        } finally {
            outWriter.close();
        }
    }

    /** Extracts the value following "Correlation coefficient" from the statistics text. */
    private static String getCorrelation(String stats) {
        return getStatisticsAttr("Correlation\\s*coefficient", stats);
    }

    /** Extracts the value following "Total Number of Instances" from the statistics text. */
    private static String getNoRecords(String stats) {
        return getStatisticsAttr("Total\\s*Number\\s*of\\s*Instances", stats);
    }

    /** Extracts the value following "Mean absolute error" from the statistics text. */
    private static String getMeanAbsError(String stats) {
        return getStatisticsAttr("Mean\\s*absolute\\s*error", stats);
    }

    /** Extracts the value following "Root mean squared error" from the statistics text. */
    private static String getRootMeanSquaredError(String stats) {
        return getStatisticsAttr("Root\\s*mean\\s*squared\\s*error", stats);
    }

    /** Extracts the value following "Relative absolute error" from the statistics text. */
    private static String getRelativeAbsoluteError(String stats) {
        return getStatisticsAttr("Relative\\s*absolute\\s*error", stats);
    }

    /** Extracts the value following "Root relative squared error" from the statistics text. */
    private static String getRootRelativeSquaredError(String stats) {
        return getStatisticsAttr("Root\\s*relative\\s*squared\\s*error", stats);
    }

    /**
     * Looks up a labelled number in the statistics text.
     *
     * @param attr regular expression matching the label
     * @param stats the statistics text
     * @return whatever {@code GenericUtils.getRegex} returns for the label
     *         followed by a run of digits and dots
     */
    private static String getStatisticsAttr(String attr, String stats) {
        return GenericUtils.getRegex("\\s*" + attr + "\\s*([0-9.]*)", stats);
    }

    /**
     * Collects the "coefficient * k[index]" terms of a textual SVM description.
     *
     * @param info the text to scan
     * @return coefficients keyed by index, iterating in ascending index order;
     *         a later term with the same index replaces an earlier one, and
     *         terms with an empty index ("k[]") are ignored
     */
    private static Map<Integer, String> getSupportVectors(String info) {
        Map<Integer, String> res = new TreeMap<Integer, String>();
        Matcher m = SUPPORT_VECTOR_TERM.matcher(info);
        while (m.find()) {
            String index = m.group(2);
            if (index.length() == 0) {
                // "k[]" has no index to parse.
                continue;
            }
            res.put(Integer.parseInt(index), m.group(1));
        }
        return res;
    }

    /**
     * Finds the first signed number left in a textual SVM description once all
     * "coefficient * k[index]" terms have been removed.
     *
     * @param info the text to scan
     * @return the number with newlines and spaces stripped, or an empty string if none is found
     */
    private static String getSupportVectorsCoefficientB(String info) {
        String remainder = SUPPORT_VECTOR_TERM.matcher(info).replaceAll("");
        Matcher m = SUPPORT_VECTOR_OFFSET.matcher(remainder);
        String res = m.find() ? m.group(0) : "";
        return res.replace("\n", "").replace(" ", "");
    }

    /**
     * Collects the coefficients of the "coefficient * (normalized)" terms of a
     * textual SVM description, in order of appearance.
     *
     * @param info the text to scan
     * @return the coefficients exactly as they appear in the text
     */
    private static List<String> getSVMLinearCoefficients(String info) {
        List<String> res = new ArrayList<String>();
        Matcher m = LINEAR_TERM.matcher(info);
        while (m.find()) {
            res.add(m.group(1));
        }
        return res;
    }

    /**
     * Finds the first number left in a textual SVM description once every
     * "coefficient * (normalized) ..." term has been removed up to its end of line.
     *
     * @param info the text to scan
     * @return the number with newlines and spaces stripped, or an empty string if none is found
     */
    private static String getSVMLinearCoefficientB(String info) {
        String remainder = LINEAR_TERM_LINE.matcher(info).replaceAll("");
        Matcher m = LINEAR_OFFSET.matcher(remainder);
        String res = m.find() ? m.group(0) : "";
        return res.replace("\n", "").replace(" ", "");
    }

    /**
     * Appends a {@code ModelExplanation} element holding the mean absolute error
     * and root mean squared error found in the statistics text.
     *
     * @param pmml the document being built
     * @param dsName the dataset name; XML-escaped before it is written
     * @param stats the statistics text
     * @return {@code pmml}, for chaining
     */
    private static StringBuilder getPMMLStats(StringBuilder pmml, String dsName, String stats) {
        pmml.append("<ModelExplanation>\n");
        pmml.append("<PredictiveModelQuality dataName=\"" + escapeXmlAttribute(dsName)
                + "\" dataUsage=\"training\" ");
        pmml.append("meanAbsoluteError=\"" + getMeanAbsError(stats) + "\" ");
        pmml.append("rootMeanSquaredError=\"" + getRootMeanSquaredError(stats) + "\" ");
        pmml.append("/>\n");
        pmml.append("</ModelExplanation>\n");
        return pmml;
    }

    /** Appends the XML declaration, the configured intro and the start of the Model element. */
    private static void openModelElement(StringBuilder pmml, String pmmlIntro, String uuid, String modelName,
            String algorithmPath) {
        pmml.append("<?xml version=\"1.0\" ?>");
        pmml.append(pmmlIntro);
        pmml.append("<Model ID=\"" + uuid + "\" Name=\"" + modelName + "\">\n");
        pmml.append("<AlgorithmID href=\"" + Configuration.BASE_URI + algorithmPath + "\"/>\n");
    }

    /** Appends a DatasetID element; {@code href} must already be safe for an attribute value. */
    private static void appendDatasetId(StringBuilder pmml, String href) {
        pmml.append("<DatasetID href=\"" + href + "\"/>\n");
    }

    /** Appends the empty parameter element, the timestamp and the end of the Model element. */
    private static void closeModelElement(StringBuilder pmml) {
        pmml.append("<AlgorithmParameters />\n");
        pmml.append("<Timestamp>" + java.util.GregorianCalendar.getInstance().getTime() + "</Timestamp>\n");
        pmml.append("</Model>\n");
    }

    /** Appends a DataDictionary with one continuous double field per independent feature. */
    private static void appendDataDictionary(StringBuilder pmml, Model model) {
        pmml.append("<DataDictionary numberOfFields=\"" + model.getIndependentFeatures().size() + "\" >\n");
        for (Feature feature : model.getIndependentFeatures()) {
            pmml.append("<DataField name=\"" + escapeXmlAttribute(feature.getUri().toString())
                    + "\" optype=\"continuous\" dataType=\"double\" />\n");
        }
        pmml.append("</DataDictionary>\n");
    }

    /** Appends the stored transformation dictionary, if the model has a non-empty PMML attached. */
    private static void appendTransformationDictionary(StringBuilder pmml, byte[] pmmlFile)
            throws JaqpotException, IOException {
        if (hasContent(pmmlFile)) {
            pmml.append(getTransformationDictionaryXML(pmmlFile) + "\n");
        }
    }

    /** Appends the opening tag of a linear RegressionModel element. */
    private static void appendRegressionModelStart(StringBuilder pmml, String uuid) {
        pmml.append("<RegressionModel modelName=\"" + uuid + "\""
                + " functionName=\"regression\" modelType=\"linearRegression\""
                + " algorithmName=\"linearRegression\">\n");
    }

    /** Appends one MiningField per independent feature. */
    private static void appendIndependentMiningFields(StringBuilder pmml, Model model) {
        for (Feature feature : model.getIndependentFeatures()) {
            pmml.append("<MiningField name=\"" + escapeXmlAttribute(feature.getUri().toString()) + "\" />\n");
        }
    }

    /** Appends a MiningField marked as predicted. */
    private static void appendPredictedMiningField(StringBuilder pmml, String featureUri) {
        pmml.append("<MiningField name=\"" + escapeXmlAttribute(featureUri) + "\" " + "usageType=\"predicted\"/>\n");
    }

    /** Appends a first-order NumericPredictor element. */
    private static void appendNumericPredictor(StringBuilder pmml, String name, double coefficient) {
        pmml.append("<NumericPredictor name=\"" + escapeXmlAttribute(name) + "\" " + " exponent=\"1\" "
                + "coefficient=\"" + coefficient + "\"/>\n");
    }

    /** Appends the training-quality figures taken from the model's dataset and statistics. */
    private static void appendModelQuality(StringBuilder pmml, Model model) {
        getPMMLStats(pmml, model.getDataset().toString(), model.getActualModel().getStatistics().toString());
    }

    /** Returns the URI of the model's first dependent feature, or an empty string if it has none. */
    private static String firstDependentFeatureUri(Model model) {
        return model.getDependentFeatures().isEmpty() ? ""
                : model.getDependentFeatures().iterator().next().getUri().toString();
    }

    /** Tells whether a stored PMML document is present and non-empty. */
    private static boolean hasContent(byte[] pmmlFile) {
        return pmmlFile != null && pmmlFile.length > 0;
    }

    /**
     * Escapes the five XML special characters so that a value can be placed
     * inside a double- or single-quoted attribute.
     *
     * @param value the raw value, may be {@code null}
     * @return the escaped value, or {@code null} if {@code value} is {@code null}
     */
    private static String escapeXmlAttribute(String value) {
        if (value == null) {
            return null;
        }
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /** Logs (at debug level) and wraps an unsupported-encoding failure. */
    private static JaqpotException encodingFailure(UnsupportedEncodingException ex) {
        String message = "Character Encoding :'" + Configuration.getStringProperty("jaqpot.urlEncoding")
                + "' is not supported.";
        logger.debug(message, ex);
        return new JaqpotException(message, ex);
    }

    /** Logs (at error level) and wraps any other failure during PMML generation. */
    private static JaqpotException generationFailure(Exception ex) {
        logger.error(GENERATION_FAILED, ex);
        return new JaqpotException(GENERATION_FAILED, ex);
    }
}