List of usage examples for the coefficients() method of weka.classifiers.functions.LinearRegression
public double[] coefficients()
From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java
License:Open Source License
public static WekaLinRegData createWekaLinRegData(int timeslot, Instances X, Double[] yvals, ArrayList<Double> candidateLambdas) throws Exception { WekaLinRegData result;/*from w w w .j a v a 2 s. c o m*/ // normalize Standardize standardize = new Standardize(); try { standardize.setInputFormat(X); } catch (Exception e) { log.error("PolyRegCust.predictNumSubs() data standardizing exception", e); throw e; } Instances nrmFeatures = RegressionUtils.featureNormalize(X, standardize); log.info("normalized features " + nrmFeatures); // add y to X since this is what weka expects Instances Xy = RegressionUtils.addYforWeka(nrmFeatures, yvals); // run cross validation for lambda Double bestLambda = findBestRegularizationParameter(Xy, candidateLambdas); if (null == bestLambda) { String message = "best regularization parameter is null, cannot predict"; log.error(message); throw new Exception(message); } // run linear regression LinearRegression linearRegression = RegressionUtils.createLinearRegression(); linearRegression.setRidge(bestLambda); try { linearRegression.buildClassifier(Xy); log.info("theta " + Arrays.toString(linearRegression.coefficients())); } catch (Exception e) { log.error("PolyRegCust.predictNumSubs() buildClassifier exception", e); throw e; } result = new WekaLinRegData(standardize, linearRegression, timeslot); return result; }
From source file:org.jaqpot.algorithm.resource.WekaMLR.java
License:Open Source License
/**
 * Trains a Weka linear regression on the dataset carried by the request.
 *
 * <p>The response contains the Java-serialized, Base64-encoded model, a PMML document built
 * from the regression coefficients, the independent feature names and the name of the
 * prediction feature.
 *
 * @param request training request holding the dataset and the prediction feature
 * @return 200 with a {@code TrainingResponse}; 400 with an error report when the dataset has
 *     no entries or its first entry has no values; 500 with the exception message on any
 *     other failure
 */
@POST
@Path("training")
public Response training(TrainingRequest request) {
    try {
        if (request.getDataset().getDataEntry().isEmpty()
                || request.getDataset().getDataEntry().get(0).getValues().isEmpty()) {
            return Response.status(Response.Status.BAD_REQUEST)
                    .entity(ErrorReportFactory.badRequest("Dataset is empty",
                            "Cannot train model on empty dataset"))
                    .build();
        }

        // Feature names are taken from the keys of the first data entry.
        List<String> features = request.getDataset().getDataEntry().stream().findFirst().get().getValues()
                .keySet().stream().collect(Collectors.toList());
        Instances data = InstanceUtils.createFromDataset(request.getDataset(), request.getPredictionFeature());

        LinearRegression linreg = new LinearRegression();
        // NOTE(review): per the Weka LinearRegression options, "-S 1" should turn attribute
        // selection off and "-C" should keep collinear attributes - confirm against the
        // Weka version in use.
        String[] linRegOptions = { "-S", "1", "-C" };
        linreg.setOptions(linRegOptions);
        linreg.buildClassifier(data);

        WekaModel model = new WekaModel();
        model.setClassifier(linreg);
        String pmml = PmmlUtils.createRegressionModel(features, request.getPredictionFeature(),
                linreg.coefficients(), "MLR");

        TrainingResponse response = new TrainingResponse();

        // Close (and thereby flush) the object stream before reading the bytes so the
        // serialized form is guaranteed to be complete and the stream is not leaked.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (ObjectOutput out = new ObjectOutputStream(baos)) {
            out.writeObject(model);
        }
        String base64Model = Base64.getEncoder().encodeToString(baos.toByteArray());
        response.setRawModel(base64Model);

        List<String> independentFeatures = features.stream()
                .filter(feature -> !feature.equals(request.getPredictionFeature()))
                .collect(Collectors.toList());
        response.setIndependentFeatures(independentFeatures);
        response.setPmmlModel(pmml);

        // Fail with a descriptive message instead of Optional.get()'s "No value present"
        // when the prediction feature is not declared among the dataset features.
        String predictionFeatureName = request.getDataset().getFeatures().stream()
                .filter(f -> f.getURI().equals(request.getPredictionFeature()))
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("Prediction feature "
                        + request.getPredictionFeature() + " was not found among the dataset features"))
                .getName();
        response.setAdditionalInfo(Arrays.asList(request.getPredictionFeature(), predictionFeatureName));
        response.setPredictedFeatures(Arrays.asList("Weka MLR prediction of " + predictionFeatureName));
        return Response.ok(response).build();
    } catch (Exception ex) {
        Logger.getLogger(WekaMLR.class.getName()).log(Level.SEVERE, null, ex);
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(ex.getMessage()).build();
    }
}
From source file:org.jaqpot.algorithms.resource.WekaMLR.java
License:Open Source License
/**
 * Trains a Weka linear regression on the dataset carried by the request.
 *
 * <p>The response contains the Java-serialized, Base64-encoded model, a PMML document built
 * from the regression coefficients, the independent feature names and the name of the
 * prediction feature.
 *
 * @param request training request holding the dataset and the prediction feature
 * @return 200 with a {@code TrainingResponse}; 400 with a plain-text message when the
 *     dataset has no entries or its first entry has no values; 500 with the exception
 *     message on any other failure
 */
@POST
@Path("training")
public Response training(TrainingRequest request) {
    try {
        if (request.getDataset().getDataEntry().isEmpty()
                || request.getDataset().getDataEntry().get(0).getValues().isEmpty()) {
            return Response.status(Response.Status.BAD_REQUEST)
                    .entity("Dataset is empty. Cannot train model on empty dataset.").build();
        }

        // Feature names are taken from the keys of the first data entry.
        List<String> features = request.getDataset().getDataEntry().stream().findFirst().get().getValues()
                .keySet().stream().collect(Collectors.toList());
        Instances data = InstanceUtils.createFromDataset(request.getDataset(), request.getPredictionFeature());

        LinearRegression linreg = new LinearRegression();
        // NOTE(review): per the Weka LinearRegression options, "-S 1" should turn attribute
        // selection off and "-C" should keep collinear attributes - confirm against the
        // Weka version in use.
        String[] linRegOptions = { "-S", "1", "-C" };
        linreg.setOptions(linRegOptions);
        linreg.buildClassifier(data);

        WekaModel model = new WekaModel();
        model.setClassifier(linreg);
        String pmml = PmmlUtils.createRegressionModel(features, request.getPredictionFeature(),
                linreg.coefficients(), "MLR");

        TrainingResponse response = new TrainingResponse();

        // Close (and thereby flush) the object stream before reading the bytes so the
        // serialized form is guaranteed to be complete and the stream is not leaked.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (ObjectOutput out = new ObjectOutputStream(baos)) {
            out.writeObject(model);
        }
        String base64Model = Base64.getEncoder().encodeToString(baos.toByteArray());
        response.setRawModel(base64Model);

        List<String> independentFeatures = features.stream()
                .filter(feature -> !feature.equals(request.getPredictionFeature()))
                .collect(Collectors.toList());
        response.setIndependentFeatures(independentFeatures);
        response.setPmmlModel(pmml);

        // Fail with a descriptive message instead of Optional.get()'s "No value present"
        // when the prediction feature is not declared among the dataset features.
        String predictionFeatureName = request.getDataset().getFeatures().stream()
                .filter(f -> f.getURI().equals(request.getPredictionFeature()))
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("Prediction feature "
                        + request.getPredictionFeature() + " was not found among the dataset features"))
                .getName();
        response.setAdditionalInfo(Arrays.asList(request.getPredictionFeature(), predictionFeatureName));
        response.setPredictedFeatures(Arrays.asList("Weka MLR prediction of " + predictionFeatureName));
        return Response.ok(response).build();
    } catch (Exception ex) {
        Logger.getLogger(WekaMLR.class.getName()).log(Level.SEVERE, null, ex);
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(ex.getMessage()).build();
    }
}
From source file:org.opentox.jaqpot3.qsar.util.PMMLGenerator.java
License:Open Source License
private static String generateMLR(Model model) throws JaqpotException { LinearRegression wekaModel = (LinearRegression) model.getActualModel(); String uuid = model.getUri().getId(); String PMMLIntro = Configuration.getStringProperty("pmml.intro"); StringBuilder pmml = new StringBuilder(); try {/*from w ww . ja va 2s . c o m*/ final double[] coefficients = wekaModel.coefficients(); pmml.append("<?xml version=\"1.0\" ?>"); pmml.append(PMMLIntro); pmml.append("<Model ID=\"" + uuid + "\" Name=\"MLR Model\">\n"); pmml.append("<AlgorithmID href=\"" + Configuration.BASE_URI + "/algorithm/mlr\"/>\n"); // URI trainingDatasetURI = URI.create(model.getDataset().getUri()); pmml.append("<DatasetID href=\"" + URLEncoder.encode(model.getDataset().toString(), Configuration.getStringProperty("jaqpot.urlEncoding")) + "\"/>\n"); pmml.append("<AlgorithmParameters />\n"); // pmml.append("<FeatureDefinitions>\n"); // for (Feature feature : model.getIndependentFeatures()) { // pmml.append("<link href=\"" + feature.getUri().toString() + "\"/>\n"); // } // pmml.append("<target index=\"" + data.attribute(model.getPredictedFeature().getUri().toString()).index() + "\" name=\"" + model.getPredictedFeature().getUri().toString() + "\"/>\n"); // pmml.append("</FeatureDefinitions>\n"); pmml.append("<Timestamp>" + java.util.GregorianCalendar.getInstance().getTime() + "</Timestamp>\n"); pmml.append("</Model>\n"); pmml.append("<DataDictionary numberOfFields=\"" + model.getIndependentFeatures().size() + "\" >\n"); for (Feature feature : model.getIndependentFeatures()) { pmml.append("<DataField name=\"" + feature.getUri().toString() + "\" optype=\"continuous\" dataType=\"double\" />\n"); } pmml.append("</DataDictionary>\n"); // RegressionModel pmml.append("<RegressionModel modelName=\"" + uuid.toString() + "\"" + " functionName=\"regression\" modelType=\"linearRegression\"" + " algorithmName=\"linearRegression\"" + " targetFieldName=\"" + 
model.getDependentFeatures().iterator().next().getUri().toString() + "\"" + ">\n"); // RegressionModel::MiningSchema pmml.append("<MiningSchema>\n"); for (Feature feature : model.getIndependentFeatures()) { pmml.append("<MiningField name=\"" + feature.getUri().toString() + "\" />\n"); } pmml.append("<MiningField name=\"" + model.getDependentFeatures().iterator().next().getUri().toString() + "\" " + "usageType=\"predicted\"/>\n"); pmml.append("</MiningSchema>\n"); // RegressionModel::RegressionTable pmml.append("<RegressionTable intercept=\"" + coefficients[coefficients.length - 1] + "\">\n"); for (int k = 0; k < model.getIndependentFeatures().size(); k++) { pmml.append("<NumericPredictor name=\"" + model.getIndependentFeatures().get(k).getUri().toString() + "\" " + " exponent=\"1\" " + "coefficient=\"" + coefficients[k] + "\"/>\n"); } pmml.append("</RegressionTable>\n"); pmml.append("</RegressionModel>\n"); pmml.append("</PMML>\n\n"); } catch (UnsupportedEncodingException ex) { String message = "Character Encoding :'" + Configuration.getStringProperty("jaqpot.urlEncoding") + "' is not supported."; logger.debug(message, ex); throw new JaqpotException(message, ex); } catch (Exception ex) { String message = "Unexpected exception was caught while generating" + " the PMML representaition of a trained model."; logger.error(message, ex); throw new JaqpotException(message, ex); } return pmml.toString(); }
From source file:org.opentox.jaqpot3.qsar.util.PMMLProcess.java
License:Open Source License
private static String generateMLR(Model model) throws JaqpotException { LinearRegression wekaModel = (LinearRegression) model.getActualModel().getSerializableActualModel(); byte[] pmmlFile = model.getActualModel().getPmml(); String uuid = model.getUri().getId(); String PMMLIntro = Configuration.getStringProperty("pmml.intro"); StringBuilder pmml = new StringBuilder(); try {// w ww . ja v a 2 s . c om final double[] coefficients = wekaModel.coefficients(); pmml.append("<?xml version=\"1.0\" ?>"); pmml.append(PMMLIntro); pmml.append("<Model ID=\"" + uuid + "\" Name=\"MLR Model\">\n"); pmml.append("<AlgorithmID href=\"" + Configuration.BASE_URI + "algorithm/mlr\"/>\n"); pmml.append("<AlgorithmParameters />\n"); pmml.append("<Timestamp>" + java.util.GregorianCalendar.getInstance().getTime() + "</Timestamp>\n"); pmml.append("</Model>\n"); pmml.append("<DataDictionary numberOfFields=\"" + model.getIndependentFeatures().size() + "\" >\n"); for (Feature feature : model.getIndependentFeatures()) { pmml.append("<DataField name=\"" + feature.getUri().toString() + "\" optype=\"continuous\" dataType=\"double\" />\n"); } pmml.append("</DataDictionary>\n"); if (pmmlFile != null) { if (pmmlFile.length > 0) { String TrDictionaryString = getTransformationDictionaryXML(pmmlFile); pmml.append(TrDictionaryString + "\n"); } } String dependentFeatures = (model.getDependentFeatures().isEmpty()) ? 
"" : model.getDependentFeatures().iterator().next().getUri().toString(); // RegressionModel pmml.append("<RegressionModel modelName=\"" + uuid.toString() + "\"" + " functionName=\"regression\" modelType=\"linearRegression\"" + " algorithmName=\"linearRegression\">\n"); // RegressionModel::MiningSchema pmml.append("<MiningSchema>\n"); for (Feature feature : model.getIndependentFeatures()) { pmml.append("<MiningField name=\"" + feature.getUri().toString() + "\" />\n"); } pmml.append("<MiningField name=\"" + dependentFeatures + "\" " + "usageType=\"predicted\"/>\n"); pmml.append("</MiningSchema>\n"); // RegressionModel::RegressionTable pmml.append("<RegressionTable intercept=\"" + coefficients[coefficients.length - 1] + "\">\n"); int k = 0; for (k = 0; k < model.getIndependentFeatures().size(); k++) { pmml.append("<NumericPredictor name=\"" + model.getIndependentFeatures().get(k).getUri().toString() + "\" " + " exponent=\"1\" " + "coefficient=\"" + coefficients[k] + "\"/>\n"); } if (pmmlFile != null) { if (pmmlFile.length > 0) { PMML pmmlObject = loadPMMLObject(pmmlFile); TransformationDictionary trDir = pmmlObject.getTransformationDictionary(); if (trDir != null) { int trDirSize = trDir.getDerivedFields().size(); int j = 0; while (j < trDirSize) { pmml.append("<NumericPredictor name=\"" + trDir.getDerivedFields().get(j).getName().toString() + "\" " + " exponent=\"1\" " + "coefficient=\"" + coefficients[k] + "\"/>\n"); ++k; ++j; } } } } pmml.append("</RegressionTable>\n"); pmml = getPMMLStats(pmml, model.getDataset().toString(), model.getActualModel().getStatistics().toString()); pmml.append("</RegressionModel>\n"); pmml.append("</PMML>\n\n"); } catch (UnsupportedEncodingException ex) { String message = "Character Encoding :'" + Configuration.getStringProperty("jaqpot.urlEncoding") + "' is not supported."; logger.debug(message, ex); throw new JaqpotException(message, ex); } catch (Exception ex) { String message = "Unexpected exception was caught while generating" + " 
the PMML representaition of a trained model."; logger.error(message, ex); throw new JaqpotException(message, ex); } return pmml.toString(); }
From source file:org.opentox.qsar.processors.trainers.regression.MLRTrainer.java
License:Open Source License
/** * Generates the PMML representation of the model and stores in the hard * disk.//w w w . j av a2 s . c o m * @param coefficients The vector of the coefficients of the MLR model. * @param model_id The id of the generated model. * TODO: build the XML using some XML editor */ // <editor-fold defaultstate="collapsed" desc="PMML generation routine!"> private void generatePMML(final LinearRegression wekaModel, final Instances data) throws YaqpIOException { final double[] coefficients = wekaModel.coefficients(); StringBuilder pmml = new StringBuilder(); pmml.append("<?xml version=\"1.0\" ?>"); pmml.append(PMMLIntro); pmml.append("<Model ID=\"" + uuid.toString() + "\" Name=\"MLR Model\">\n"); pmml.append("<AlgorithmID href=\"" + Configuration.BASE_URI + "/algorithm/mlr\"/>\n"); pmml.append("<DatasetID href=\"" + datasetUri + "\"/>\n"); pmml.append("<AlgorithmParameters />\n"); pmml.append("<FeatureDefinitions>\n"); for (int k = 1; k <= data.numAttributes(); k++) { pmml.append("<link href=\"" + data.attribute(k - 1).name() + "\"/>\n"); } pmml.append("<target index=\"" + data.attribute(predictionFeature).index() + "\" name=\"" + predictionFeature + "\"/>\n"); pmml.append("</FeatureDefinitions>\n"); pmml.append("<Timestamp>" + java.util.GregorianCalendar.getInstance().getTime() + "</Timestamp>\n"); pmml.append("</Model>\n"); pmml.append("<DataDictionary numberOfFields=\"" + data.numAttributes() + "\" >\n"); for (int k = 0; k <= data.numAttributes() - 1; k++) { pmml.append("<DataField name=\"" + data.attribute(k).name() + "\" optype=\"continuous\" dataType=\"double\" />\n"); } pmml.append("</DataDictionary>\n"); // RegressionModel pmml.append("<RegressionModel modelName=\"" + uuid.toString() + "\"" + " functionName=\"regression\"" + " modelType=\"linearRegression\"" + " algorithmName=\"linearRegression\"" + " targetFieldName=\"" + data.classAttribute().name() + "\"" + ">\n"); // RegressionModel::MiningSchema pmml.append("<MiningSchema>\n"); for (int k = 0; k <= 
data.numAttributes() - 1; k++) { if (k != data.classIndex()) { pmml.append("<MiningField name=\"" + data.attribute(k).name() + "\" />\n"); } } pmml.append("<MiningField name=\"" + data.attribute(data.classIndex()).name() + "\" " + "usageType=\"predicted\"/>\n"); pmml.append("</MiningSchema>\n"); // RegressionModel::RegressionTable pmml.append("<RegressionTable intercept=\"" + coefficients[coefficients.length - 1] + "\">\n"); for (int k = 0; k <= data.numAttributes() - 1; k++) { if (!(predictionFeature.equals(data.attribute(k).name()))) { pmml.append("<NumericPredictor name=\"" + data.attribute(k).name() + "\" " + " exponent=\"1\" " + "coefficient=\"" + coefficients[k] + "\"/>\n"); } } pmml.append("</RegressionTable>\n"); pmml.append("</RegressionModel>\n"); pmml.append("</PMML>\n\n"); try { FileWriter fwriter = new FileWriter(ServerFolders.models_pmml + "/" + uuid.toString()); BufferedWriter writer = new BufferedWriter(fwriter); writer.write(pmml.toString()); writer.flush(); writer.close(); } catch (IOException ex) { throw new YaqpIOException(Cause.XQReg3, "Could not write data to PMML file :" + uuid.toString(), ex); } }