Example usage for weka.classifiers.functions LinearRegression LinearRegression


Introduction

This page lists example usages of the weka.classifiers.functions.LinearRegression constructor, LinearRegression().

Prototype

public LinearRegression() 
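
Before the full listings below, here is a minimal, self-contained sketch of the constructor in use, assuming an ARFF dataset with a numeric target. The file path "data.arff" is a placeholder, and the "-S 1 -C" options mirror those used by several of the examples that follow.

import weka.classifiers.functions.LinearRegression;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class LinearRegressionDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset whose last attribute is the numeric target
        // ("data.arff" is a placeholder path).
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // The no-argument constructor shown in the Prototype above.
        LinearRegression linreg = new LinearRegression();
        // "-S 1": no attribute selection; "-C": keep collinear attributes.
        linreg.setOptions(new String[] { "-S", "1", "-C" });
        linreg.buildClassifier(data);

        // Printing the model shows the fitted regression coefficients.
        System.out.println(linreg);
    }
}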

Usage

From source file:org.jaqpot.algorithm.resource.WekaMLR.java

License:Open Source License

@POST
@Path("training")
public Response training(TrainingRequest request) {

    try {
        if (request.getDataset().getDataEntry().isEmpty()
                || request.getDataset().getDataEntry().get(0).getValues().isEmpty()) {
            return Response.status(Response.Status.BAD_REQUEST).entity(
                    ErrorReportFactory.badRequest("Dataset is empty", "Cannot train model on empty dataset"))
                    .build();
        }
        List<String> features = request.getDataset().getDataEntry().stream().findFirst().get().getValues()
                .keySet().stream().collect(Collectors.toList());

        Instances data = InstanceUtils.createFromDataset(request.getDataset(), request.getPredictionFeature());

        LinearRegression linreg = new LinearRegression();
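        // "-S 1" disables attribute selection; "-C" tells Weka not to
        // eliminate collinear attributes.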
        String[] linRegOptions = { "-S", "1", "-C" };
        linreg.setOptions(linRegOptions);
        linreg.buildClassifier(data);

        WekaModel model = new WekaModel();
        model.setClassifier(linreg);

        String pmml = PmmlUtils.createRegressionModel(features, request.getPredictionFeature(),
                linreg.coefficients(), "MLR");

        TrainingResponse response = new TrainingResponse();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ObjectOutput out = new ObjectOutputStream(baos);
        out.writeObject(model);
        String base64Model = Base64.getEncoder().encodeToString(baos.toByteArray());
        response.setRawModel(base64Model);
        List<String> independentFeatures = features.stream()
                .filter(feature -> !feature.equals(request.getPredictionFeature()))
                .collect(Collectors.toList());
        response.setIndependentFeatures(independentFeatures);
        response.setPmmlModel(pmml);
        String predictionFeatureName = request.getDataset().getFeatures().stream()
                .filter(f -> f.getURI().equals(request.getPredictionFeature())).findFirst().get().getName();
        response.setAdditionalInfo(Arrays.asList(request.getPredictionFeature(), predictionFeatureName));

        response.setPredictedFeatures(Arrays.asList("Weka MLR prediction of " + predictionFeatureName));

        return Response.ok(response).build();
    } catch (Exception ex) {
        Logger.getLogger(WekaMLR.class.getName()).log(Level.SEVERE, null, ex);
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(ex.getMessage()).build();
    }
}

From source file:org.jaqpot.algorithms.resource.WekaMLR.java

License:Open Source License

@POST
@Path("training")
public Response training(TrainingRequest request) {

    try {
        if (request.getDataset().getDataEntry().isEmpty()
                || request.getDataset().getDataEntry().get(0).getValues().isEmpty()) {
            return Response.status(Response.Status.BAD_REQUEST)
                    .entity("Dataset is empty. Cannot train model on empty dataset.").build();
        }
        List<String> features = request.getDataset().getDataEntry().stream().findFirst().get().getValues()
                .keySet().stream().collect(Collectors.toList());

        Instances data = InstanceUtils.createFromDataset(request.getDataset(), request.getPredictionFeature());

        LinearRegression linreg = new LinearRegression();
        String[] linRegOptions = { "-S", "1", "-C" };
        linreg.setOptions(linRegOptions);
        linreg.buildClassifier(data);

        WekaModel model = new WekaModel();
        model.setClassifier(linreg);

        String pmml = PmmlUtils.createRegressionModel(features, request.getPredictionFeature(),
                linreg.coefficients(), "MLR");

        TrainingResponse response = new TrainingResponse();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ObjectOutput out = new ObjectOutputStream(baos);
        out.writeObject(model);
        String base64Model = Base64.getEncoder().encodeToString(baos.toByteArray());
        response.setRawModel(base64Model);
        List<String> independentFeatures = features.stream()
                .filter(feature -> !feature.equals(request.getPredictionFeature()))
                .collect(Collectors.toList());
        response.setIndependentFeatures(independentFeatures);
        response.setPmmlModel(pmml);
        String predictionFeatureName = request.getDataset().getFeatures().stream()
                .filter(f -> f.getURI().equals(request.getPredictionFeature())).findFirst().get().getName();
        response.setAdditionalInfo(Arrays.asList(request.getPredictionFeature(), predictionFeatureName));

        response.setPredictedFeatures(Arrays.asList("Weka MLR prediction of " + predictionFeatureName));

        return Response.ok(response).build();
    } catch (Exception ex) {
        Logger.getLogger(WekaMLR.class.getName()).log(Level.SEVERE, null, ex);
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(ex.getMessage()).build();
    }
}

From source file:org.opentox.jaqpot3.qsar.trainer.MlrRegression.java

License:Open Source License

@Override
public Model train(Instances data) throws JaqpotException {
    try {

        getTask().getMeta().addComment(
                "Dataset successfully retrieved and converted " + "into a weka.core.Instances object");
        UpdateTask firstTaskUpdater = new UpdateTask(getTask());
        firstTaskUpdater.setUpdateMeta(true);
        firstTaskUpdater.setUpdateTaskStatus(true);//TODO: Is this necessary?
        try {
            firstTaskUpdater.update();
        } catch (DbException ex) {
            throw new JaqpotException(ex);
        } finally {
            try {
                firstTaskUpdater.close();
            } catch (DbException ex) {
                throw new JaqpotException(ex);
            }
        }

        Instances trainingSet = data;
        getTask().getMeta().addComment("The downloaded dataset is now preprocessed");
        firstTaskUpdater = new UpdateTask(getTask());
        firstTaskUpdater.setUpdateMeta(true);
        firstTaskUpdater.setUpdateTaskStatus(true);//TODO: Is this necessary?
        try {
            firstTaskUpdater.update();
        } catch (DbException ex) {
            throw new JaqpotException(ex);
        } finally {
            try {
                firstTaskUpdater.close();
            } catch (DbException ex) {
                throw new JaqpotException(ex);
            }
        }

        /* SET CLASS ATTRIBUTE */
        Attribute target = trainingSet.attribute(targetUri.toString());
        if (target == null) {
            throw new BadParameterException("The prediction feature you provided was not found in the dataset");
        } else {
            if (!target.isNumeric()) {
                throw new QSARException("The prediction feature you provided is not numeric.");
            }
        }
        trainingSet.setClass(target);
        /* Very important: place the target feature at the end! (target = last)*/
        int numAttributes = trainingSet.numAttributes();
        int classIndex = trainingSet.classIndex();
        Instances orderedTrainingSet = null;
        List<String> properOrder = new ArrayList<String>(numAttributes);
        for (int j = 0; j < numAttributes; j++) {
            if (j != classIndex) {
                properOrder.add(trainingSet.attribute(j).name());
            }
        }
        properOrder.add(trainingSet.attribute(classIndex).name());
        try {
            orderedTrainingSet = InstancesUtil.sortByFeatureAttrList(properOrder, trainingSet, -1);
        } catch (JaqpotException ex) {
            logger.error("Improper dataset - training will stop", ex);
            throw ex;
        }
        orderedTrainingSet.setClass(orderedTrainingSet.attribute(targetUri.toString()));

        /* START CONSTRUCTION OF MODEL */
        Model m = new Model(Configuration.getBaseUri().augment("model", getUuid().toString()));
        m.setAlgorithm(getAlgorithm());
        m.setCreatedBy(getTask().getCreatedBy());
        m.setDataset(datasetUri);
        m.addDependentFeatures(dependentFeature);
        try {
            dependentFeature.loadFromRemote();
        } catch (ServiceInvocationException ex) {
            Logger.getLogger(MlrRegression.class.getName()).log(Level.SEVERE, null, ex);
        }

        Set<LiteralValue> depFeatTitles = null;
        if (dependentFeature.getMeta() != null) {
            depFeatTitles = dependentFeature.getMeta().getTitles();
        }

        String depFeatTitle = dependentFeature.getUri().toString();
        if (depFeatTitles != null) {
            depFeatTitle = depFeatTitles.iterator().next().getValueAsString();
            m.getMeta().addTitle("MLR model for " + depFeatTitle)
                    .addDescription("MLR model for the prediction of " + depFeatTitle + " (uri: "
                            + dependentFeature.getUri() + " ).");
        } else {
            m.getMeta().addTitle("MLR model for the prediction of the feature with URI " + depFeatTitle)
                    .addComment("No name was found for the feature " + depFeatTitle);
        }

        /*
         * COMPILE THE LIST OF INDEPENDENT FEATURES with the exact order in which
         * these appear in the Instances object (training set).
         */
        m.setIndependentFeatures(independentFeatures);

        /* CREATE PREDICTED FEATURE AND POST IT TO REMOTE SERVER */
        String predictionFeatureUri = null;
        Feature predictedFeature = publishFeature(m, dependentFeature.getUnits(),
                "Predicted " + depFeatTitle + " by MLR model", datasetUri, featureService);
        m.addPredictedFeatures(predictedFeature);
        predictionFeatureUri = predictedFeature.getUri().toString();

        getTask().getMeta().addComment("Prediction feature " + predictionFeatureUri + " was created.");

        firstTaskUpdater = new UpdateTask(getTask());
        firstTaskUpdater.setUpdateMeta(true);
        firstTaskUpdater.setUpdateTaskStatus(true);//TODO: Is this necessary?
        try {
            firstTaskUpdater.update();
        } catch (DbException ex) {
            throw new JaqpotException(ex);
        } finally {
            try {
                firstTaskUpdater.close();
            } catch (DbException ex) {
                throw new JaqpotException(ex);
            }
        }

        /* ACTUAL TRAINING OF THE MODEL USING WEKA */
        LinearRegression linreg = new LinearRegression();
        String[] linRegOptions = { "-S", "1", "-C" };

        try {
            linreg.setOptions(linRegOptions);
            linreg.buildClassifier(orderedTrainingSet);

        } catch (final Exception ex) {// illegal options or could not build the classifier!
            String message = "MLR Model could not be trained";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        }

        try {
            // evaluate classifier and print some statistics
            Evaluation eval = new Evaluation(orderedTrainingSet);
            eval.evaluateModel(linreg, orderedTrainingSet);
            String stats = eval.toSummaryString("\nResults\n======\n", false);

            ActualModel am = new ActualModel(linreg);
            am.setStatistics(stats);
            m.setActualModel(am);
        } catch (NotSerializableException ex) {
            String message = "Model is not serializable";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        } catch (final Exception ex) {// illegal options or could not build the classifier!
            String message = "MLR Model could not be trained";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        }

        m.getMeta().addPublisher("OpenTox").addComment("This is a Multiple Linear Regression Model");

        // Save the instances being predicted to the abstract trainer for calculating the DoA (domain of applicability)
        predictedInstances = orderedTrainingSet;
        excludeAttributesDoA.add(dependentFeature.getUri().toString());

        return m;
    } catch (QSARException ex) {
        String message = "QSAR Exception: cannot train MLR model";
        logger.error(message, ex);
        throw new JaqpotException(message, ex);
    }
}

From source file:org.opentox.qsar.processors.trainers.regression.MLRTrainer.java

License:Open Source License

/**
 * Trains the MLR model given an Instances object with the training data. The prediction
 * feature (class attribute) is specified in the constructor of the class.
 * @param data The training data as <code>weka.core.Instances</code> object.
 * @return The QSARModel corresponding to the trained model.
 * @throws QSARException In case the model cannot be trained
 * <p>
 * <table>
 * <thead>
 * <tr>
 * <td><b>Code</b></td><td><b>Explanation</b></td>
 * </tr>
 * </thead>
 * <tbody>
 * <tr>
 * <td>XQReg1</td><td>Could not train the model</td>
 * </tr>
 * <tr>
 * <td>XQReg2</td><td>Could not generate PMML representation for the model</td>
 * </tr>
 * <tr>
 * <td>XQReg202</td><td>The prediction feature you provided is not a valid numeric attribute of the dataset</td>
 * </tr>
 * </tbody>
 * </table>
 * </p>
 * @throws NullPointerException
 *      In case the provided training data is null.
 */
public QSARModel train(Instances data) throws QSARException {

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    // SAVE THE DATA IN THE TEMPORARY FILE
    try {
        ArffSaver dataSaver = new ArffSaver();
        dataSaver.setInstances(data);
        dataSaver.setDestination(new FileOutputStream(tempFile));
        dataSaver.writeBatch();
    } catch (final IOException ex) {
        tempFile.delete();
        throw new RuntimeException(
                "Unexpected condition while trying to save the " + "dataset in a temporary ARFF file", ex);
    }

    LinearRegression linreg = new LinearRegression();
    String[] linRegOptions = { "-S", "1", "-C" };
    try {
        linreg.setOptions(linRegOptions);
        linreg.buildClassifier(data);
    } catch (final Exception ex) {// illegal options or could not build the classifier!
        String message = "MLR Model could not be trained";
        YaqpLogger.LOG.log(new Trace(getClass(), message + " :: " + ex));
        throw new QSARException(Cause.XQReg1, message, ex);
    }

    try {
        generatePMML(linreg, data);
    } catch (final YaqpIOException ex) {
        String message = "Could not generate PMML representation for MLR model :: " + ex;
        throw new QSARException(Cause.XQReg2, message, ex);
    }

    // PERFORM THE TRAINING
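    // Standard weka.classifiers.Evaluation options: "-c" is the 1-based class
    // index, "-t" the ARFF training file, and "-d" the file where the built
    // model is serialized.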
    String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
            /// Save the model in the following directory
            "-d", ServerFolders.models_weka + "/" + uuid };
    try {
        Evaluation.evaluateModel(linreg, generalOptions);
    } catch (final Exception ex) {
        tempFile.delete();
        throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                + "an SVM model. Possible explanation : {" + ex.getMessage() + "}", ex);
    }

    ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
    for (int i = 0; i < data.numAttributes(); i++) {
        Feature f = new Feature(data.attribute(i).name());
        if (data.classIndex() != i) {
            independentFeatures.add(f);
        }
    }

    Feature dependentFeature = new Feature(data.classAttribute().name());
    Feature predictedFeature = dependentFeature;

    QSARModel model = new QSARModel(uuid.toString(), predictedFeature, dependentFeature, independentFeatures,
            YaqpAlgorithms.MLR, new User(), null, datasetUri, ModelStatus.UNDER_DEVELOPMENT);
    model.setParams(new HashMap<String, AlgorithmParameter>());

    return model;

}

From source file:Prediccion.Prediccion.java

License:Open Source License

@Override
public void run() {
    try {

        ArrayList<Instances> pasos = cargarDatos();

        System.err.println(pasos.size());

        // Instantiate the forecasters
        ArrayList<WekaForecaster> forecaster = new ArrayList<>(24);

        for (int a = 0; a < 24; a++) {
            forecaster.add(new WekaForecaster());
        }

        int a = 0;

        for (WekaForecaster fore : forecaster) {

            // Define the attribute we want to forecast
            fore.setFieldsToForecast("Total");

            // Define the forecasting method to use. In this case, linear regression,
            // because it performed best in the paper.
            fore.setBaseForecaster(new LinearRegression());

            //Defimimos el atributo que "marca" el tiempo y su peridiocidad
            fore.getTSLagMaker().setTimeStampField("Intervalo");
            fore.getTSLagMaker().setMinLag(1);
            fore.getTSLagMaker().setMaxLag(1);

            fore.getTSLagMaker().setPeriodicity(TSLagMaker.Periodicity.WEEKLY);

            fore.buildForecaster(pasos.get(a), System.out);

            // System.err.println(pasos.get(a).toString());

            //System.err.printf("Finished");

            fore.primeForecaster(pasos.get(a));

            List<List<NumericPrediction>> forecast = fore.forecast(1, System.out);

            System.err.println("==== " + a + " ====");
            // output the predictions. Outer list is over the steps; inner list is over
            // the targets
            for (int i = 0; i < 1; i++) {
                List<NumericPrediction> predsAtStep = forecast.get(i);
                for (int j = 0; j < 1; j++) {
                    NumericPrediction predForTarget = predsAtStep.get(j);
                    System.err.print("" + predForTarget.predicted() + " ");
                }
                System.err.println();
            }
            a++;
        }
        /*    
                    
           // path to the Australian wine data included with the time series forecasting
           // package
           String pathToWineData = weka.core.WekaPackageManager.PACKAGES_DIR.toString()
             + File.separator + "timeseriesForecasting" + File.separator + "sample-data"
             + File.separator + "wine.arff";
                
           // load the wine data
           Instances wine = new Instances(new BufferedReader(new FileReader(pathToWineData)));      
                   
           // new forecaster
           WekaForecaster forecaster = new WekaForecaster();
                
           // set the targets we want to forecast. This method calls
           // setFieldsToLag() on the lag maker object for us
           forecaster.setFieldsToForecast("Fortified,Dry-white");
                
           // default underlying classifier is SMOreg (SVM) - we'll use
           // gaussian processes for regression instead
           forecaster.setBaseForecaster(new GaussianProcesses());
                
           forecaster.getTSLagMaker().setTimeStampField("Date"); // date time stamp
           forecaster.getTSLagMaker().setMinLag(1);
           forecaster.getTSLagMaker().setMaxLag(12); // monthly data
                
           // add a month of the year indicator field
           forecaster.getTSLagMaker().setAddMonthOfYear(true);
                
           // add a quarter of the year indicator field
           forecaster.getTSLagMaker().setAddQuarterOfYear(true);
                
           // build the model
           forecaster.buildForecaster(wine, System.out);
                
           // prime the forecaster with enough recent historical data
           // to cover up to the maximum lag. In our case, we could just supply
           // the 12 most recent historical instances, as this covers our maximum
           // lag period
           forecaster.primeForecaster(wine);
                
           // forecast for 12 units (months) beyond the end of the
           // training data
           List<List<NumericPrediction>> forecast = forecaster.forecast(12, System.out);
                
                   
                   
           // output the predictions. Outer list is over the steps; inner list is over
           // the targets
           for (int i = 0; i < 12; i++) {
             List<NumericPrediction> predsAtStep = forecast.get(i);
             for (int j = 0; j < 2; j++) {
               NumericPrediction predForTarget = predsAtStep.get(j);
               System.out.print("" + predForTarget.predicted() + " ");
             }
             System.out.println();
           }
                
           // we can continue to use the trained forecaster for further forecasting
           // by priming with the most recent historical data (as it becomes available).
           // At some stage it becomes prudent to re-build the model using current
           // historical data.
        */
    } catch (Exception ex) {
        ex.printStackTrace();
    }

}

From source file:src.BestFirst.java

License:Open Source License

/**
 * Searches the attribute subset space by best first search
 *
 * @param data the training instances.
 * @return an array (not necessarily ordered) of selected attribute indexes
 * @throws Exception if the search can't be completed
 */
public int[] search(Instances data, TSLagMaker tsLagMaker, List<String> overlayFields) throws Exception {
    long startTime = System.currentTimeMillis(), stopTime;
    TSWrapper tsWrapper = new TSWrapper();
    tsWrapper.buildEvaluator(data);
    String m_EvaluationMeasure = "RMSE";
    tsWrapper.setM_EvaluationMeasure(m_EvaluationMeasure);
    System.out.println("Using " + m_EvaluationMeasure + " as a evaluation Measure");
    LinearRegression linearRegression = new LinearRegression();
    linearRegression.setOptions(weka.core.Utils.splitOptions("-S 1 -R 1E-6"));
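    // "-S 1" disables attribute selection and "-R 1E-6" sets the ridge
    // parameter. Note that this LinearRegression instance is never used below;
    // the MLPRegressor is what gets set as the wrapper's base classifier.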

    MLPRegressor mlpRegressor = new MLPRegressor();
    mlpRegressor.setOptions(weka.core.Utils.splitOptions("-P 5 -E 5 -N 2"));
    tsWrapper.setM_BaseClassifier(mlpRegressor);
    System.out.println("Using best First and MLPReg as classifier.");
    m_numAttribs = data.numAttributes();
    SubsetHandler subsetHandler = new SubsetHandler();
    subsetHandler.setM_numAttribs(m_numAttribs);
    m_totalEvals = 0;
    int i, j;
    int best_size = 0;
    int size = 0;
    int done;
    int searchDirection = m_searchDirection;
    BitSet best_group, temp_group;
    int stale;
    double best_merit;
    double merit;
    boolean z;
    boolean added;
    Double bias = 0.;
    Hashtable<String, Double> lookForExistingSubsets = new Hashtable<String, Double>();
    int insertCount = 0;
    LinkedList2 prioQueueList = new LinkedList2(m_maxStale);
    best_merit = -Double.MAX_VALUE;
    stale = 0;
    int startSetPercentage = 0;
    best_group = subsetHandler.getStartSet(startSetPercentage);

    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals("")))
        m_starting = m_startRange.getSelection();
    // If a starting subset has been supplied, then initialise the bitset
    if (m_starting != null) {
        for (i = 0; i < m_starting.length; i++)
            if ((m_starting[i]) != m_classIndex)
                best_group.set(m_starting[i]);
        best_size = m_starting.length;
        m_totalEvals++;
    } else {
        if (m_searchDirection == SELECTION_BACKWARD) {
            //setStartSet("1-last");
            //m_starting = new int[m_numAttribs];
            // init initial subset to all attributes
            for (i = 11, j = 0; i < m_numAttribs; i++) {
                if (i != m_classIndex) {
                    best_group.set(i);
                    //m_starting[j++] = i;
                }
            }
            best_size = m_numAttribs - 1;
            m_totalEvals++;
        }
    }
    // evaluate the initial subset
    best_merit = -tsWrapper.evaluateSubset(best_group, tsLagMaker, overlayFields, false);
    //printGroup(best_group, m_numAttribs);
    System.out.println("Merit:" + best_merit);
    System.out.print("Group: ");
    subsetHandler.printGroup(best_group);
    System.out.println("\n");
    m_totalEvals++;
    // add the initial group to the list and the hash table
    Object[] best = new Object[1];
    best[0] = best_group.clone();
    prioQueueList.addToList(best, best_merit);
    String hashedGroup = best_group.toString();
    lookForExistingSubsets.put(hashedGroup, new Double(best_merit));
    System.out.println("StartsetPercentage:" + startSetPercentage + ", maxStale:" + m_maxStale);

    while (stale < m_maxStale) {
        added = false;
        if (m_searchDirection == SELECTION_BIDIRECTIONAL) {
            // bi-directional search
            done = 2;
            searchDirection = SELECTION_FORWARD;
        } else {
            done = 1;
        }
        // finished search?
        if (prioQueueList.size() == 0) {
            stale = m_maxStale;
            break;
        }
        // copy the attribute set at the head of the list
        temp_group = (BitSet) (prioQueueList.getLinkAt(0).getData()[0]);
        temp_group = (BitSet) temp_group.clone();
        // remove the head of the list
        prioQueueList.removeLinkAt(0);
        // count the number of bits set (attributes)
        int kk;
        for (kk = 0, size = 0; kk < m_numAttribs; kk++)
            if (temp_group.get(kk))
                size++;
        do {
            for (i = 11; i < m_numAttribs - 2; i++) { //setting it to 11 to skip overlay fields, time stamps etc.
                if (searchDirection == SELECTION_FORWARD)
                    z = ((i != m_classIndex) && (!temp_group.get(i)));
                else
                    z = ((i != m_classIndex) && (temp_group.get(i)));
                if (z) {
                    // set the bit (attribute to add/delete)
                    if (searchDirection == SELECTION_FORWARD) {
                        temp_group.set(i);
                        size++;
                    } else {
                        temp_group.clear(i);
                        size--;
                    }
                    /*
                     * if this subset has been seen before, then it is already in the
                     * list (or has been fully expanded)
                    */
                    hashedGroup = temp_group.toString();

                    if (lookForExistingSubsets.containsKey(hashedGroup) == false) {
                        //System.out.println("Before eval:" + temp_group);
                        merit = -tsWrapper.evaluateSubset(temp_group, tsLagMaker, overlayFields, false);
                        System.out.println("Merit: " + merit);
                        System.out.print("Group: ");

                        subsetHandler.printGroup(temp_group);
                        System.out.println("\n");
                        m_totalEvals++;

                        hashedGroup = temp_group.toString();
                        lookForExistingSubsets.put(hashedGroup, new Double(merit));
                        insertCount++;
                        // insert this one in the list

                    } else
                        merit = lookForExistingSubsets.get(hashedGroup).doubleValue();
                    Object[] add = new Object[1];
                    add[0] = temp_group.clone();
                    prioQueueList.addToList(add, merit);
                    if (m_debug) {
                        System.out.print("Group: ");
                        subsetHandler.printGroup(temp_group);
                        System.out.println("Merit: " + merit);
                    }

                    // is this better than the best?
                    if (searchDirection == SELECTION_FORWARD) {
                        z = (merit - best_merit) > 0.01; //they are both negative numbers; actually we are looking for the smallest error
                    } else {
                        if (merit == best_merit) {
                            z = (size < best_size);
                        } else {
                            z = (merit > best_merit);
                        }
                    }

                    if (z) {
                        added = true;
                        stale = 0;
                        System.out.println("Setting best merit to:" + merit);
                        best_merit = merit;
                        // best_size = (size + best_size);
                        best_size = size;
                        best_group = (BitSet) (temp_group.clone());
                    }

                    // unset this addition(deletion)
                    if (searchDirection == SELECTION_FORWARD) {
                        temp_group.clear(i);
                        size--;
                    } else {
                        temp_group.set(i);
                        size++;
                    }
                }
            }
            if (done == 2)
                searchDirection = SELECTION_BACKWARD;
            done--;
        } while (done > 0);
        /* if we haven't added a new attribute subset then full expansion of this
        * node hasn't resulted in anything better
        */
        if (!added) {
            stale++;
            System.out.println("Stale:" + stale);
        }
    }
    subsetHandler.printGroup(best_group);
    System.out.println("Best merit: " + best_merit);
    System.out.println(m_totalEvals);
    stopTime = System.currentTimeMillis();
    System.out.println("Time taken for wrapper part:" + ((double) stopTime - startTime) / 1000);
    m_bestMerit = best_merit;
    subsetHandler.includesMoreThanXPercentOfFeatures(best_group, true, 0);
    tsWrapper.evaluateSubset(best_group, tsLagMaker, overlayFields, true);
    return attributeList(best_group);
}

From source file:src.SimmulatedAnnealing.java

License:Open Source License

/**
 * Searches the attribute subset space by simulated annealing
 *
 * @param data the training instances.
 * @return an array (not necessarily ordered) of selected attribute indexes
 * @throws Exception if the search can't be completed
 */
public int[] search(Instances data, TSLagMaker tsLagMaker, List<String> overlayFields) throws Exception {
    long startTime = System.currentTimeMillis(), stopTime;
    m_totalEvals = 0;
    TSWrapper tsWrapper = new TSWrapper();
    tsWrapper.buildEvaluator(data);
    String m_EvaluationMeasure = "RMSE";
    tsWrapper.setM_EvaluationMeasure(m_EvaluationMeasure);
    System.out.println("Using " + m_EvaluationMeasure + " as a evaluation Measure");
    LinearRegression linearRegression = new LinearRegression();
    linearRegression.setOptions(weka.core.Utils.splitOptions("-S 1 -R 1E-6"));
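    // As in BestFirst.search above, this LinearRegression is configured but
    // never used; the MLPRegressor below is set as the base classifier.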
    MLPRegressor mlpRegressor = new MLPRegressor();
    mlpRegressor.setOptions(weka.core.Utils.splitOptions("-P 4 -E 4 -N 2"));
    tsWrapper.setM_BaseClassifier(mlpRegressor);
    System.out.println("Using SA and MLPRegressor as classifier.");
    m_numAttribs = data.numAttributes();
    SubsetHandler subsetHandler = new SubsetHandler();
    subsetHandler.setM_numAttribs(m_numAttribs);
    BitSet best_group;
    best_group = subsetHandler.getStartSet(0);
    double temperature = 0.4, initialTemp = temperature, dropRate = 0.00012, limit = 0.0000001;
    double best_merit;
    int changedAltoughWorseCounter = 0;
    Hashtable<String, Double> lookForExistingSubsets = new Hashtable<String, Double>();
    // evaluate the initial subset
    subsetHandler.printGroup(best_group);
    best_merit = -tsWrapper.evaluateSubset(best_group, tsLagMaker, overlayFields, false);
    m_totalEvals++;
    String subset_string = best_group.toString();
    lookForExistingSubsets.put(subset_string, best_merit);
    System.out.println("Initial group w/ numAttribs: " + m_numAttribs + " temp: " + temperature + " drop rate:"
            + dropRate + " limit:" + limit);
    System.out.println("Merit: " + best_merit);
    TheVeryBest theVeryBest = new TheVeryBest((BitSet) best_group.clone(), best_merit);
    ArrayList<Boolean> changedAlthoughWorse = new ArrayList<Boolean>();
    while (temperature > limit) {
        changedAltoughWorseCounter = 0;
        BitSet s_new = subsetHandler.changeBits((BitSet) best_group.clone(), 1);
        subset_string = s_new.toString();
        if (!lookForExistingSubsets.containsKey(subset_string)) {
            double s_new_merit = -tsWrapper.evaluateSubset(s_new, tsLagMaker, overlayFields, false);
            m_totalEvals++;
            System.out.println("New merit: " + s_new_merit);
            lookForExistingSubsets.put(subset_string, s_new_merit);
            if (decisionFunction(s_new_merit - best_merit, temperature, best_merit, initialTemp)) {
                if (best_merit - s_new_merit > 0) //it means this is a worse set than the best set, and we still change the best set to it.
                    changedAlthoughWorse.add(true);
                best_group = (BitSet) s_new.clone();
                best_merit = s_new_merit;
            } else
                changedAlthoughWorse.add(false);
            for (int j = 0; j < changedAlthoughWorse.size(); j++)
                if (changedAlthoughWorse.get(j))
                    changedAltoughWorseCounter++;
            System.out.println("Percentage of worse sets accepted:"
                    + (float) changedAltoughWorseCounter * 100 / changedAlthoughWorse.size()
                    + " Arraylist size:" + changedAlthoughWorse.size() + " changedAlthoughworse counter:"
                    + changedAltoughWorseCounter);
            if (best_merit > theVeryBest.getMerit()) //we have negative values for the scores, so bigger is better
                theVeryBest.setNewSet((BitSet) best_group.clone(), best_merit);
            temperature = temperature / (float) (1 + dropRate * (m_totalEvals - 1));
        }
    }
    System.out.println("Best merit: " + theVeryBest.getMerit());
    System.out.println(m_totalEvals);
    stopTime = System.currentTimeMillis();
    System.out.println("Time taken for wrapper part:" + ((double) stopTime - startTime) / 1000);
    subsetHandler.printGroup(theVeryBest.getSubset());
    subsetHandler.includesMoreThanXPercentOfFeatures(theVeryBest.getSubset(), true, 0);
    tsWrapper.evaluateSubset(theVeryBest.getSubset(), tsLagMaker, overlayFields, true);
    return attributeList(theVeryBest.getSubset());
}

From source file:textmining.TextMining.java

private static String Regression_on_Polarity(Instances instances) throws Exception {
    Classifier regression = (Classifier) new LinearRegression();
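    // "-S 2" selects attributes greedily; "-R 1.0E-8" sets the ridge parameter.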
    String[] options = weka.core.Utils.splitOptions("-S 2 -R 1.0E-8");
    return setOptions(regression, instances, options);
}

From source file:wekimini.learning.LinearRegressionModelBuilder.java

public LinearRegressionModelBuilder() {
    classifier = new LinearRegression();
    featureSelectionType = FeatureSelectionType.NONE;
    ((LinearRegression) classifier).setAttributeSelectionMethod(
            new SelectedTag(LinearRegression.SELECTION_NONE, LinearRegression.TAGS_SELECTION));
    ((LinearRegression) classifier).setEliminateColinearAttributes(removeColinear);
}