Example usage for weka.classifiers.functions LinearRegression LinearRegression


Introduction

This page lists example usages of the weka.classifiers.functions.LinearRegression constructor, LinearRegression().

Prototype

public LinearRegression() 
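
Before the full listings below, here is a minimal, self-contained sketch of the constructor in use, assuming an ARFF dataset with a numeric target. The file path "data.arff" is a placeholder, and the "-S 1 -C" options mirror those used by several of the examples that follow.

import weka.classifiers.functions.LinearRegression;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class LinearRegressionDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset whose last attribute is the numeric target
        // ("data.arff" is a placeholder path).
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // The no-argument constructor shown in the Prototype above.
        LinearRegression linreg = new LinearRegression();
        // "-S 1": no attribute selection; "-C": keep collinear attributes.
        linreg.setOptions(new String[] { "-S", "1", "-C" });
        linreg.buildClassifier(data);

        // Printing the model shows the fitted regression coefficients.
        System.out.println(linreg);
    }
}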

Usage

From source file:org.jaqpot.algorithm.resource.WekaMLR.java

License:Open Source License

@POST
@Path("training")
public Response training(TrainingRequest request) {

    try {
        if (request.getDataset().getDataEntry().isEmpty()
                || request.getDataset().getDataEntry().get(0).getValues().isEmpty()) {
            return Response.status(Response.Status.BAD_REQUEST).entity(
                    ErrorReportFactory.badRequest("Dataset is empty", "Cannot train model on empty dataset"))
                    .build();
        }
        List<String> features = request.getDataset().getDataEntry().stream().findFirst().get().getValues()
                .keySet().stream().collect(Collectors.toList());

        Instances data = InstanceUtils.createFromDataset(request.getDataset(), request.getPredictionFeature());

        LinearRegression linreg = new LinearRegression();
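        // "-S 1" disables attribute selection; "-C" tells Weka not to
        // eliminate collinear attributes.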
        String[] linRegOptions = { "-S", "1", "-C" };
        linreg.setOptions(linRegOptions);
        linreg.buildClassifier(data);

        WekaModel model = new WekaModel();
        model.setClassifier(linreg);

        String pmml = PmmlUtils.createRegressionModel(features, request.getPredictionFeature(),
                linreg.coefficients(), "MLR");

        TrainingResponse response = new TrainingResponse();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ObjectOutput out = new ObjectOutputStream(baos);
        out.writeObject(model);
        String base64Model = Base64.getEncoder().encodeToString(baos.toByteArray());
        response.setRawModel(base64Model);
        List<String> independentFeatures = features.stream()
                .filter(feature -> !feature.equals(request.getPredictionFeature()))
                .collect(Collectors.toList());
        response.setIndependentFeatures(independentFeatures);
        response.setPmmlModel(pmml);
        String predictionFeatureName = request.getDataset().getFeatures().stream()
                .filter(f -> f.getURI().equals(request.getPredictionFeature())).findFirst().get().getName();
        response.setAdditionalInfo(Arrays.asList(request.getPredictionFeature(), predictionFeatureName));

        response.setPredictedFeatures(Arrays.asList("Weka MLR prediction of " + predictionFeatureName));

        return Response.ok(response).build();
    } catch (Exception ex) {
        Logger.getLogger(WekaMLR.class.getName()).log(Level.SEVERE, null, ex);
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(ex.getMessage()).build();
    }
}

From source file:org.jaqpot.algorithms.resource.WekaMLR.java

License:Open Source License

@POST
@Path("training")
public Response training(TrainingRequest request) {

    try {
        if (request.getDataset().getDataEntry().isEmpty()
                || request.getDataset().getDataEntry().get(0).getValues().isEmpty()) {
            return Response.status(Response.Status.BAD_REQUEST)
                    .entity("Dataset is empty. Cannot train model on empty dataset.").build();
        }
        List<String> features = request.getDataset().getDataEntry().stream().findFirst().get().getValues()
                .keySet().stream().collect(Collectors.toList());

        Instances data = InstanceUtils.createFromDataset(request.getDataset(), request.getPredictionFeature());

        LinearRegression linreg = new LinearRegression();
        String[] linRegOptions = { "-S", "1", "-C" };
        linreg.setOptions(linRegOptions);
        linreg.buildClassifier(data);

        WekaModel model = new WekaModel();
        model.setClassifier(linreg);

        String pmml = PmmlUtils.createRegressionModel(features, request.getPredictionFeature(),
                linreg.coefficients(), "MLR");

        TrainingResponse response = new TrainingResponse();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ObjectOutput out = new ObjectOutputStream(baos);
        out.writeObject(model);
        String base64Model = Base64.getEncoder().encodeToString(baos.toByteArray());
        response.setRawModel(base64Model);
        List<String> independentFeatures = features.stream()
                .filter(feature -> !feature.equals(request.getPredictionFeature()))
                .collect(Collectors.toList());
        response.setIndependentFeatures(independentFeatures);
        response.setPmmlModel(pmml);
        String predictionFeatureName = request.getDataset().getFeatures().stream()
                .filter(f -> f.getURI().equals(request.getPredictionFeature())).findFirst().get().getName();
        response.setAdditionalInfo(Arrays.asList(request.getPredictionFeature(), predictionFeatureName));

        response.setPredictedFeatures(Arrays.asList("Weka MLR prediction of " + predictionFeatureName));

        return Response.ok(response).build();
    } catch (Exception ex) {
        Logger.getLogger(WekaMLR.class.getName()).log(Level.SEVERE, null, ex);
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(ex.getMessage()).build();
    }
}

From source file:org.opentox.jaqpot3.qsar.trainer.MlrRegression.java

License:Open Source License

@Override
public Model train(Instances data) throws JaqpotException {
    try {

        getTask().getMeta().addComment(
                "Dataset successfully retrieved and converted " + "into a weka.core.Instances object");
        UpdateTask firstTaskUpdater = new UpdateTask(getTask());
        firstTaskUpdater.setUpdateMeta(true);
        firstTaskUpdater.setUpdateTaskStatus(true);//TODO: Is this necessary?
        try {
            firstTaskUpdater.update();
        } catch (DbException ex) {
            throw new JaqpotException(ex);
        } finally {
            try {
                firstTaskUpdater.close();
            } catch (DbException ex) {
                throw new JaqpotException(ex);
            }
        }

        Instances trainingSet = data;
        getTask().getMeta().addComment("The downloaded dataset is now preprocessed");
        firstTaskUpdater = new UpdateTask(getTask());
        firstTaskUpdater.setUpdateMeta(true);
        firstTaskUpdater.setUpdateTaskStatus(true);//TODO: Is this necessary?
        try {
            firstTaskUpdater.update();
        } catch (DbException ex) {
            throw new JaqpotException(ex);
        } finally {
            try {
                firstTaskUpdater.close();
            } catch (DbException ex) {
                throw new JaqpotException(ex);
            }
        }

        /* SET CLASS ATTRIBUTE */
        Attribute target = trainingSet.attribute(targetUri.toString());
        if (target == null) {
            throw new BadParameterException("The prediction feature you provided was not found in the dataset");
        } else {
            if (!target.isNumeric()) {
                throw new QSARException("The prediction feature you provided is not numeric.");
            }
        }
        trainingSet.setClass(target);
        /* Very important: place the target feature at the end! (target = last)*/
        int numAttributes = trainingSet.numAttributes();
        int classIndex = trainingSet.classIndex();
        Instances orderedTrainingSet = null;
        List<String> properOrder = new ArrayList<String>(numAttributes);
        for (int j = 0; j < numAttributes; j++) {
            if (j != classIndex) {
                properOrder.add(trainingSet.attribute(j).name());
            }
        }
        properOrder.add(trainingSet.attribute(classIndex).name());
        try {
            orderedTrainingSet = InstancesUtil.sortByFeatureAttrList(properOrder, trainingSet, -1);
        } catch (JaqpotException ex) {
            logger.error("Improper dataset - training will stop", ex);
            throw ex;
        }
        orderedTrainingSet.setClass(orderedTrainingSet.attribute(targetUri.toString()));

        /* START CONSTRUCTION OF MODEL */
        Model m = new Model(Configuration.getBaseUri().augment("model", getUuid().toString()));
        m.setAlgorithm(getAlgorithm());
        m.setCreatedBy(getTask().getCreatedBy());
        m.setDataset(datasetUri);
        m.addDependentFeatures(dependentFeature);
        try {
            dependentFeature.loadFromRemote();
        } catch (ServiceInvocationException ex) {
            Logger.getLogger(MlrRegression.class.getName()).log(Level.SEVERE, null, ex);
        }

        Set<LiteralValue> depFeatTitles = null;
        if (dependentFeature.getMeta() != null) {
            depFeatTitles = dependentFeature.getMeta().getTitles();
        }

        String depFeatTitle = dependentFeature.getUri().toString();
        if (depFeatTitles != null) {
            depFeatTitle = depFeatTitles.iterator().next().getValueAsString();
            m.getMeta().addTitle("MLR model for " + depFeatTitle)
                    .addDescription("MLR model for the prediction of " + depFeatTitle + " (uri: "
                            + dependentFeature.getUri() + " ).");
        } else {
            m.getMeta().addTitle("MLR model for the prediction of the feature with URI " + depFeatTitle)
                    .addComment("No name was found for the feature " + depFeatTitle);
        }

        /*
         * COMPILE THE LIST OF INDEPENDENT FEATURES with the exact order in which
         * these appear in the Instances object (training set).
         */
        m.setIndependentFeatures(independentFeatures);

        /* CREATE PREDICTED FEATURE AND POST IT TO REMOTE SERVER */
        String predictionFeatureUri = null;
        Feature predictedFeature = publishFeature(m, dependentFeature.getUnits(),
                "Predicted " + depFeatTitle + " by MLR model", datasetUri, featureService);
        m.addPredictedFeatures(predictedFeature);
        predictionFeatureUri = predictedFeature.getUri().toString();

        getTask().getMeta().addComment("Prediction feature " + predictionFeatureUri + " was created.");

        firstTaskUpdater = new UpdateTask(getTask());
        firstTaskUpdater.setUpdateMeta(true);
        firstTaskUpdater.setUpdateTaskStatus(true);//TODO: Is this necessary?
        try {
            firstTaskUpdater.update();
        } catch (DbException ex) {
            throw new JaqpotException(ex);
        } finally {
            try {
                firstTaskUpdater.close();
            } catch (DbException ex) {
                throw new JaqpotException(ex);
            }
        }

        /* ACTUAL TRAINING OF THE MODEL USING WEKA */
        LinearRegression linreg = new LinearRegression();
        String[] linRegOptions = { "-S", "1", "-C" };

        try {
            linreg.setOptions(linRegOptions);
            linreg.buildClassifier(orderedTrainingSet);

        } catch (final Exception ex) {// illegal options or could not build the classifier!
            String message = "MLR Model could not be trained";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        }

        try {
            // evaluate classifier and print some statistics
            Evaluation eval = new Evaluation(orderedTrainingSet);
            eval.evaluateModel(linreg, orderedTrainingSet);
            String stats = eval.toSummaryString("\nResults\n======\n", false);

            ActualModel am = new ActualModel(linreg);
            am.setStatistics(stats);
            m.setActualModel(am);
        } catch (NotSerializableException ex) {
            String message = "Model is not serializable";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        } catch (final Exception ex) {// illegal options or could not build the classifier!
            String message = "MLR Model could not be trained";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        }

        m.getMeta().addPublisher("OpenTox").addComment("This is a Multiple Linear Regression Model");

        // Save the instances being predicted to the abstract trainer for calculating the DoA (domain of applicability)
        predictedInstances = orderedTrainingSet;
        excludeAttributesDoA.add(dependentFeature.getUri().toString());

        return m;
    } catch (QSARException ex) {
        String message = "QSAR Exception: cannot train MLR model";
        logger.error(message, ex);
        throw new JaqpotException(message, ex);
    }
}

From source file:org.opentox.qsar.processors.trainers.regression.MLRTrainer.java

License:Open Source License

/**
 * Trains the MLR model given an Instances object with the training data. The prediction
 * feature (class attribute) is specified in the constructor of the class.
 * @param data The training data as <code>weka.core.Instances</code> object.
 * @return The QSARModel corresponding to the trained model.
 * @throws QSARException In case the model cannot be trained
 * <p>
 * <table>
 * <thead>
 * <tr>
 * <td><b>Code</b></td><td><b>Explanation</b></td>
 * </tr>
 * </thead>
 * <tbody>
 * <tr>
 * <td>XQReg1</td><td>Could not train the model</td>
 * </tr>
 * <tr>
 * <td>XQReg2</td><td>Could not generate PMML representation for the model</td>
 * </tr>
 * <tr>
 * <td>XQReg202</td><td>The prediction feature you provided is not a valid numeric attribute of the dataset</td>
 * </tr>
 * </tbody>
 * </table>
 * </p>
 * @throws NullPointerException
 *      In case the provided training data is null.
 */
public QSARModel train(Instances data) throws QSARException {

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    // SAVE THE DATA IN THE TEMPORARY FILE
    try {
        ArffSaver dataSaver = new ArffSaver();
        dataSaver.setInstances(data);
        dataSaver.setDestination(new FileOutputStream(tempFile));
        dataSaver.writeBatch();
    } catch (final IOException ex) {
        tempFile.delete();
        throw new RuntimeException(
                "Unexpected condition while trying to save the " + "dataset in a temporary ARFF file", ex);
    }

    LinearRegression linreg = new LinearRegression();
    String[] linRegOptions = { "-S", "1", "-C" };
    try {
        linreg.setOptions(linRegOptions);
        linreg.buildClassifier(data);
    } catch (final Exception ex) {// illegal options or could not build the classifier!
        String message = "MLR Model could not be trained";
        YaqpLogger.LOG.log(new Trace(getClass(), message + " :: " + ex));
        throw new QSARException(Cause.XQReg1, message, ex);
    }

    try {
        generatePMML(linreg, data);
    } catch (final YaqpIOException ex) {
        String message = "Could not generate PMML representation for MLR model :: " + ex;
        throw new QSARException(Cause.XQReg2, message, ex);
    }

    // PERFORM THE TRAINING
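    // Standard weka.classifiers.Evaluation options: "-c" is the 1-based class
    // index, "-t" the ARFF training file, and "-d" the file where the built
    // model is serialized.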
    String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
            /// Save the model in the following directory
            "-d", ServerFolders.models_weka + "/" + uuid };
    try {
        Evaluation.evaluateModel(linreg, generalOptions);
    } catch (final Exception ex) {
        tempFile.delete();
        throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                + "an SVM model. Possible explanation : {" + ex.getMessage() + "}", ex);
    }

    ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
    for (int i = 0; i < data.numAttributes(); i++) {
        Feature f = new Feature(data.attribute(i).name());
        if (data.classIndex() != i) {
            independentFeatures.add(f);
        }
    }

    Feature dependentFeature = new Feature(data.classAttribute().name());
    Feature predictedFeature = dependentFeature;

    QSARModel model = new QSARModel(uuid.toString(), predictedFeature, dependentFeature, independentFeatures,
            YaqpAlgorithms.MLR, new User(), null, datasetUri, ModelStatus.UNDER_DEVELOPMENT);
    model.setParams(new HashMap<String, AlgorithmParameter>());

    return model;

}

From source file:Prediccion.Prediccion.java

License:Open Source License

@Override
public void run() {
    try {

        ArrayList<Instances> pasos = cargarDatos();

        System.err.println(pasos.size());

        // Instantiate the forecasters
        ArrayList<WekaForecaster> forecaster = new ArrayList<>(24);

        for (int a = 0; a < 24; a++) {
            forecaster.add(new WekaForecaster());
        }

        int a = 0;

        for (WekaForecaster fore : forecaster) {

            // Define the attribute we want to forecast
            fore.setFieldsToForecast("Total");

            // Define the forecasting method to use. In this case, linear regression,
            // because it performed best in the paper.
            fore.setBaseForecaster(new LinearRegression());

            //Defimimos el atributo que "marca" el tiempo y su peridiocidad
            fore.getTSLagMaker().setTimeStampField("Intervalo");
            fore.getTSLagMaker().setMinLag(1);
            fore.getTSLagMaker().setMaxLag(1);

            fore.getTSLagMaker().setPeriodicity(TSLagMaker.Periodicity.WEEKLY);

            fore.buildForecaster(pasos.get(a), System.out);

            // System.err.println(pasos.get(a).toString());

            //System.err.printf("Finished");

            fore.primeForecaster(pasos.get(a));

            List<List<NumericPrediction>> forecast = fore.forecast(1, System.out);

            System.err.println("==== " + a + " ====");
            // output the predictions. Outer list is over the steps; inner list is over
            // the targets
            for (int i = 0; i < 1; i++) {
                List<NumericPrediction> predsAtStep = forecast.get(i);
                for (int j = 0; j < 1; j++) {
                    NumericPrediction predForTarget = predsAtStep.get(j);
                    System.err.print("" + predForTarget.predicted() + " ");
                }
                System.err.println();
            }
            a++;
        }
        /*    
                    
           // path to the Australian wine data included with the time series forecasting
           // package
           String pathToWineData = weka.core.WekaPackageManager.PACKAGES_DIR.toString()
             + File.separator + "timeseriesForecasting" + File.separator + "sample-data"
             + File.separator + "wine.arff";
                
           // load the wine data
           Instances wine = new Instances(new BufferedReader(new FileReader(pathToWineData)));      
                   
           // new forecaster
           WekaForecaster forecaster = new WekaForecaster();
                
           // set the targets we want to forecast. This method calls
           // setFieldsToLag() on the lag maker object for us
           forecaster.setFieldsToForecast("Fortified,Dry-white");
                
           // default underlying classifier is SMOreg (SVM) - we'll use
           // gaussian processes for regression instead
           forecaster.setBaseForecaster(new GaussianProcesses());
                
           forecaster.getTSLagMaker().setTimeStampField("Date"); // date time stamp
           forecaster.getTSLagMaker().setMinLag(1);
           forecaster.getTSLagMaker().setMaxLag(12); // monthly data
                
           // add a month of the year indicator field
           forecaster.getTSLagMaker().setAddMonthOfYear(true);
                
           // add a quarter of the year indicator field
           forecaster.getTSLagMaker().setAddQuarterOfYear(true);
                
           // build the model
           forecaster.buildForecaster(wine, System.out);
                
           // prime the forecaster with enough recent historical data
           // to cover up to the maximum lag. In our case, we could just supply
           // the 12 most recent historical instances, as this covers our maximum
           // lag period
           forecaster.primeForecaster(wine);
                
           // forecast for 12 units (months) beyond the end of the
           // training data
           List<List<NumericPrediction>> forecast = forecaster.forecast(12, System.out);
                
                   
                   
           // output the predictions. Outer list is over the steps; inner list is over
           // the targets
           for (int i = 0; i < 12; i++) {
             List<NumericPrediction> predsAtStep = forecast.get(i);
             for (int j = 0; j < 2; j++) {
               NumericPrediction predForTarget = predsAtStep.get(j);
               System.out.print("" + predForTarget.predicted() + " ");
             }
             System.out.println();
           }
                
           // we can continue to use the trained forecaster for further forecasting
           // by priming with the most recent historical data (as it becomes available).
           // At some stage it becomes prudent to re-build the model using current
           // historical data.
        */
    } catch (Exception ex) {
        ex.printStackTrace();
    }

}

From source file:src.BestFirst.java

License:Open Source License

/**
 * Searches the attribute subset space by best first search
 *
 * @param data the training instances.
 * @return an array (not necessarily ordered) of selected attribute indexes
 * @throws Exception if the search can't be completed
 */
public int[] search(Instances data, TSLagMaker tsLagMaker, List<String> overlayFields) throws Exception {
    long startTime = System.currentTimeMillis(), stopTime;
    TSWrapper tsWrapper = new TSWrapper();
    tsWrapper.buildEvaluator(data);
    String m_EvaluationMeasure = "RMSE";
    tsWrapper.setM_EvaluationMeasure(m_EvaluationMeasure);
    System.out.println("Using " + m_EvaluationMeasure + " as a evaluation Measure");
    LinearRegression linearRegression = new LinearRegression();
    linearRegression.setOptions(weka.core.Utils.splitOptions("-S 1 -R 1E-6"));
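    // "-S 1" disables attribute selection and "-R 1E-6" sets the ridge
    // parameter. Note that this LinearRegression instance is never used below;
    // the MLPRegressor is what gets set as the wrapper's base classifier.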

    MLPRegressor mlpRegressor = new MLPRegressor();
    mlpRegressor.setOptions(weka.core.Utils.splitOptions("-P 5 -E 5 -N 2"));
    tsWrapper.setM_BaseClassifier(mlpRegressor);
    System.out.println("Using best First and MLPReg as classifier.");
    m_numAttribs = data.numAttributes();
    SubsetHandler subsetHandler = new SubsetHandler();
    subsetHandler.setM_numAttribs(m_numAttribs);
    m_totalEvals = 0;
    int i, j;
    int best_size = 0;
    int size = 0;
    int done;
    int searchDirection = m_searchDirection;
    BitSet best_group, temp_group;
    int stale;
    double best_merit;
    double merit;
    boolean z;
    boolean added;
    Double bias = 0.;
    Hashtable<String, Double> lookForExistingSubsets = new Hashtable<String, Double>();
    int insertCount = 0;
    LinkedList2 prioQueueList = new LinkedList2(m_maxStale);
    best_merit = -Double.MAX_VALUE;
    stale = 0;
    int startSetPercentage = 0;
    best_group = subsetHandler.getStartSet(startSetPercentage);

    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals("")))
        m_starting = m_startRange.getSelection();
    // If a starting subset has been supplied, then initialise the bitset
    if (m_starting != null) {
        for (i = 0; i < m_starting.length; i++)
            if ((m_starting[i]) != m_classIndex)
                best_group.set(m_starting[i]);
        best_size = m_starting.length;
        m_totalEvals++;
    } else {
        if (m_searchDirection == SELECTION_BACKWARD) {
            //setStartSet("1-last");
            //m_starting = new int[m_numAttribs];
            // init initial subset to all attributes
            for (i = 11, j = 0; i < m_numAttribs; i++) {
                if (i != m_classIndex) {
                    best_group.set(i);
                    //m_starting[j++] = i;
                }
            }
            best_size = m_numAttribs - 1;
            m_totalEvals++;
        }
    }
    // evaluate the initial subset
    best_merit = -tsWrapper.evaluateSubset(best_group, tsLagMaker, overlayFields, false);
    //printGroup(best_group, m_numAttribs);
    System.out.println("Merit:" + best_merit);
    System.out.print("Group: ");
    subsetHandler.printGroup(best_group);
    System.out.println("\n");
    m_totalEvals++;
    // add the initial group to the list and the hash table
    Object[] best = new Object[1];
    best[0] = best_group.clone();
    prioQueueList.addToList(best, best_merit);
    String hashedGroup = best_group.toString();
    lookForExistingSubsets.put(hashedGroup, new Double(best_merit));
    System.out.println("StartsetPercentage:" + startSetPercentage + ", maxStale:" + m_maxStale);

    while (stale < m_maxStale) {
        added = false;
        if (m_searchDirection == SELECTION_BIDIRECTIONAL) {
            // bi-directional search
            done = 2;
            searchDirection = SELECTION_FORWARD;
        } else {
            done = 1;
        }
        // finished search?
        if (prioQueueList.size() == 0) {
            stale = m_maxStale;
            break;
        }
        // copy the attribute set at the head of the list
        temp_group = (BitSet) (prioQueueList.getLinkAt(0).getData()[0]);
        temp_group = (BitSet) temp_group.clone();
        // remove the head of the list
        prioQueueList.removeLinkAt(0);
        // count the number of bits set (attributes)
        int kk;
        for (kk = 0, size = 0; kk < m_numAttribs; kk++)
            if (temp_group.get(kk))
                size++;
        do {
            for (i = 11; i < m_numAttribs - 2; i++) { //setting it to 11 to skip overlay fields, time stamps etc.
                if (searchDirection == SELECTION_FORWARD)
                    z = ((i != m_classIndex) && (!temp_group.get(i)));
                else
                    z = ((i != m_classIndex) && (temp_group.get(i)));
                if (z) {
                    // set the bit (attribute to add/delete)
                    if (searchDirection == SELECTION_FORWARD) {
                        temp_group.set(i);
                        size++;
                    } else {
                        temp_group.clear(i);
                        size--;
                    }
                    /*
                     * if this subset has been seen before, then it is already in the
                     * list (or has been fully expanded)
                    */
                    hashedGroup = temp_group.toString();

                    if (lookForExistingSubsets.containsKey(hashedGroup) == false) {
                        //System.out.println("Before eval:" + temp_group);
                        merit = -tsWrapper.evaluateSubset(temp_group, tsLagMaker, overlayFields, false);
                        System.out.println("Merit: " + merit);
                        System.out.print("Group: ");

                        subsetHandler.printGroup(temp_group);
                        System.out.println("\n");
                        m_totalEvals++;

                        hashedGroup = temp_group.toString();
                        lookForExistingSubsets.put(hashedGroup, new Double(merit));
                        insertCount++;
                        // insert this one in the list

                    } else
                        merit = lookForExistingSubsets.get(hashedGroup).doubleValue();
                    Object[] add = new Object[1];
                    add[0] = temp_group.clone();
                    prioQueueList.addToList(add, merit);
                    if (m_debug) {
                        System.out.print("Group: ");
                        subsetHandler.printGroup(temp_group);
                        System.out.println("Merit: " + merit);
                    }

                    // is this better than the best?
                    if (searchDirection == SELECTION_FORWARD) {
                        z = (merit - best_merit) > 0.01; //they are both negative numbers; actually we are looking for the smallest error
                    } else {
                        if (merit == best_merit) {
                            z = (size < best_size);
                        } else {
                            z = (merit > best_merit);
                        }
                    }

                    if (z) {
                        added = true;
                        stale = 0;
                        System.out.println("Setting best merit to:" + merit);
                        best_merit = merit;
                        // best_size = (size + best_size);
                        best_size = size;
                        best_group = (BitSet) (temp_group.clone());
                    }

                    // unset this addition(deletion)
                    if (searchDirection == SELECTION_FORWARD) {
                        temp_group.clear(i);
                        size--;
                    } else {
                        temp_group.set(i);
                        size++;
                    }
                }
            }
            if (done == 2)
                searchDirection = SELECTION_BACKWARD;
            done--;
        } while (done > 0);
        /* if we haven't added a new attribute subset then full expansion of this
        * node hasn't resulted in anything better
        */
        if (!added) {
            stale++;
            System.out.println("Stale:" + stale);
        }
    }
    subsetHandler.printGroup(best_group);
    System.out.println("Best merit: " + best_merit);
    System.out.println(m_totalEvals);
    stopTime = System.currentTimeMillis();
    System.out.println("Time taken for wrapper part:" + ((double) stopTime - startTime) / 1000);
    m_bestMerit = best_merit;
    subsetHandler.includesMoreThanXPercentOfFeatures(best_group, true, 0);
    tsWrapper.evaluateSubset(best_group, tsLagMaker, overlayFields, true);
    return attributeList(best_group);
}

From source file:src.SimmulatedAnnealing.java

License:Open Source License

/**
 * Searches the attribute subset space by simulated annealing
 *
 * @param data the training instances.
 * @return an array (not necessarily ordered) of selected attribute indexes
 * @throws Exception if the search can't be completed
 */
public int[] search(Instances data, TSLagMaker tsLagMaker, List<String> overlayFields) throws Exception {
    long startTime = System.currentTimeMillis(), stopTime;
    m_totalEvals = 0;
    TSWrapper tsWrapper = new TSWrapper();
    tsWrapper.buildEvaluator(data);
    String m_EvaluationMeasure = "RMSE";
    tsWrapper.setM_EvaluationMeasure(m_EvaluationMeasure);
    System.out.println("Using " + m_EvaluationMeasure + " as a evaluation Measure");
    LinearRegression linearRegression = new LinearRegression();
    linearRegression.setOptions(weka.core.Utils.splitOptions("-S 1 -R 1E-6"));
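    // As in BestFirst.search above, this LinearRegression is configured but
    // never used; the MLPRegressor below is set as the base classifier.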
    MLPRegressor mlpRegressor = new MLPRegressor();
    mlpRegressor.setOptions(weka.core.Utils.splitOptions("-P 4 -E 4 -N 2"));
    tsWrapper.setM_BaseClassifier(mlpRegressor);
    System.out.println("Using SA and MLPRegressor as classifier.");
    m_numAttribs = data.numAttributes();
    SubsetHandler subsetHandler = new SubsetHandler();
    subsetHandler.setM_numAttribs(m_numAttribs);
    BitSet best_group;
    best_group = subsetHandler.getStartSet(0);
    double temperature = 0.4, initialTemp = temperature, dropRate = 0.00012, limit = 0.0000001;
    double best_merit;
    int changedAltoughWorseCounter = 0;
    Hashtable<String, Double> lookForExistingSubsets = new Hashtable<String, Double>();
    // evaluate the initial subset
    subsetHandler.printGroup(best_group);
    best_merit = -tsWrapper.evaluateSubset(best_group, tsLagMaker, overlayFields, false);
    m_totalEvals++;
    String subset_string = best_group.toString();
    lookForExistingSubsets.put(subset_string, best_merit);
    System.out.println("Initial group w/ numAttribs: " + m_numAttribs + " temp: " + temperature + " drop rate:"
            + dropRate + " limit:" + limit);
    System.out.println("Merit: " + best_merit);
    TheVeryBest theVeryBest = new TheVeryBest((BitSet) best_group.clone(), best_merit);
    ArrayList<Boolean> changedAlthoughWorse = new ArrayList<Boolean>();
    while (temperature > limit) {
        changedAltoughWorseCounter = 0;
        BitSet s_new = subsetHandler.changeBits((BitSet) best_group.clone(), 1);
        subset_string = s_new.toString();
        if (!lookForExistingSubsets.containsKey(subset_string)) {
            double s_new_merit = -tsWrapper.evaluateSubset(s_new, tsLagMaker, overlayFields, false);
            m_totalEvals++;
            System.out.println("New merit: " + s_new_merit);
            lookForExistingSubsets.put(subset_string, s_new_merit);
            if (decisionFunction(s_new_merit - best_merit, temperature, best_merit, initialTemp)) {
                if (best_merit - s_new_merit > 0) //it means this is a worse set than the best set, and we still change the best set to it.
                    changedAlthoughWorse.add(true);
                best_group = (BitSet) s_new.clone();
                best_merit = s_new_merit;
            } else
                changedAlthoughWorse.add(false);
            for (int j = 0; j < changedAlthoughWorse.size(); j++)
                if (changedAlthoughWorse.get(j))
                    changedAltoughWorseCounter++;
            System.out.println("Percentage of worse sets accepted:"
                    + (float) changedAltoughWorseCounter * 100 / changedAlthoughWorse.size()
                    + " Arraylist size:" + changedAlthoughWorse.size() + " changedAlthoughworse counter:"
                    + changedAltoughWorseCounter);
            if (best_merit > theVeryBest.getMerit()) //we have negative values for the scores, so bigger is better
                theVeryBest.setNewSet((BitSet) best_group.clone(), best_merit);
            temperature = temperature / (float) (1 + dropRate * (m_totalEvals - 1));
        }
    }
    System.out.println("Best merit: " + theVeryBest.getMerit());
    System.out.println(m_totalEvals);
    stopTime = System.currentTimeMillis();
    System.out.println("Time taken for wrapper part:" + ((double) stopTime - startTime) / 1000);
    subsetHandler.printGroup(theVeryBest.getSubset());
    subsetHandler.includesMoreThanXPercentOfFeatures(theVeryBest.getSubset(), true, 0);
    tsWrapper.evaluateSubset(theVeryBest.getSubset(), tsLagMaker, overlayFields, true);
    return attributeList(theVeryBest.getSubset());
}

From source file:textmining.TextMining.java

private static String Regression_on_Polarity(Instances instances) throws Exception {
    Classifier regression = (Classifier) new LinearRegression();
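    // "-S 2" selects attributes greedily; "-R 1.0E-8" sets the ridge parameter.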
    String[] options = weka.core.Utils.splitOptions("-S 2 -R 1.0E-8");
    return setOptions(regression, instances, options);
}

From source file:wekimini.learning.LinearRegressionModelBuilder.java

public LinearRegressionModelBuilder() {
    classifier = new LinearRegression();
    featureSelectionType = FeatureSelectionType.NONE;
    ((LinearRegression) classifier).setAttributeSelectionMethod(
            new SelectedTag(LinearRegression.SELECTION_NONE, LinearRegression.TAGS_SELECTION));
    ((LinearRegression) classifier).setEliminateColinearAttributes(removeColinear);
}