Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usage for weka.core Instances Instances.

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Source Link

Document

Creates an empty set of instances.

Usage

From source file:Bilbo.java

License:Open Source License

/**
 * Returns a training set for a particular iteration.
 *
 * @param iteration the number of the iteration for the requested training set.
 * @return the training set for the supplied iteration number
 * @throws Exception if something goes wrong when generating a training set.
 */
@Override
protected synchronized Instances getTrainingSet(Instances p_data, int iteration) throws Exception {
    // Deterministic per-iteration RNG so each bag is reproducible.
    final Random rand = new Random(m_Seed + iteration);
    final int bagSize = (int) (p_data.numInstances() * (m_BagSizePercent / 100.0));

    if (m_CalcOutOfBag && p_data.classIndex() != -1) {
        // Record in-bag membership so out-of-bag error can be computed later.
        m_inBag[iteration] = new boolean[p_data.numInstances()];
        return p_data.resampleWithWeights(rand, m_inBag[iteration], getRepresentCopiesUsingWeights());
    }

    Instances bag = p_data.resampleWithWeights(rand, getRepresentCopiesUsingWeights());
    // Shrink the bag when the requested size is smaller than the full dataset.
    if (bagSize < p_data.numInstances()) {
        bag.randomize(rand);
        bag = new Instances(bag, 0, bagSize);
    }
    return bag;
}

From source file:DocClassifier.java

/**
 * Builds a dataset from the supplied files, one instance per file.
 *
 * @param files the files to convert into instances
 * @return a populated Instances object whose class attribute is the last
 *         attribute in {@code attrList}
 */
public Instances createInstances(File[] files) {
    Instances instances = new Instances("Inst" + files.hashCode(), attrList, files.length);
    // Set the class attribute once, up front, instead of redundantly on every
    // loop iteration; this also covers the empty-array case, where the
    // original never set a class at all.
    instances.setClass((Attribute) attrList.lastElement());
    for (File file : files) {
        Instance inst = createInstance(file);
        // An instance must know its dataset before it can be added.
        inst.setDataset(instances);
        instances.add(inst);
    }
    return instances;
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Set up the header for the PC->original space dataset
 *
 * @return the output format
 * @throws Exception if something goes wrong
 */
private Instances setOutputFormatOriginal() throws Exception {
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();

    // One numeric attribute per original input attribute, same names.
    for (int idx = 0; idx < m_numAttribs; idx++) {
        atts.add(new Attribute(m_trainInstances.attribute(idx).name()));
    }

    // Carry the class attribute over unchanged, if there is one.
    if (m_hasClass) {
        atts.add((Attribute) m_trainHeader.classAttribute().copy());
    }

    final Instances format = new Instances(m_trainHeader.relationName() + "->PC->original space", atts, 0);

    // The class (when present) is always the last attribute appended above.
    if (m_hasClass) {
        format.setClassIndex(format.numAttributes() - 1);
    }

    return format;
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Set the format for the transformed data: one attribute per retained
 * principal component, named after the component's largest coefficients.
 *
 * @return a set of empty Instances (header only) in the new format, or null
 *         if the eigen-decomposition has not been computed yet
 * @throws Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
        return null;
    }

    double cumulative = 0.0;
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    // Walk eigenvalues from largest to smallest (m_sortedEigens is sorted
    // ascending), emitting one attribute per component until enough variance
    // is covered.
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        StringBuffer attName = new StringBuffer();
        // build array of negated coefficient magnitudes so Utils.sort yields
        // indices in order of decreasing absolute coefficient
        double[] coeff_mags = new double[m_numAttribs];
        for (int j = 0; j < m_numAttribs; j++) {
            coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
        }
        int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;
        // this array contains the sorted indices of the coefficients
        int[] coeff_inds;
        // FIX: condition was "m_numAttribs > 0", which made the else branch
        // unreachable; the comments (and upstream Weka) show the intent is to
        // key this on m_maxAttrsInName.
        if (m_maxAttrsInName > 0) {
            // if m_maxAttrsInName > 0, sort coefficients by decreasing
            // magnitude
            coeff_inds = Utils.sort(coeff_mags);
        } else {
            // if m_maxAttrsInName <= 0, use all coeffs in original order
            coeff_inds = new int[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++) {
                coeff_inds[j] = j;
            }
        }
        // build final attName string, e.g. "0.543att1+0.123att2..."
        for (int j = 0; j < num_attrs; j++) {
            double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
            // negative coefficients already carry their sign; only join
            // non-negative ones with an explicit '+'
            if (j > 0 && coeff_value >= 0) {
                attName.append("+");
            }
            attName.append(
                    Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name());
        }
        // indicate that the name was truncated
        if (num_attrs < m_numAttribs) {
            attName.append("...");
        }

        attributes.add(new Attribute(attName.toString()));
        cumulative += m_eigenvalues[m_sortedEigens[i]];

        // stop once the retained components explain enough of the variance
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (m_hasClass) {
        attributes.add((Attribute) m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components",
            attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    m_outputNumAtts = outputFormat.numAttributes();
    return outputFormat;
}

From source file:TextDirectoryLoader.java

License:Open Source License

/**
 * Determines and returns (if possible) the structure (internally the 
 * header) of the data set as an empty set of instances.
 *
 * @return          the structure of the data set as an empty 
 *             set of Instances/*  w  w  w. j av a 2  s. com*/
 * @throws IOException    if an error occurs
 */
public Instances getStructure() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    // determine class labels, i.e., sub-dirs
    if (m_structure == null) {
        String directoryPath = getDirectory().getAbsolutePath();
        FastVector atts = new FastVector();
        FastVector classes = new FastVector();

        File dir = new File(directoryPath);
        String[] subdirs = dir.list();

        for (int i = 0; i < subdirs.length; i++) {
            File subdir = new File(directoryPath + File.separator + subdirs[i]);
            if (subdir.isDirectory())
                classes.addElement(subdirs[i]);
        }

        atts.addElement(new Attribute("text", (FastVector) null));
        if (m_OutputFilename)
            atts.addElement(new Attribute("filename", (FastVector) null));
        // make sure that the name of the class attribute is unlikely to 
        // clash with any attribute created via the StringToWordVector filter
        atts.addElement(new Attribute("@@class@@", classes));

        String relName = directoryPath.replaceAll("/", "_");
        relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
        m_structure = new Instances(relName, atts, 0);
        m_structure.setClassIndex(m_structure.numAttributes() - 1);
    }

    return m_structure;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set.
 *
 * @return the structure of the data set as an empty set of Instances
 * @exception IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (m_data == null) {
        throw new IOException("No source has been specified");
    }

    if (m_structure == null) {
        getStructure();
    }

    // Pass 1 scaffolding: one Hashtable per attribute, mapping each distinct
    // string value seen in the data to its nominal index.
    m_cumulativeStructure = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        m_cumulativeStructure.addElement(new Hashtable());
    }

    // Pass 1: convert every raw row; getInstance also populates
    // m_cumulativeStructure as a side effect (presumably — it is defined
    // elsewhere in this file; verify against its implementation).
    m_cumulativeInstances = new FastVector();
    FastVector current;

    for (int i = 0; i < m_data.length; i++) {
        current = getInstance(m_data[i]);

        m_cumulativeInstances.addElement(current);
    }

    // Pass 2a: build the final attribute list now that the full value sets
    // are known — numeric when no strings were seen, string when the column
    // is in m_StringAttributes, otherwise nominal.
    FastVector atts = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        String attname = m_structure.attribute(i).name();
        Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
        if (tempHash.size() == 0) {
            // no string values recorded -> numeric attribute
            atts.addElement(new Attribute(attname));
        } else {
            if (m_StringAttributes.isInRange(i)) {
                // null FastVector marks this as a string attribute
                atts.addElement(new Attribute(attname, (FastVector) null));
            } else {
                FastVector values = new FastVector(tempHash.size());
                // add dummy objects in order to make the FastVector's size == capacity
                for (int z = 0; z < tempHash.size(); z++) {
                    values.addElement("dummy");
                }
                // place each recorded value at the nominal index assigned to
                // it during pass 1
                Enumeration e = tempHash.keys();
                while (e.hasMoreElements()) {
                    Object ob = e.nextElement();
                    int index = ((Integer) tempHash.get(ob)).intValue();
                    String s = ob.toString();
                    // strip surrounding single/double quotes
                    if (s.startsWith("'") || s.startsWith("\""))
                        s = s.substring(1, s.length() - 1);
                    values.setElementAt(new String(s), index);
                }
                atts.addElement(new Attribute(attname, values));
            }
        }
    }

    // make the instances
    String relationName;
    relationName = "ArrayData";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

    // Pass 2b: translate each buffered row into a double[] of attribute
    // values (numeric value, nominal index, or string-table index).
    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
        current = ((FastVector) m_cumulativeInstances.elementAt(i));
        double[] vals = new double[dataSet.numAttributes()];
        for (int j = 0; j < current.size(); j++) {
            Object cval = current.elementAt(j);
            if (cval instanceof String) {
                if (((String) cval).compareTo(m_MissingValue) == 0) {
                    vals[j] = Instance.missingValue();
                } else {
                    if (dataSet.attribute(j).isString()) {
                        // addStringValue returns the value's index in the
                        // attribute's string table
                        vals[j] = dataSet.attribute(j).addStringValue((String) cval);
                    } else if (dataSet.attribute(j).isNominal()) {
                        // find correct index
                        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                        int index = ((Integer) lookup.get(cval)).intValue();
                        vals[j] = index;
                    } else {
                        // a string value arrived for a numeric column: the
                        // two passes disagree about this attribute's type
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                }
            } else if (dataSet.attribute(j).isNominal()) {
                // find correct index
                Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                int index = ((Integer) lookup.get(cval)).intValue();
                vals[j] = index;
            } else if (dataSet.attribute(j).isString()) {
                vals[j] = dataSet.attribute(j).addStringValue("" + cval);
            } else {
                vals[j] = ((Double) cval).doubleValue();
            }
        }
        dataSet.add(new Instance(1.0, vals));
    }
    // replace the provisional header with the final one (all attributes typed)
    m_structure = new Instances(dataSet, 0);
    m_cumulativeStructure = null; // conserve memory

    return dataSet;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Assumes the first line of the file contains the attribute names.
 * Assumes all attributes are real (Reading the full data set with
 * getDataSet will establish the true structure).
 */
private void readHeader(String[] column) throws IOException {
    // First row of the array holds the column titles; turn each title into
    // a (numeric) attribute.
    final FastVector names = new FastVector();
    for (String title : column) {
        names.addElement(new Attribute(title));
    }

    // Header-only dataset: zero capacity, structure only.
    m_structure = new Instances("DataArray", names, 0);
}

From source file:FlexDMThread.java

License:Open Source License

/**
 * Runs a single classifier/dataset experiment end-to-end: loads the data,
 * builds the classifier with the configured hyper-parameters, evaluates it
 * according to the dataset's test specification (test set, cross-validation,
 * leave-one-out, percentage split, or training data), writes a per-run
 * results file, and appends to the shared summary and log files (both
 * guarded by semaphores).
 */
public void run() {
    try {
        // Get the data from the source (shared loader, semaphore-guarded)
        FlexDM.getMainData.acquire();
        Instances data = dataset.getSource().getDataSet();
        FlexDM.getMainData.release();

        // Set class attribute to the last attribute if undefined
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        // Build the option string from the classifier's name/value parameters
        String temp = "";
        for (int i = 0; i < classifier.getNumParams(); i++) {
            temp += classifier.getParameter(i).getName();
            temp += " ";
            if (classifier.getParameter(i).getValue() != null) {
                temp += classifier.getParameter(i).getValue();
                temp += " ";
            }
        }

        String[] options = weka.core.Utils.splitOptions(temp);

        // Print to console- experiment is starting. The try/catch pairs below
        // fall back to the full dataset name when the substring shortening
        // (strip everything up to the last backslash) fails.
        if (temp.equals("")) { //no parameters
            temp = "results_no_parameters";
            try {
                System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1)
                        + " with no parameters");
            } catch (Exception e) {
                System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName() + " with no parameters");
            }
        } else { //parameters
            try {
                System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1)
                        + " with parameters " + temp);
            } catch (Exception e) {
                System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName() + " with parameters " + temp);
            }
        }

        //Create classifier, setting parameters
        weka.classifiers.Classifier x = createObject(classifier.getName());
        x.setOptions(options);
        x.buildClassifier(data);

        //Process the test selection
        String[] tempTest = dataset.getTest().split("\\s");

        //Create evaluation object for training and testing classifiers
        Evaluation eval = new Evaluation(data);
        StringBuffer predictions = new StringBuffer();

        //Train and evaluate classifier
        if (tempTest[0].equals("testset")) { //specified test file
            //Build classifier
            x.buildClassifier(data);

            //Open test file, load data (semaphore-guarded)
            FlexDM.getTestData.acquire();
            Instances testSet = dataset.getTestFile().getDataSet();
            FlexDM.getTestData.release();

            //Set class attribute if undefined
            if (testSet.classIndex() == -1) {
                testSet.setClassIndex(testSet.numAttributes() - 1);
            }

            //Evaluate model; array = {prediction buffer, attribute range, output distributions}
            Object[] array = { predictions, new Range(), Boolean.TRUE };
            eval.evaluateModel(x, testSet, array);
        } else if (tempTest[0].equals("xval")) { //Cross validation
            //Build classifier
            x.buildClassifier(data);

            //Cross validate with the requested number of folds
            eval.crossValidateModel(x, data, Integer.parseInt(tempTest[1]), new Random(1), predictions,
                    new Range(), true);
        } else if (tempTest[0].equals("leavexval")) { //Leave one out cross validation
            //Build classifier
            x.buildClassifier(data);

            //Cross validate with folds = n-1
            eval.crossValidateModel(x, data, data.numInstances() - 1, new Random(1), predictions, new Range(),
                    true);
        } else if (tempTest[0].equals("percent")) { //Percentage split of single data set
            //Set training and test sizes from percentage
            int trainSize = (int) Math.round(data.numInstances() * Double.parseDouble(tempTest[1]));
            int testSize = data.numInstances() - trainSize;

            //Load specified data
            Instances train = new Instances(data, 0, trainSize);
            Instances testSet = new Instances(data, trainSize, testSize);

            //Build classifier
            x.buildClassifier(train);

            //Train and evaluate model
            Object[] array = { predictions, new Range(), Boolean.TRUE };
            eval.evaluateModel(x, testSet, array);
        } else { //Evaluate on training data
            //Test and evaluate model
            Object[] array = { predictions, new Range(), Boolean.TRUE };
            eval.evaluateModel(x, data, array);
        }

        //create datafile for results
        String filename = dataset.getDir() + "/" + classifier.getDirName() + "/" + temp + ".txt";
        PrintWriter writer = new PrintWriter(filename, "UTF-8");

        //Print classifier, dataset, parameters info to file.
        //(The original wrapped this in a try/catch whose catch body was
        // byte-identical to the try body — a no-op; removed.)
        writer.println("CLASSIFIER: " + classifier.getName() + "\n DATASET: " + dataset.getName()
                + "\n PARAMETERS: " + temp);

        //Add evaluation string to file
        writer.println(eval.toSummaryString());
        //Process result options
        if (checkResults("stats")) { //Classifier statistics
            writer.println(eval.toClassDetailsString());
        }
        if (checkResults("model")) { //The model
            writer.println(x.toString());
        }
        if (checkResults("matrix")) { //Confusion matrix
            writer.println(eval.toMatrixString());
        }
        if (checkResults("entropy")) { //Entropy statistics
            //Set options req'd to get the entropy stats
            String[] opt = new String[4];
            opt[0] = "-t";
            opt[1] = dataset.getName();
            opt[2] = "-k";
            opt[3] = "-v";

            //Evaluate model
            String entropy = Evaluation.evaluateModel(x, opt);

            //Grab the relevant info from the results, print to file
            entropy = entropy.substring(entropy.indexOf("=== Stratified cross-validation ===") + 35,
                    entropy.indexOf("=== Confusion Matrix ==="));
            writer.println("=== Entropy Statistics ===");
            writer.println(entropy);
        }
        if (checkResults("predictions")) { //The models predictions
            writer.println("=== Predictions ===\n");
            if (!dataset.getTest().contains("xval")) { //print header of predictions table if req'd
                writer.println(" inst#     actual  predicted error distribution ()");
            }
            writer.println(predictions.toString()); //print predictions to file
        }

        writer.close();

        //Summary file is semaphore controlled to ensure quality
        try { //get a permit
              //grab the summary file, write the classifiers details to it
            FlexDM.writeFile.acquire();
            PrintWriter p = new PrintWriter(new FileWriter(summary, true));
            if (temp.equals("results_no_parameters")) { //change output based on parameters
                temp = temp.substring(8);
            }

            //write percent correct, classifier name, dataset name to summary file
            p.write(dataset.getName() + ", " + classifier.getName() + ", " + temp + ", " + eval.correct() + ", "
                    + eval.incorrect() + ", " + eval.unclassified() + ", " + eval.pctCorrect() + ", "
                    + eval.pctIncorrect() + ", " + eval.pctUnclassified() + ", " + eval.kappa() + ", "
                    + eval.meanAbsoluteError() + ", " + eval.rootMeanSquaredError() + ", "
                    + eval.relativeAbsoluteError() + ", " + eval.rootRelativeSquaredError() + ", "
                    + eval.SFPriorEntropy() + ", " + eval.SFSchemeEntropy() + ", " + eval.SFEntropyGain() + ", "
                    + eval.SFMeanPriorEntropy() + ", " + eval.SFMeanSchemeEntropy() + ", "
                    + eval.SFMeanEntropyGain() + ", " + eval.KBInformation() + ", " + eval.KBMeanInformation()
                    + ", " + eval.KBRelativeInformation() + ", " + eval.weightedTruePositiveRate() + ", "
                    + eval.weightedFalsePositiveRate() + ", " + eval.weightedTrueNegativeRate() + ", "
                    + eval.weightedFalseNegativeRate() + ", " + eval.weightedPrecision() + ", "
                    + eval.weightedRecall() + ", " + eval.weightedFMeasure() + ", "
                    + eval.weightedAreaUnderROC() + "\n");
            p.close();

            //release semaphore
            FlexDM.writeFile.release();
        } catch (InterruptedException e) { //bad things happened
            System.err.println("FATAL ERROR OCCURRED: Classifier: " + cNum + " - " + classifier.getName()
                    + " on dataset " + dataset.getName());
        }

        //output we have successfully finished processing classifier
        if (temp.equals("no_parameters")) { //no parameters
            try {
                System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1)
                        + " with no parameters");
            } catch (Exception e) {
                System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName() + " with no parameters");
            }
        } else { //with parameters
            try {
                System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1)
                        + " with parameters " + temp);
            } catch (Exception e) {
                System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName() + " with parameters " + temp);
            }
        }

        try { //get a permit
              //grab the log file, write the classifiers details to it
            FlexDM.writeLog.acquire();
            PrintWriter p = new PrintWriter(new FileWriter(log, true));

            Date date = new Date();
            // FIX: pattern used "YYYY" (week-based year), which yields the
            // wrong year around year boundaries; calendar year is "yyyy".
            Format formatter = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");

            if (temp.equals("results_no_parameters")) { //change output based on parameters
                temp = temp.substring(8);
            }

            //write details to log file
            p.write(dataset.getName() + ", " + dataset.getTest() + ", \"" + dataset.getResult_string() + "\", "
                    + classifier.getName() + ", " + temp + ", " + formatter.format(date) + "\n");
            p.close();

            //release semaphore
            FlexDM.writeLog.release();
        } catch (InterruptedException e) { //bad things happened
            System.err.println("FATAL ERROR OCCURRED: Classifier: " + cNum + " - " + classifier.getName()
                    + " on dataset " + dataset.getName());
        }

        s.release();

    } catch (Exception e) {
        //an error occurred
        System.err.println("FATAL ERROR OCCURRED: " + e.toString() + "\nClassifier: " + cNum + " - "
                + classifier.getName() + " on dataset " + dataset.getName());
        s.release();
    }

}

From source file:Pair.java

License:Open Source License

/**
 * Runs the transfer-learning training loop on the combined source + target
 * data, printing the test error after each iteration. Two modes: when
 * doFraction is set, the source weight is annealed down linearly each
 * iteration; otherwise a boosting-style loop reweights via setWeights.
 */
private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    double[] weights = new double[numInstances];
    Random randomInstance = new Random(1);

    // Combined dataset: source instances first (copied), then target.
    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { //weight all equally
        weightSource = weightTarget = 1.0/*/numInstances*/;
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        // split total weight between source and target per sourceRatio
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight/*/numSourceInstances*/;
        weightTarget = 1.0 / totalWeight/*/numTargetInstances*/;
        initialSourceFraction = weightSource;
    }
    // Apply initial per-instance weights; source block occupies indices
    // [0, numSourceInstances).
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        // Mode 1: linearly anneal the source fraction toward zero and
        // retrain from the reweighted data each iteration.
        for (int it = 0; it < sourceIterations/*m_NumIterations*/; it++) {

            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; //[same weights as regular]
            // cap to keep the source weight finite in the formula below
            if (sourceFraction > .995)
                sourceFraction = .995;
            //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {
        // Mode 2: boosting-style loop. Weights are snapshotted before each
        // buildClassifierWithWeights call and restored afterwards, since
        // that call mutates them.
        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {

            // Train this round's member on either the data itself or a
            // weight-proportional resample of it.
            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                // normalize weights into a sampling distribution
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }

            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                // best-effort: log and continue with the next iteration
                e.printStackTrace();
                System.out.println("E: " + e);
            }

            // anneal source influence, then reweight via this round's model
            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);

            // snapshot weights, retrain the aggregate, then restore
            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();

            buildClassifierWithWeights(data);

            System.out.println("Iteration " + it + ":" + getTestError());

            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);

        }

    }

}

From source file:Pair.java

License:Open Source License

/**
 * Boosting method. Boosts any classifier that can handle weighted
 * instances.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @exception Exception if the classifier could not be built successfully
 */
protected void buildClassifierWithWeights(Instances data) throws Exception {

    Random randomInstance = new Random(0);
    double epsilon, reweight, beta = 0;
    Evaluation evaluation;
    Instances sample;
    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;
    int numSourceInstances = m_SourceInstances.numInstances();

    // Do boostrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        // Build the classifier: train on the data itself in the first
        // round (or when resampling is off), otherwise on a
        // weight-proportional resample.
        sample = null;
        if (!resample || m_NumIterationsPerformed == 0) {
            sample = data;
        } else {
            // normalize instance weights into a sampling distribution
            double sum = data.sumOfWeights();
            double[] weights = new double[data.numInstances()];
            for (int i = 0; i < weights.length; i++) {
                weights[i] = data.instance(i).weight() / sum;
            }
            sample = data.resampleWithWeights(randomInstance, weights);

            if (doSampleSize) {
                // truncate the sample so its effective size reflects the
                // current source fraction, capped at the full data size
                int effectiveInstances = (int) (sourceFraction * weights.length + numTargetInstances);
                if (effectiveInstances > numSourceInstances + numTargetInstances)
                    effectiveInstances = numSourceInstances + numTargetInstances;
                sample.randomize(randomInstance);
                Instances q = new Instances(sample, 0, effectiveInstances);
                sample = q;
            }
        }
        try {
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sample);
        } catch (Exception e) {
            // best-effort: log and fall through; setWeights below decides
            // whether this round is usable
            e.printStackTrace();
            System.out.println("E: " + e);
        }

        if (doBagging)
            beta = 0.4 / .6; //always same beta
        else
            beta = setWeights(data, m_Classifiers[m_NumIterationsPerformed], -1, numSourceInstances, true);

        // Stop if error too small or error too big and ignore this model
        if (beta < 0) { //setWeights indicates a problem with negative beta
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to to use it
            }
            break;
        }

        // Determine the weight to assign to this model

        m_Betas[m_NumIterationsPerformed] = Math.log(1 / beta);

    }

    // cache the total vote mass of the retained ensemble members
    betaSum = 0;

    for (int i = 0; i < m_NumIterationsPerformed; i++)
        betaSum += m_Betas[i];
}