Example usage for weka.core Instances Instances(Instances dataset, int capacity)

Introduction

On this page you can find example usage for the weka.core.Instances constructor that takes an existing dataset and an initial capacity.

Prototype

public Instances(Instances dataset, int capacity) 

Document

Constructor creating an empty set of instances. It copies the header information (relation name and attributes) from the given dataset and reserves space for the given number of instances.
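
As a quick orientation before the real-world snippets below, here is a minimal, self-contained sketch of the constructor. The class name and the toy "x"/"y" attributes are illustrative, and the sketch assumes Weka 3.7 or later, where DenseInstance is the concrete instance class. The point it demonstrates: the new set shares the header of the source dataset but contains none of its rows.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

// Illustrative demo class, not part of any project quoted below.
public class CapacityConstructorDemo {
    public static void main(String[] args) {
        // Build a tiny two-attribute dataset to serve as the header donor.
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("y"));
        Instances dataset = new Instances("demo", attrs, 0);
        dataset.add(new DenseInstance(1.0, new double[] { 1.0, 2.0 }));

        // Empty copy of the header, with room pre-allocated for 10 rows.
        Instances empty = new Instances(dataset, 10);
        System.out.println(empty.numInstances()); // 0 -- no data is copied
        System.out.println(empty.equalHeaders(dataset)); // true -- same structure
    }
}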

Usage

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

public static void testCOMT2() throws Exception {
    BestConf bestconf = new BestConf();
    Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
    trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

    Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(),
            InitialSampleSetSize, false);
    samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
    samplePoints.setClassIndex(samplePoints.numAttributes() - 1);

    COMT2 comt = new COMT2(samplePoints, COMT2Iteration);

    comt.buildClassifier(trainingSet);

    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(comt, trainingSet);
    System.err.println(eval.toSummaryString());

    Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
    Instances bestInstances = new Instances(trainingSet, 2);
    bestInstances.add(best);
    DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

    // Output the training set with each class value replaced by the model's prediction
    Instances output = new Instances(trainingSet, trainingSet.numInstances());
    Enumeration<Instance> enu = trainingSet.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance ins = enu.nextElement();
        double[] values = ins.toDoubleArray();
        values[values.length - 1] = comt.classifyInstance(ins);
        output.add(ins.copy(values));
    }
    DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}

From source file:cn.ict.zyq.bestConf.COMT2.COMT2.java

License:Open Source License

public COMT2(Instances unlabeledIns, int numOfIterations) {
    rand = new Random();

    this.unlabeldPool = unlabeledIns;
    this.comtIterations = numOfIterations;

    // Move a random initial batch from the unlabeled pool into the working set.
    int initialPool = this.unlabeldPool.size() / this.comtIterations;
    this.unlabeledInstances = new Instances(this.unlabeldPool, 0);
    for (int i = 0; i < initialPool; i++)
        this.unlabeledInstances.add(this.unlabeldPool.remove(rand.nextInt(this.unlabeldPool.size())));
    indexOfClass = this.unlabeledInstances.classIndex();

    models = null;
}

From source file:cn.ict.zyq.bestConf.COMT2.COMT2.java

License:Open Source License

private Instances retrieveMore(int toGen) {
    Instances retval = new Instances(this.unlabeldPool, toGen);
    for (int i = 0; i < toGen; i++) {
        retval.add(this.unlabeldPool.remove(rand.nextInt(this.unlabeldPool.size())));
    }
    return retval;
}
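
A note on the second argument: the capacity is only a pre-allocation hint, not an upper bound. retrieveMore sizes the new set to toGen up front because it knows exactly how many instances it will move, but Instances.add would still grow the set past that if asked.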

From source file:cn.ict.zyq.bestConf.util.DataIOFile.java

License:Open Source License

/**
 * Returns the data set loaded from the CSV file at the given path.
 */
public static Instances loadDataFromCsvFile(String path) throws IOException {
    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(path));
    Instances data = loader.getDataSet();

    System.out.println("\nHeader of dataset:\n");
    System.out.println(new Instances(data, 0));
    return data;
}
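
The new Instances(data, 0) idiom above makes a zero-instance copy of the dataset, so printing it shows only the header (relation name and attribute declarations) and none of the rows. The ARFF and XRFF loaders below reuse the same pattern.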

From source file:cn.ict.zyq.bestConf.util.DataIOFile.java

License:Open Source License

/**
 * Returns the data set loaded from the ARFF file at the given path.
 */
public static Instances loadDataFromArffFile(String path) throws IOException {
    ArffLoader loader = new ArffLoader();
    loader.setSource(new File(path));
    Instances data = loader.getDataSet();

    System.out.println("\nHeader of dataset:\n");
    System.out.println(new Instances(data, 0));
    return data;
}

From source file:cn.ict.zyq.bestConf.util.DataIOFile.java

License:Open Source License

/**
 * Returns the data set loaded from the XRFF file at the given path.
 */
public static Instances loadDataFromXrffFile(String path) throws IOException {
    XRFFLoader loader = new XRFFLoader();
    loader.setSource(new File(path));
    Instances data = loader.getDataSet();

    System.out.println("\nHeader of dataset:\n");
    System.out.println(new Instances(data, 0));
    return data;
}

From source file:com.oprisnik.semdroid.feature.extractor.SemanticPatternAnalysisFeatureExtractor.java

License:Apache License

@Override
public WekaData extract(App app) {
    Instances i = new Instances(mInstances, app.getMethods().size());
    List<Object> linkedObjects = new ArrayList<Object>();
    if (mFeatureLayerGenerator != null) {
        long start = System.currentTimeMillis();
        DatasetDataElement d = mFeatureLayerGenerator.generateFeatureLayers(app);
        Log.d(TAG, FeatureLayerHelper.getFeatures(d));
        long featureLayers = System.currentTimeMillis();
        Log.d(TAG, "Feature layer generation: " + (featureLayers - start) + " ms.");
        mSemanticPatternFramework.getWekaData(d, i, linkedObjects);
        long done = System.currentTimeMillis();
        Log.d(TAG, "Semantic Pattern framework: " + (done - featureLayers) + " ms.");

        Log.d(TAG, "Feature extractor total: " + (done - start) + " ms.");
    }
    return new WekaData(i, linkedObjects);
}

From source file:com.reactivetechnologies.analytics.core.eval.StackingWithBuiltClassifiers.java

License:Open Source License

/**
 * Generates the meta data.
 *
 * @param newData the data to work on
 * @param random the random number generator to use for cross-validation
 * @throws Exception if generation fails
 */
@Override
protected void generateMetaLevel(Instances newData, Random random) throws Exception {

    Instances metaData = metaFormat(newData);
    m_MetaFormat = new Instances(metaData, 0);
    for (int j = 0; j < m_NumFolds; j++) {

        /** Changed here */
        //Instances train = newData.trainCV(m_NumFolds, j, random);
        // DO NOT Build base classifiers
        /*for (int i = 0; i < m_Classifiers.length; i++) {
            getClassifier(i).buildClassifier(train);
        }*/
        /** End change */

        // Classify test instances and add to meta data
        Instances test = newData.testCV(m_NumFolds, j);
        for (int i = 0; i < test.numInstances(); i++) {
            metaData.add(metaInstance(test.instance(i)));
        }
    }

    m_MetaClassifier.buildClassifier(metaData);
}
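
Here the capacity constructor serves a different purpose than pre-allocation: new Instances(metaData, 0) stores a data-free copy of the meta-level header in m_MetaFormat, so the stacker can later build meta-level instances with the right structure without keeping the meta-level training data around.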

From source file:com.sliit.normalize.NormalizeDataset.java

public String normalizeDataset() {
    System.out.println("start normalizing data");

    String filePathOut = "";
    try {

        CSVLoader loader = new CSVLoader();
        if (reducedDiemensionFile != null) {

            loader.setSource(reducedDiemensionFile);
        } else {
            if (tempFIle != null && tempFIle.exists()) {

                loader.setSource(tempFIle);
            } else {

                loader.setSource(csvFile);
            }
        }
        Instances dataInstance = loader.getDataSet();
        Normalize normalize = new Normalize();
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);
        normalize.setInputFormat(dataInstance);
        String directory = csvFile.getParent();
        outputFile = new File(directory + "/" + "normalized" + csvFile.getName());
        if (!outputFile.exists()) {

            outputFile.createNewFile();
        }
        CSVSaver saver = new CSVSaver();
        saver.setFile(outputFile);
        // Push every instance through the normalization filter.
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            normalize.input(dataInstance.instance(i));
        }
        normalize.batchFinished();
        Instances outPut = new Instances(dataInstance, 0);
        // Collect the filtered instances into the empty copy of the header.
        for (int i = 0; i < dataInstance.numInstances(); i++) {
            outPut.add(normalize.output());
        }
        Attribute attribute = dataInstance.attribute(outPut.numAttributes() - 1);
        for (int j = 0; j < attribute.numValues(); j++) {

            if (attribute.value(j).equals("normal.")) {
                outPut.renameAttributeValue(attribute, attribute.value(j), "0");
            } else {
                outPut.renameAttributeValue(attribute, attribute.value(j), "1");
            }
        }
        saver.setInstances(outPut);
        saver.writeBatch();
        writeToNewFile(directory);
        filePathOut = directory + "norm" + csvFile.getName();
        if (tempFIle != null) {

            tempFIle.delete();
        }
        if (reducedDiemensionFile != null) {

            reducedDiemensionFile.delete();
        }
        outputFile.delete();
    } catch (Exception e) {
        log.error("Error occurred:" + e.getMessage());
    }
    return filePathOut;
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Evaluates cluster assignments with respect to actual class labels.
 * Assumes that m_Clusterer has been trained and tested on 
 * inst (minus the class).
 * 
 * @param inst the instances (including class) to evaluate with respect to
 * @param fileName the name of the test file for incremental testing, 
 * if "" or null then not used
 * @throws Exception if something goes wrong
 */
private void evaluateClustersWithRespectToClass(Instances inst, String fileName) throws Exception {

    int numClasses = inst.classAttribute().numValues();
    int[][] counts = new int[m_numClusters][numClasses];
    int[] clusterTotals = new int[m_numClusters];
    double[] best = new double[m_numClusters + 1];
    double[] current = new double[m_numClusters + 1];
    DataSource source = null;
    Instances instances = null;
    Instance instance = null;
    int i;
    int numInstances;

    if (fileName == null)
        fileName = "";

    if (fileName.length() != 0) {
        source = new DataSource(fileName);
    } else
        source = new DataSource(inst);
    instances = source.getStructure(inst.classIndex());

    i = 0;
    while (source.hasMoreElements(instances)) {
        instance = source.nextElement(instances);
        if (m_clusterAssignments[i] >= 0) {
            counts[(int) m_clusterAssignments[i]][(int) instance.classValue()]++;
            clusterTotals[(int) m_clusterAssignments[i]]++;
        }
        i++;
    }
    numInstances = i;

    best[m_numClusters] = Double.MAX_VALUE;
    mapClasses(m_numClusters, 0, counts, clusterTotals, current, best, 0);

    m_clusteringResults.append("\n\nClass attribute: " + inst.classAttribute().name() + "\n");
    m_clusteringResults.append("Classes to Clusters:\n");
    String matrixString = toMatrixString(counts, clusterTotals, new Instances(inst, 0));
    m_clusteringResults.append(matrixString).append("\n");

    int Cwidth = 1 + (int) (Math.log(m_numClusters) / Math.log(10));
    // add the minimum error assignment
    for (i = 0; i < m_numClusters; i++) {
        if (clusterTotals[i] > 0) {
            m_clusteringResults.append("Cluster " + Utils.doubleToString((double) i, Cwidth, 0));
            m_clusteringResults.append(" <-- ");

            if (best[i] < 0) {
                m_clusteringResults.append("No class\n");
            } else {
                m_clusteringResults.append(inst.classAttribute().value((int) best[i])).append("\n");
            }
        }
    }
    m_clusteringResults.append("\nIncorrectly clustered instances :\t" + best[m_numClusters] + "\t"
            + (Utils.doubleToString((best[m_numClusters] / numInstances * 100.0), 8, 4)) + " %\n");

    // copy the class assignments
    m_classToCluster = new int[m_numClusters];
    for (i = 0; i < m_numClusters; i++) {
        m_classToCluster[i] = (int) best[i];
    }
}