Example usage for weka.core Instances get

List of usage examples for weka.core Instances get

Introduction

In this page you can find the example usage for weka.core Instances get.

Prototype



@Override
public Instance get(int index)

Source Link

Document

Returns the instance at the given position.

Usage

From source file:ANN_Single.SinglelayerPerceptron.java

@Override
public void buildClassifier(Instances i) {
    // One output node per class; each node holds one weight per attribute
    // (slot 0 of the input vector is the bias term added below).
    listOutput = new ArrayList<>();
    for (int num = 0; num < i.numClasses(); num++) {
        listOutput.add(new Node(i.numAttributes()));
    }
    while (true) { // repeat epochs until the stopping check below fires
        for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) {
            // Build the input vector: bias (1.0) followed by every
            // non-class attribute value of the current instance.
            ArrayList<Double> listInput = new ArrayList<>();
            listInput.add(1.0);
            for (int idx = 0; idx < i.numAttributes() - 1; idx++) {
                listInput.add(i.get(idxInstance).value(idx));
            }

            // Forward pass: compute every output node's activation.
            for (int idxOutput = 0; idxOutput < listOutput.size(); idxOutput++) {
                output(listInput, idxOutput);
            }
            // Per-instance error, then weight update (online learning).
            calculateError(idxInstance);
            updateWeight(listInput);
        }
        // Sum of squared errors over the epoch.
        // NOTE(review): the inner sum does not depend on idxErr, so this adds
        // the errors left over from the LAST instance numInstances times —
        // a per-instance error was presumably intended; verify against
        // calculateError.
        double error = 0;
        for (int idxErr = 0; idxErr < i.numInstances(); idxErr++) {
            for (int idx = 0; idx < listOutput.size(); idx++) {
                error += Math.pow(listOutput.get(idx).getError(), 2) / 2;
            }
        }
        System.out.println(error);
        System.out.println();
        // NOTE(review): error is a sum of squares, so it is only <= 0 on a
        // perfect fit — this loop may never terminate; confirm whether a
        // positive threshold was intended here.
        if (error <= 0)
            break;
    }
    // Report per-fold diagnostics after training converges.
    fold++;
    System.out.println("Fold ke-" + fold);
    double error = 0;
    for (int idxErr = 0; idxErr < i.numInstances(); idxErr++) {
        // NOTE(review): same pattern as above — inner sum ignores idxErr.
        for (Node listOutput1 : listOutput) {
            error += Math.pow(listOutput1.getError(), 2) / 2;
        }
    }
    System.out.println("error " + error);
    // Dump final value, error and weights of every output node.
    for (int idx = 0; idx < listOutput.size(); idx++) {
        System.out.println("Output value " + listOutput.get(idx).getValue());
        System.out.println("Output error " + listOutput.get(idx).getError());
        for (int idx2 = 0; idx2 < listOutput.get(idx).getWeightSize(); idx2++)
            System.out.println("Output weight" + listOutput.get(idx).getWeightFromList(idx2));
    }
}

From source file:ANN_single2.MultilayerPerceptron.java

/**
 * Trains the multilayer perceptron: caches each instance's numeric class
 * index, then runs a fixed number of epochs, each doing a forward pass
 * through the hidden and output layers followed by per-instance error
 * computation and an online weight update.
 *
 * NOTE(review): the error-threshold early stopping that used to live here is
 * disabled, so training always runs all 10000 epochs — confirm that is
 * intended.
 *
 * @param i training data; the class attribute must already be set
 */
@Override
public void buildClassifier(Instances i) {
    // Convert the class of every instance to its numeric index.
    listDoubleinstance = new double[i.numInstances()];
    for (int numIns = 0; numIns < i.numInstances(); numIns++) {
        listDoubleinstance[numIns] = i.instance(numIns).toDoubleArray()[i.classIndex()];
    }
    for (int itt = 0; itt < 10000; itt++) {
        for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) {
            // Input vector: bias (1.0) followed by all non-class attribute values.
            ArrayList<Double> listInput = new ArrayList<>();
            listInput.add(1.0); // bias term
            for (int ins = 0; ins < i.get(idxInstance).numAttributes() - 1; ins++) {
                listInput.add(i.get(idxInstance).value(ins));
            }

            // Forward pass through the hidden layer (slot 0 is the bias node).
            ArrayList<Double> listHide = new ArrayList<>();
            listHide.add(1.0);
            for (int idxHidden = 1; idxHidden < listHidden.size(); idxHidden++) {
                output(listHidden, listInput, idxHidden);
                listHide.add(listHidden.get(idxHidden).getValue());
            }

            // Forward pass through the output layer.
            for (int idxOutput = 0; idxOutput < listOutput.size(); idxOutput++) {
                output(listOutput, listHide, idxOutput);
            }

            // Per-instance error, then weight update.
            calculateError(idxInstance);
            updateBobot(listInput);
        }
    }
    // Report the residual error for this fold.
    // NOTE(review): the inner sum does not depend on idx, so this adds the
    // errors left over from the last processed instance numInstances times.
    double error = 0;
    fold++;
    for (int idx = 0; idx < i.numInstances(); idx++) {
        for (int idxOut = 0; idxOut < listOutput.size(); idxOut++) {
            error += Math.pow(listOutput.get(idxOut).getError(), 2) / 2;
        }
    }
    System.out.println("Fold " + fold);
    System.out.println("error " + error);
}

From source file:ANN_single2.SinglelayerPerceptron.java

/**
 * Trains the single-layer perceptron: creates one output node per class,
 * caches each instance's numeric class index, then iterates up to
 * {@code itteration} epochs of forward pass, error computation and weight
 * update, stopping early once the summed squared output error drops to the
 * configured threshold.
 *
 * @param i training data; the class attribute must already be set
 */
@Override
public void buildClassifier(Instances i) {
    // One output node per class; each node holds one weight per attribute.
    listOutput = new ArrayList<>();
    for (int idx = 0; idx < i.numClasses(); idx++) {
        listOutput.add(new Node(i.numAttributes()));
    }

    // Convert the class of every instance to its numeric index.
    listDoubleinstance = new double[i.numInstances()];
    for (int numIns = 0; numIns < i.numInstances(); numIns++) {
        listDoubleinstance[numIns] = i.instance(numIns).toDoubleArray()[i.classIndex()];
    }

    for (int iter = 0; iter < itteration; iter++) {
        double errorThres = 0;
        for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) {

            // Input vector: bias (1.0) followed by all non-class attribute values.
            ArrayList<Double> listInput = new ArrayList<>();
            listInput.add(1.0); // bias term
            for (int idx = 0; idx < i.numAttributes() - 1; idx++) {
                listInput.add(i.get(idxInstance).value(idx));
            }

            // Forward pass: sigmoid of the weighted sum for every output node.
            for (int idxOut = 0; idxOut < listOutput.size(); idxOut++) {
                output(listInput, idxOut);
            }

            // Per-instance error, then weight update.
            calculateError(idxInstance);
            updateBobot(listInput);

        }
        // Early stopping on the squared error of the last processed instance.
        for (int idxOut = 0; idxOut < listOutput.size(); idxOut++) {
            errorThres += Math.pow(listOutput.get(idxOut).getError(), 2) / 2;
        }
        if (errorThres <= threshold)
            break;
    }
}

From source file:app.RunApp.java

License:Open Source License

/**
 * Preprocess dataset
 * 
 * @return Positive number if successful and negative otherwise
 */
private int preprocess() {
    // Pipeline: (1) optional instance selection, (2) optional feature
    // selection, (3) optional train/test split or cross-validation
    // partitioning. Every validation failure shows a dialog and returns -1;
    // success returns 1.
    // NOTE(review): raw ArrayList types — parameterized types would be safer.
    trainDatasets = new ArrayList();
    testDatasets = new ArrayList();

    Instances train, test;

    if (dataset == null) {
        JOptionPane.showMessageDialog(null, "You must load a dataset.", "alert", JOptionPane.ERROR_MESSAGE);
        return -1;
    }

    // Work on a copy so the loaded dataset itself is never modified.
    MultiLabelInstances preprocessDataset = dataset.clone();

    if (!radioNoIS.isSelected()) {
        // --- Instance selection ---
        if (radioRandomIS.isSelected()) {
            // Keep a random subset of nInstances instances.
            int nInstances = Integer.parseInt(textRandomIS.getText());

            if (nInstances < 1) {
                JOptionPane.showMessageDialog(null,
                        "The number of instances must be a positive natural number.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            } else if (nInstances > dataset.getNumInstances()) {
                JOptionPane.showMessageDialog(null,
                        "The number of instances to select must be less than the original.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            Instances dataIS;
            try {
                // Shuffle, then remove everything after the first nInstances.
                Randomize randomize = new Randomize();
                dataIS = dataset.getDataSet();

                randomize.setInputFormat(dataIS);
                dataIS = Filter.useFilter(dataIS, randomize);
                randomize.batchFinished();

                RemoveRange removeRange = new RemoveRange();
                removeRange.setInputFormat(dataIS);
                // Range is 1-based: drop instances nInstances+1 .. last.
                removeRange.setInstancesIndices((nInstances + 1) + "-last");

                dataIS = Filter.useFilter(dataIS, removeRange);
                removeRange.batchFinished();

                // Re-attach the multi-label metadata to the reduced data.
                preprocessDataset = dataset.reintegrateModifiedDataSet(dataIS);
            } catch (Exception ex) {
                Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            }

            if (preprocessDataset == null) {
                JOptionPane.showMessageDialog(null, "Error when selecting instances.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            preprocessedDataset = preprocessDataset;
        }
    }

    if (!radioNoFS.isSelected()) {
        // --- Feature selection ---
        // BR-based feature selection (combination/normalization/output chosen
        // from the combo boxes).
        if (radioBRFS.isSelected()) {
            int nFeatures = Integer.parseInt(textBRFS.getText());
            if (nFeatures < 1) {
                JOptionPane.showMessageDialog(null, "The number of features must be a positive natural number.",
                        "alert", JOptionPane.ERROR_MESSAGE);
                return -1;
            } else if (nFeatures > dataset.getFeatureIndices().length) {
                JOptionPane.showMessageDialog(null,
                        "The number of features to select must be less than the original.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            String combination = jComboBoxBRFSComb.getSelectedItem().toString();
            String normalization = jComboBoxBRFSNorm.getSelectedItem().toString();
            String output = jComboBoxBRFSOut.getSelectedItem().toString();

            FeatureSelector fs;
            if (radioNoIS.isSelected()) {
                fs = new FeatureSelector(dataset, nFeatures);
            } else {
                // Instance selection already ran: select features from its result.
                fs = new FeatureSelector(preprocessDataset, nFeatures);
            }

            preprocessedDataset = fs.select(combination, normalization, output);

            if (preprocessedDataset == null) {
                JOptionPane.showMessageDialog(null, "Error when selecting features.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            preprocessDataset = preprocessedDataset;
        } else if (radioRandomFS.isSelected()) {
            // Random feature selection.
            int nFeatures = Integer.parseInt(textRandomFS.getText());

            if (nFeatures < 1) {
                JOptionPane.showMessageDialog(null, "The number of features must be a positive natural number.",
                        "alert", JOptionPane.ERROR_MESSAGE);
                return -1;
            } else if (nFeatures > dataset.getFeatureIndices().length) {
                JOptionPane.showMessageDialog(null,
                        "The number of features to select must be less than the original.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            FeatureSelector fs;

            if (radioNoIS.isSelected()) {
                fs = new FeatureSelector(dataset, nFeatures);
            } else {
                // Instance selection already ran: select features from its result.
                fs = new FeatureSelector(preprocessDataset, nFeatures);
            }

            preprocessedDataset = fs.randomSelect();

            if (preprocessedDataset == null) {
                JOptionPane.showMessageDialog(null, "Error when selecting features.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            preprocessDataset = preprocessedDataset;
        }
    }

    if (!radioNoSplit.isSelected()) {
        // --- Train/test partitioning ---
        // Random holdout: single percentage-based split.
        if (radioRandomHoldout.isSelected()) {
            String split = textRandomHoldout.getText();
            double percentage = Double.parseDouble(split);
            if ((percentage <= 0) || (percentage >= 100)) {
                JOptionPane.showMessageDialog(null, "The percentage must be a number in the range (0, 100).",
                        "alert", JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            try {
                RandomTrainTest pre = new RandomTrainTest();
                MultiLabelInstances[] partitions = pre.split(preprocessDataset, percentage);
                trainDataset = partitions[0];
                testDataset = partitions[1];
            } catch (InvalidDataFormatException ex) {
                Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            } catch (Exception ex) {
                Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // Random cross-validation: shuffle, then deal instances round-robin
        // into nFolds folds; fold i is the test set of partition i.
        else if (radioRandomCV.isSelected()) {
            String split = textRandomCV.getText();

            if (split.equals("")) {
                JOptionPane.showMessageDialog(null, "You must enter the number of folds.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            int nFolds;

            try {
                nFolds = Integer.parseInt(split);
            } catch (Exception e) {
                JOptionPane.showMessageDialog(null, "Introduce a correct number of folds.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            if (nFolds < 2) {
                JOptionPane.showMessageDialog(null, "The number of folds must be greater or equal to 2.",
                        "alert", JOptionPane.ERROR_MESSAGE);
                return -1;
            } else if (nFolds > preprocessDataset.getNumInstances()) {
                JOptionPane.showMessageDialog(null,
                        "The number of folds can not be greater than the number of instances.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            try {
                MultiLabelInstances temp = preprocessDataset.clone();
                Instances dataTemp = temp.getDataSet();

                // NOTE(review): nondeterministic seed — partitions differ on
                // every run; a fixed/configurable seed would aid reproducibility.
                int seed = (int) (Math.random() * 100) + 100;
                Random rand = new Random(seed);

                dataTemp.randomize(rand);

                // Empty folds that share the header of the shuffled data.
                Instances[] foldsCV = new Instances[nFolds];
                for (int i = 0; i < nFolds; i++) {
                    foldsCV[i] = new Instances(dataTemp);
                    foldsCV[i].clear();
                }

                // Round-robin assignment of instances to folds.
                for (int i = 0; i < dataTemp.numInstances(); i++) {
                    foldsCV[i % nFolds].add(dataTemp.get(i));
                }

                train = new Instances(dataTemp);
                test = new Instances(dataTemp);
                for (int i = 0; i < nFolds; i++) {
                    // Partition i: all folds except i as train, fold i as test.
                    train.clear();
                    test.clear();
                    for (int j = 0; j < nFolds; j++) {
                        if (i != j) {
                            System.out.println("Add fold " + j + " to train");
                            train.addAll(foldsCV[j]);
                        }
                    }
                    System.out.println("Add fold " + i + " to test");
                    test.addAll(foldsCV[i]);
                    System.out.println(train.get(0).toString());
                    System.out.println(test.get(0).toString());
                    trainDatasets.add(new MultiLabelInstances(new Instances(train),
                            preprocessDataset.getLabelsMetaData()));
                    testDatasets.add(new MultiLabelInstances(new Instances(test),
                            preprocessDataset.getLabelsMetaData()));
                    System.out.println(trainDatasets.get(i).getDataSet().get(0).toString());
                    System.out.println(testDatasets.get(i).getDataSet().get(0).toString());
                    System.out.println("---");
                }
            }

            catch (Exception ex) {
                Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // Iterative stratified holdout.
        else if (radioIterativeStratifiedHoldout.isSelected()) {
            String split = textIterativeStratifiedHoldout.getText();
            double percentage = Double.parseDouble(split);
            if ((percentage <= 0) || (percentage >= 100)) {
                JOptionPane.showMessageDialog(null, "The percentage must be a number in the range (0, 100).",
                        "alert", JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            try {
                IterativeTrainTest pre = new IterativeTrainTest();
                MultiLabelInstances[] partitions = pre.split(preprocessDataset, percentage);

                trainDataset = partitions[0];
                testDataset = partitions[1];
            } catch (Exception ex) {
                Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // Iterative stratified cross-validation.
        else if (radioIterativeStratifiedCV.isSelected()) {
            String split = textIterativeStratifiedCV.getText();

            if (split.equals("")) {
                JOptionPane.showMessageDialog(null, "You must enter the number of folds.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            int nFolds = 0;

            try {
                nFolds = Integer.parseInt(split);
            } catch (Exception e) {
                JOptionPane.showMessageDialog(null, "Introduce a correct number of folds.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            if (nFolds < 2) {
                JOptionPane.showMessageDialog(null, "The number of folds must be greater or equal to 2.",
                        "alert", JOptionPane.ERROR_MESSAGE);
                return -1;
            } else if (nFolds > preprocessDataset.getNumInstances()) {
                JOptionPane.showMessageDialog(null,
                        "The number of folds can not be greater than the number of instances.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            IterativeStratification strat = new IterativeStratification();
            MultiLabelInstances folds[] = strat.stratify(preprocessDataset, nFolds);

            for (int i = 0; i < nFolds; i++) {
                try {

                    // Pre-size train/test with the exact instance counts.
                    int trainSize = 0, testSize = 0;
                    for (int j = 0; j < nFolds; j++) {
                        if (i != j) {
                            trainSize += folds[j].getNumInstances();
                        }
                    }
                    testSize += folds[i].getNumInstances();

                    train = new Instances(preprocessDataset.getDataSet(), trainSize);
                    test = new Instances(preprocessDataset.getDataSet(), testSize);
                    for (int j = 0; j < nFolds; j++) {
                        if (i != j) {
                            train.addAll(folds[j].getDataSet());
                        }
                    }
                    test.addAll(folds[i].getDataSet());

                    trainDatasets.add(new MultiLabelInstances(train, preprocessDataset.getLabelsMetaData()));
                    testDatasets.add(new MultiLabelInstances(test, preprocessDataset.getLabelsMetaData()));
                } catch (InvalidDataFormatException ex) {
                    Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
                }
            }

        }
        // Label-powerset stratified holdout.
        else if (radioLPStratifiedHoldout.isSelected()) {
            String split = textLPStratifiedHoldout.getText();
            double percentage = Double.parseDouble(split);
            if ((percentage <= 0) || (percentage >= 100)) {
                JOptionPane.showMessageDialog(null, "The percentage must be a number in the range (0, 100).",
                        "alert", JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            try {
                // NOTE(review): this uses IterativeTrainTest, identical to the
                // iterative stratified holdout branch — confirm an LP-specific
                // splitter was not intended here.
                IterativeTrainTest pre = new IterativeTrainTest();
                MultiLabelInstances[] partitions = pre.split(preprocessDataset, percentage);

                trainDataset = partitions[0];
                testDataset = partitions[1];
            } catch (Exception ex) {
                Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // Label-powerset stratified cross-validation.
        else if (radioLPStratifiedCV.isSelected()) {
            String split = textLPStratifiedCV.getText();

            if (split.equals("")) {
                JOptionPane.showMessageDialog(null, "You must enter the number of folds.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            int nFolds = 0;

            try {
                nFolds = Integer.parseInt(split);
            } catch (Exception e) {
                JOptionPane.showMessageDialog(null, "Introduce a correct number of folds.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            if (nFolds < 2) {
                JOptionPane.showMessageDialog(null, "The number of folds must be greater or equal to 2.",
                        "alert", JOptionPane.ERROR_MESSAGE);
                return -1;
            } else if (nFolds > preprocessDataset.getNumInstances()) {
                JOptionPane.showMessageDialog(null,
                        "The number of folds can not be greater than the number of instances.", "alert",
                        JOptionPane.ERROR_MESSAGE);
                return -1;
            }

            LabelPowersetTrainTest strat = new LabelPowersetTrainTest();
            MultiLabelInstances folds[] = strat.stratify(preprocessDataset, nFolds);

            for (int i = 0; i < nFolds; i++) {
                try {
                    train = new Instances(preprocessDataset.getDataSet(), 0);
                    test = new Instances(preprocessDataset.getDataSet(), 0);

                    for (int j = 0; j < nFolds; j++) {
                        if (i != j) {
                            train.addAll(folds[j].getDataSet());
                        }
                    }
                    test.addAll(folds[i].getDataSet());

                    trainDatasets.add(new MultiLabelInstances(train, preprocessDataset.getLabelsMetaData()));
                    testDatasets.add(new MultiLabelInstances(test, preprocessDataset.getLabelsMetaData()));
                } catch (InvalidDataFormatException ex) {
                    Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
    }

    // Preprocessing succeeded: enable the save controls.
    jButtonSaveDatasets.setEnabled(true);
    jComboBoxSaveFormat.setEnabled(true);

    return 1;
}

From source file:asap.PostProcess.java

/**
 * Writes one line per prediction error (ordered by PredictionError's natural
 * ordering) to the given file, overwriting any existing content.
 *
 * @param instances      the evaluated instances, aligned with predictions;
 *                       must contain "pair_ID" and "source_file" attributes
 * @param predictions    model predictions, one per instance
 * @param errorsFilename path of the output file
 */
private void writePredictionErrors(Instances instances, double[] predictions, String errorsFilename) {

    // NOTE(review): a TreeSet silently drops elements that compare equal —
    // confirm PredictionError.compareTo never returns 0 for distinct pairs,
    // otherwise some errors are lost; a sorted List would be safer.
    TreeSet<PredictionError> errors = new TreeSet<>();

    for (int i = 0; i < predictions.length; i++) {
        double prediction = predictions[i];
        double expected = instances.get(i).classValue();
        int pairId = (int) instances.get(i).value(instances.attribute("pair_ID"));
        String sourceFile = instances.get(i).stringValue(instances.attribute("source_file"));
        PredictionError pe = new PredictionError(prediction, expected, pairId, sourceFile, instances.get(i));

        errors.add(pe);
    }

    StringBuilder sb = new StringBuilder();

    for (PredictionError error : errors) {
        sb.append(error.toString()).append("\n");
    }

    File f = new File(errorsFilename);
    try (FileOutputStream fos = new FileOutputStream(f)) {
        // Fix: encode explicitly as UTF-8 instead of the platform default
        // charset, so the output is identical across machines.
        fos.write(sb.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
    } catch (IOException ex) {
        Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:be.uza.keratoconus.analysis.impl.PreTrainedModel.java

License:Open Source License

/**
 * Converts a patient exam into a single-row CSV (header line + data line),
 * loads it through weka's CSVLoader, and stores the resulting weka Instance
 * in {@code instance}. Fields used by the classification model may appear
 * either as a plain column or as a FRONT/BACK pair of face-specific columns;
 * the trailing "Class" column is set to '?' (unknown, to be predicted).
 *
 * @param exam the exam whose data is converted
 */
@Override
public void processPatientExam(PatientExam exam) {
    examData = exam.getExamData();
    // Fix: build the lines with StringBuilder instead of repeated String
    // concatenation in the loop.
    StringBuilder headerLine = new StringBuilder();
    StringBuilder dataLine = new StringBuilder();
    int nColumns = 0;
    for (String fieldName : classificationModelService.getUsedFields()) {
        if (examData.containsKey(fieldName)) {
            headerLine.append(fieldName).append(SEMICOLON);
            final String fieldValue = examData.get(fieldName);
            // TODO fatal error if fieldValue is null?
            dataLine.append(fieldValue).append(SEMICOLON);
            ++nColumns;
        } else if (examData.containsKey(fieldName + " " + Face.FRONT)) {
            headerLine.append(fieldName).append(" ").append(Face.FRONT).append(SEMICOLON);
            final String frontFieldValue = examData.get(fieldName + " " + Face.FRONT);
            // TODO fatal error if fieldValue is null?
            dataLine.append(frontFieldValue).append(SEMICOLON);
            ++nColumns;
            headerLine.append(fieldName).append(" ").append(Face.BACK).append(SEMICOLON);
            final String backFieldValue = examData.get(fieldName + " " + Face.BACK);
            // TODO fatal error if fieldValue is null?
            dataLine.append(backFieldValue).append(SEMICOLON);
            ++nColumns;
        }
    }

    String csv = headerLine + "Class\n" + dataLine + "?\n";
    CSVLoader csvLoader = new CSVLoader();
    csvLoader.setFieldSeparator(SEMICOLON);
    try {
        csvLoader.setSource(new ByteArrayInputStream(csv.getBytes(Charset.forName("windows-1252"))));
        final Instances dataSet = csvLoader.getDataSet();
        // The class column is the last one (index nColumns).
        dataSet.setClassIndex(nColumns);
        instance = dataSet.get(0);
    } catch (Exception e) {
        logService.log(ownComponentContext.getServiceReference(), LogService.LOG_WARNING,
                "Exception thrown when reading CSV record", e);
    }
}

From source file:br.com.edu.arff.LoadArff.java

/**
 * Loads an ARFF file and pairs each instance's "Cluster" attribute label
 * with the URI at the same position returned by Fuseki.
 *
 * @param caminho path of the ARFF file to load
 * @return one Cluster entry (URI + group label) per instance
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException on read errors
 */
public ArrayList<Cluster> carregarArff(String caminho) throws FileNotFoundException, IOException {
    Instances data;
    // Fix: close the reader (it was previously leaked) via try-with-resources.
    try (BufferedReader reader = new BufferedReader(new FileReader(caminho))) {
        ArffReader arff = new ArffReader(reader);
        data = arff.getData();
    }
    data.setClassIndex(data.numAttributes() - 1);
    Attribute att = data.attribute("Cluster");

    Fuseki fuseki = new Fuseki();
    ArrayList<String> uris = fuseki.buscaURIS();

    ArrayList<Cluster> lista = new ArrayList<Cluster>();
    // Instances and URIs are assumed aligned by position — TODO confirm
    // fuseki.buscaURIS() returns them in the same order as the ARFF rows.
    for (int i = 0; i < data.numInstances(); i++) {
        Cluster cluster = new Cluster();
        cluster.setUri(uris.get(i));
        cluster.setGrupo(data.get(i).stringValue(att));
        lista.add(cluster);
    }
    return lista;
}

From source file:clusterer.SimpleKMeansWithSilhouette.java

License:Open Source License

/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options. After the standard k-means pass, also
 * computes per-instance silhouette coefficients and their average.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

    m_canopyClusters = null;

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    // Work on a copy so the caller's data set is not mutated by filtering.
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][];
    m_ClusterMissingCounts = new double[m_NumClusters][instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = instances.variances();
    }

    // Full-data statistics: slot 0 of the per-cluster arrays is used as a
    // scratch area by moveCentroid here, before real clustering starts.
    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, true, false);

    m_FullMissingCounts = m_ClusterMissingCounts[0];
    m_FullNominalCounts = m_ClusterNominalCounts[0];
    double sumOfWeights = instances.sumOfWeights();
    for (int i = 0; i < instances.numAttributes(); i++) {
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(m_FullStdDevs[i]);
            }
            if (m_FullMissingCounts[i] == sumOfWeights) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common
                                                     // value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap<DecisionTableHashKey, Integer> initC = new HashMap<DecisionTableHashKey, Integer>();
    DecisionTableHashKey hk = null;

    // Random init below shuffles via swap(); copy first when the caller
    // asked for the original instance order to be preserved.
    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    if (m_speedUpDistanceCompWithCanopies) {
        m_canopyClusters = new Canopy();
        m_canopyClusters.setNumClusters(m_NumClusters);
        m_canopyClusters.setSeed(getSeed());
        m_canopyClusters.setT2(getCanopyT2());
        m_canopyClusters.setT1(getCanopyT1());
        m_canopyClusters.setMaxNumCandidateCanopiesToHoldInMemory(getCanopyMaxNumCanopiesToHoldInMemory());
        m_canopyClusters.setPeriodicPruningRate(getCanopyPeriodicPruningRate());
        m_canopyClusters.setMinimumCanopyDensity(getCanopyMinimumCanopyDensity());
        m_canopyClusters.setDebug(getDebug());
        m_canopyClusters.buildClusterer(initInstances);
        // System.err.println(m_canopyClusters);
        m_centroidCanopyAssignments = new ArrayList<long[]>();
        m_dataPointCanopyAssignments = new ArrayList<long[]>();
    }

    // Pick the initial centroids according to the configured strategy.
    if (m_initializationMethod == KMEANS_PLUS_PLUS) {
        kMeansPlusPlusInit(initInstances);

        m_initialStartPoints = new Instances(m_ClusterCentroids);
    } else if (m_initializationMethod == CANOPY) {
        canopyInit(initInstances);

        m_initialStartPoints = new Instances(m_canopyClusters.getCanopies());
    } else if (m_initializationMethod == FARTHEST_FIRST) {
        farthestFirstInit(initInstances);

        m_initialStartPoints = new Instances(m_ClusterCentroids);
    } else {
        // random: draw without replacement, skipping duplicate rows (the
        // hash key dedupes), until m_NumClusters distinct centroids found.
        for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
            instIndex = RandomO.nextInt(j + 1);
            hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(),
                    true);
            if (!initC.containsKey(hk)) {
                m_ClusterCentroids.add(initInstances.instance(instIndex));
                initC.put(hk, null);
            }
            initInstances.swap(j, instIndex);

            if (m_ClusterCentroids.numInstances() == m_NumClusters) {
                break;
            }
        }

        m_initialStartPoints = new Instances(m_ClusterCentroids);
    }

    if (m_speedUpDistanceCompWithCanopies) {
        // assign canopies to training data
        for (int i = 0; i < instances.numInstances(); i++) {
            m_dataPointCanopyAssignments.add(m_canopyClusters.assignCanopies(instances.instance(i)));
        }
    }

    // May be fewer than requested if dedup found fewer distinct rows.
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new double[m_NumClusters][instances.numAttributes()];
    startExecutorPool();

    // Main k-means loop: assign every instance to its nearest centroid, then
    // recompute centroids; stop when assignments are stable or the
    // iteration cap is hit.
    while (!converged) {
        if (m_speedUpDistanceCompWithCanopies) {
            // re-assign canopies to the current cluster centers
            m_centroidCanopyAssignments.clear();
            for (int kk = 0; kk < m_ClusterCentroids.numInstances(); kk++) {
                m_centroidCanopyAssignments
                        .add(m_canopyClusters.assignCanopies(m_ClusterCentroids.instance(kk)));
            }
        }

        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;

        // Serial path for small data / single slot; parallel otherwise.
        if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) {
            for (i = 0; i < instances.numInstances(); i++) {
                Instance toCluster = instances.instance(i);
                int newC = clusterProcessedInstance(toCluster, false, true,
                        m_speedUpDistanceCompWithCanopies ? m_dataPointCanopyAssignments.get(i) : null);
                if (newC != clusterAssignments[i]) {
                    converged = false;
                }
                clusterAssignments[i] = newC;
            }
        } else {
            converged = launchAssignToClusters(instances, clusterAssignments);
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) {
            for (i = 0; i < m_NumClusters; i++) {
                if (tempI[i].numInstances() == 0) {
                    // empty cluster
                    emptyClusterCount++;
                } else {
                    moveCentroid(i, tempI[i], true, true);
                }
            }
        } else {
            emptyClusterCount = launchMoveCentroids(tempI);
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        // Drop empty clusters; compact the bookkeeping arrays when done.
        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];

                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][0];
        }
    }

    // calculate errors
    if (!m_FastDistanceCalc) {
        for (i = 0; i < instances.numInstances(); i++) {
            clusterProcessedInstance(instances.instance(i), true, false, null);
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new double[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = tempI[i].variances();
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(vals2[j]);
                } else {
                    vals2[j] = Utils.missingValue();
                }
            }
            m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].sumOfWeights();
    }

    m_executorPool.shutdown();

    // save memory!
    m_DistanceFunction.clean();

    // Calculate Silhouette Coefficient: for each point z, a = mean distance
    // to its own cluster, b = smallest mean distance to any other cluster;
    // silhouette = (b - a) / max(a, b). This pass is O(n^2) in the number
    // of instances.
    // NOTE(review): 'a' includes the zero distance of z to itself in the
    // average, and the divisor is the weighted cluster size — confirm this
    // matches the intended silhouette definition.
    SilCoeff = new double[instances.numInstances()];
    AvgSilCoeff = 0;
    for (int z = 0; z < instances.numInstances(); z++) {
        double[] distance = new double[m_NumClusters];
        Arrays.fill(distance, 0.0);
        //Sum
        for (int y = 0; y < instances.numInstances(); y++) {
            distance[clusterAssignments[y]] += m_DistanceFunction.distance(instances.get(z), instances.get(y));
        }
        //Average
        for (int x = 0; x < m_NumClusters; x++) {
            distance[x] = distance[x] / m_ClusterSizes[x];
        }
        double a = distance[clusterAssignments[z]];
        // Exclude the own-cluster slot before taking the minimum for b.
        distance[clusterAssignments[z]] = Double.MAX_VALUE;
        Arrays.sort(distance);
        double b = distance[0];
        SilCoeff[z] = (b - a) / Math.max(a, b);
        AvgSilCoeff += SilCoeff[z];
    }
    AvgSilCoeff = AvgSilCoeff / instances.numInstances();
    //System.out.println("AvgSilCoeff: " + AvgSilCoeff);
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Returns the first instance whose last-attribute value equals the maximum
 * observed for that attribute (the best-performing configuration).
 *
 * @param data data set whose last attribute is the performance measure
 * @return the first instance attaining the maximum, or {@code null} if no
 *         row matches (not expected, since the max comes from this data)
 */
public static Instance findBestPerf(Instances data) {
    final int perfAttr = data.numAttributes() - 1;
    final double best = data.attributeStats(perfAttr).numericStats.max;
    for (int row = 0; row < data.numInstances(); row++) {
        Instance candidate = data.get(row);
        if (candidate.value(perfAttr) == best) {
            return candidate;
        }
    }
    return null; // should never happen: the max was computed from this data
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Returns the index of the first instance whose last-attribute value equals
 * the maximum observed for that attribute.
 *
 * @param data data set whose last attribute is the performance measure
 * @return index of the first instance attaining the maximum, or {@code -1}
 *         if no row matches (not expected, since the max comes from this data)
 */
public static int findBestPerfIndex(Instances data) {
    final int perfAttr = data.numAttributes() - 1;
    final double best = data.attributeStats(perfAttr).numericStats.max;
    int row = 0;
    while (row < data.numInstances()) {
        if (data.get(row).value(perfAttr) == best) {
            return row;
        }
        row++;
    }
    return -1; // should never happen: the max was computed from this data
}