Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usage for the weka.core Instances(Instances) constructor.

Prototype

public Instances(Instances dataset) 

Source Link

Document

Constructor copying all instances and references to the header information from the given set of instances.
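
Before the real-world examples below, here is a minimal sketch of the copy constructor in use. It is illustrative only: the file name "data.arff" and the class CopyExample are assumptions, not taken from any of the source files listed under Usage.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.core.Instances;

public class CopyExample {

    public static void main(String[] args) throws Exception {
        // load a data set from an ARFF file (assumed to exist as "data.arff")
        Instances original;
        try (BufferedReader reader = new BufferedReader(new FileReader("data.arff"))) {
            original = new Instances(reader);
        }
        original.setClassIndex(original.numAttributes() - 1);

        // copy all instances; header information is shared by reference, but
        // changes to the copy's instance list (e.g. deleting an instance)
        // do not affect the original set
        Instances copy = new Instances(original);
        copy.delete(0);

        System.out.println("original size: " + original.numInstances());
        System.out.println("copy size:     " + copy.numInstances());
    }
}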

Usage

From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java

License:Open Source License

/**
 * Constructor of BPSO.
 * 
 * @param fileName    input data set
 * @param iteration   number of PSO iterations
 * @param popSize     particle population size
 * @param detail      printing mode (verbose output)
 */
public BPSO(String fileName, int iteration, int popSize, boolean detail) {

    // initialize PSO parameters
    this.iteration = iteration;
    this.popSize = popSize;
    this.verbose = detail;
    rand = new Random(System.currentTimeMillis());
    avgFitness = 0.0;
    selectedSample = new ArrayList<String>();
    //tournamentSize = 2;

    // class ratio variables
    double c1 = 0.0;
    double c2 = 0.0;
    double ratio = 0.0;

    // load in the imbalanced data set
    try {
        dataset = new Instances(new BufferedReader(new FileReader(fileName)));
        dataset.setClassIndex(dataset.numAttributes() - 1);
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    // calculate the class imbalance ratio
    for (int i = 0; i < dataset.numInstances(); i++) {
        if (dataset.instance(i).classValue() == 0) {
            c1++;
        } else {
            c2++;
        }
    }

    if (c1 > c2) {
        majorLabel = 0;
        ratio = c2 / (c1 + c2);
    } else {
        majorLabel = 1;
        ratio = c1 / (c1 + c2);
    }

    System.out.println("-------------------- data stats ----------------------");
    System.out.println("sample of class 0: " + c1);
    System.out.println("sample of class 1: " + c2);
    System.out.println("minority class ratio: " + ratio);
}

From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java

License:Open Source License

/**
 * Starts the under-sampling procedure.
 */
public void underSampling() {
    // create a copy of original data set for cross validation
    Instances randData = new Instances(dataset);

    // divide the data set into 3 stratified folds
    randData.stratify(3);

    for (int fold = 0; fold < 3; fold++) {
        // use the first 2 folds as the internal training set and the last fold as the internal test set
        internalTrain = randData.trainCV(3, fold);
        internalTest = randData.testCV(3, fold);

        // count the majority-class samples in the internal training set
        majorSize = 0;

        for (int i = 0; i < internalTrain.numInstances(); i++) {
            if (internalTrain.instance(i).classValue() == majorLabel) {
                majorSize++;
            }
        }

        // class variable initialization
        dec = new DecimalFormat("##.####");
        localBest = new double[popSize];
        localBestParticles = new int[popSize][majorSize];
        globalBest = Double.MIN_VALUE;
        globalBestParticle = new int[majorSize];
        velocity = new double[popSize][majorSize];
        particles = new int[popSize][majorSize];
        searchSpace = new double[popSize][majorSize];

        System.out.println("-------------------- parameters ----------------------");
        System.out.println("CV fold = " + fold);
        System.out.println("inertia weight = " + w);
        System.out.println("c1,c2 = " + c1);
        System.out.println("iteration time = " + iteration);
        System.out.println("population size = " + popSize);

        // initialize BPSO
        initialization();

        // perform optimization process
        findMaxFit();

        // save optimization results to array list
        saveResults();
    }

    // rank the selected samples and build the balanced dataset
    try {
        createBalanceData();
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

}

From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java

License:Open Source License

/**
 * The objective function expressed as a fitness value.
 * 
 * @return   average classification accuracy of the ensemble
 */
public double ensembleClassify() {
    double fitnessValue = 0.0;
    double classifiersScore = 0.0;

    /* load in the modified data set */
    try {
        Instances reducedSet = new Instances(new BufferedReader(new FileReader("reduced.arff")));
        reducedSet.setClassIndex(reducedSet.numAttributes() - 1);

        // calculate the evaluation value of each classifier in turn
        if (verbose == true) {
            System.out.println();
            System.out.println(" |----------J4.8-----------|");
            System.out.println(" |            |            |");
        }
        J48 tree = new J48();
        classifiersScore = classify(tree, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose == true) {
            System.out.println();
            System.out.println(" |-----3NearestNeighbor----|");
            System.out.println(" |            |            |");
        }
        IBk nn3 = new IBk(3);
        classifiersScore = classify(nn3, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose == true) {
            System.out.println();
            System.out.println(" |--------NaiveBayes-------|");
            System.out.println(" |            |            |");
        }
        NaiveBayes nb = new NaiveBayes();
        classifiersScore = classify(nb, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose == true) {
            System.out.println();
            System.out.println(" |-------RandomForest------|");
            System.out.println(" |            |            |");
        }
        RandomForest rf5 = new RandomForest();
        rf5.setNumTrees(5);
        classifiersScore = classify(rf5, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose == true) {
            System.out.println();
            System.out.println(" |---------Logistic--------|");
            System.out.println(" |            |            |");
        }
        Logistic log = new Logistic();
        classifiersScore = classify(log, reducedSet, internalTest);
        fitnessValue += classifiersScore;

    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    fitnessValue /= 5;

    if (verbose == true) {
        System.out.println();
        System.out.println("Fitness: " + fitnessValue);
        System.out.println("---------------------------------------------------");
    }

    return fitnessValue;
}

From source file:au.edu.usyd.it.yangpy.snp.GEsnpxPara.java

License:Open Source License

/**
 * Performs the genetic operations.
 *
 * @param saveFlag   whether to append to or overwrite the output file
 */
public void performGeneticOperation(int saveFlag) throws Exception {
    // initialize processing components
    // loading the raw data
    Instances rawData = new Instances(new BufferedReader(new FileReader(file)));
    rawData.setClassIndex(rawData.numAttributes() - 1);

    ParallelGenetic genetic = new ParallelGenetic(rawData, chroLen, popSize, terGener, mode, balance, diversity,
            numThread);
    genetic.initializeParameters();
    genetic.initializeChromosomes();
    genetic.evaluate();

    for (int i = 1; i < genetic.getTerimateGeneration(); i++) {
        genetic.selectElitism();
        genetic.selectUsingTournament();
        genetic.crossover();
        genetic.mutate();
        genetic.generateNewGeneration();
        genetic.evaluate();
    }

    if (saveFlag == 0)
        genetic.saveBestChro(false);
    else
        genetic.saveBestChro(true);
}

From source file:au.edu.usyd.it.yangpy.snp.ParallelGenetic.java

License:Open Source License

public void crossValidate() {
    // create a copy of original training set for CV
    Instances randData = new Instances(data);

    // stratify the data set into foldSize folds
    randData.stratify(foldSize);

    try {

        cvTrain = randData.trainCV(foldSize, foldIndex);
        cvTest = randData.testCV(foldSize, foldIndex);

        foldIndex++;

        if (foldIndex >= foldSize) {
            foldIndex = 0;
        }

    } catch (Exception e) {
        System.out.println(cvTest.toString());
    }
}

From source file:au.edu.usyd.it.yangpy.snp.ParallelGenetic.java

License:Open Source License

/**
 * Constrains the data set to a given SNP subset and computes the fitness of that subset.
 * 
 * @param cId   chromosome id
 * @return      fitness value of the chromosome
 */
public double computeFitess(int cId) throws Exception {

    Instances cTrain = new Instances(cvTrain);
    Instances cTest = new Instances(cvTest);

    int len = 0;
    for (int i = 0; i < chro[cId].length; i++) {
        if (chro[cId][i] != -1) {
            len++;
        }
    }

    int[] deleteList = new int[data.numAttributes() - 1 - len];

    int delId = 0;
    for (int i = 0; i < data.numAttributes() - 1; i++) {

        boolean testContain = false;

        for (int j = 0; j < chro[cId].length; j++) {
            if (i == chro[cId][j]) {
                testContain = true;
            }
        }

        if (testContain == false) {
            deleteList[delId] = i;
            delId++;
        }
    }

    Arrays.sort(deleteList);
    // reverse the array so attributes are deleted from the highest index first, keeping the remaining indices valid
    for (int i = 0; i < deleteList.length / 2; ++i) {
        int temp = deleteList[i];
        deleteList[i] = deleteList[deleteList.length - i - 1];
        deleteList[deleteList.length - i - 1] = temp;
    }

    for (int i = 0; i < deleteList.length; i++) {
        cTrain.deleteAttributeAt(deleteList[i]);
        cTest.deleteAttributeAt(deleteList[i]);
    }

    ////////////////////////////////////////////
    // compute fitness
    double fitness = 0.0;

    //boolean useDiversity = false;

    if (mode.equals("v")) {
        System.out.println("---------------------------------------------");
        System.out.println(" subset " + (cId + 1) + ":");
        System.out.println();
    }

    Ensemble classifier = new Ensemble(cTrain, cTest);
    classifier.ensemble(mode);
    double blockScore = classifier.blocking();
    double voteScore = classifier.voting();
    double diversityScore = 0.0;

    if (currGener < (terGener - (terGener / 5))) {
        if (diversity.equals("K")) {
            diversityScore = classifier.kappaDiversity();
        } else {
            diversityScore = classifier.doubleFaultDiversity();
        }
    }

    blockScore = Math.round(blockScore * 10000.0) / 10000.0;
    voteScore = Math.round(voteScore * 10000.0) / 10000.0;

    if (diversityScore != 0.0) {
        diversityScore = Math.round(diversityScore * 10000.0) / 10000.0;
        fitness = blockScore * 0.45 + voteScore * 0.45 + diversityScore * 0.1;
    } else {
        fitness = blockScore * 0.5 + voteScore * 0.5;
    }

    // average accuracy of five classifiers.
    if (mode.equals("v")) {
        System.out.println("block (average) AUC: " + blockScore);
        System.out.println("majority voting AUC: " + voteScore);

        if (diversityScore != 0.0) {
            if (diversity.equals("K")) {
                System.out.println("kappa diversity: " + diversityScore);
            } else {
                System.out.println("double fault diversity: " + diversityScore);
            }
        }
    }

    avgFitness += fitness;
    return fitness;

}

From source file:aw_cluster.AW_Cluster.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    // TODO code application logic here
    Scanner sc = new Scanner(System.in);
    Instances trainingData;
    ClusterEvaluation eval;
    String path;
    int pilihan;
    int jumlahCluster;
    int maxIter;
    int typeLinkage;

    do {
        System.out.println("Masukan pilihan algoritma: ");
        System.out.println("1. MyKMeans");
        System.out.println("2. MyAgnes");
        System.out.println("3. Exit");
        System.out.print("Pilihan: ");
        pilihan = sc.nextInt();
        if (pilihan == 1) {
            path = masukanFile(sc);
            System.out.println("Masukan jumlah cluster: ");
            jumlahCluster = sc.nextInt();
            System.out.println("Masukan jumlah maksimum iterasi: ");
            maxIter = sc.nextInt();
            BufferedReader data = new BufferedReader(new FileReader(path));
            trainingData = new Instances(data);
            myKMeans kmeans = new myKMeans();
            kmeans.setNumCluster(jumlahCluster);
            kmeans.setMaxIteration(maxIter);
            kmeans.buildClusterer(trainingData);
            eval = new ClusterEvaluation();
            eval.setClusterer(kmeans);
            eval.evaluateClusterer(trainingData);
            System.out.println("Cluster Evaluation: " + eval.clusterResultsToString());
            System.out.println("");
        } else if (pilihan == 2) {
            path = masukanFile(sc);
            System.out.println("Masukan jumlah cluster: ");
            jumlahCluster = sc.nextInt();
            typeLinkage = typeLinkage(sc);
            BufferedReader data = new BufferedReader(new FileReader(path));
            trainingData = new Instances(data);
            myAgnes agnes = new myAgnes();
            agnes.setNumCluster(jumlahCluster);
            agnes.setLinkage(typeLinkage);
            agnes.buildClusterer(trainingData);
            eval = new ClusterEvaluation();
            eval.setClusterer(agnes);
            eval.evaluateClusterer(trainingData);
            System.out.println("Cluster Evaluation: " + eval.clusterResultsToString());
            System.out.println("");
        }
    } while (pilihan != 3);
}

From source file:aw_cluster.myAgnes.java

@Override
public void buildClusterer(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    instances = new Instances(data);

    instances.setClassIndex(-1);
    aliveIndexes = new ArrayList();
    for (int i = 0; i < instances.numInstances(); i++)
        aliveIndexes.add(i);
    mergePairs = new ArrayList();

    distanceFunction.setInstances(instances);

    // Distance matrix initialization
    distanceMatrix = new Double[instances.numInstances()][instances.numInstances()];
    for (int i = 0; i < instances.numInstances(); i++) {
        for (int j = 0; j < instances.numInstances(); j++) {
            distanceMatrix[i][j] = distanceFunction.distance(instances.instance(i), instances.instance(j));
        }
    }
    while (aliveIndexes.size() > 1) {

        // Find the two nearest clusters
        MergePair bestPair = new MergePair(-1, -1, 0);
        for (int i = 0; i < aliveIndexes.size(); i++) {
            for (int j = i + 1; j < aliveIndexes.size(); j++) {
                int index_i = aliveIndexes.get(i), index_j = aliveIndexes.get(j);
                MergePair currentPair = new MergePair(index_i, index_j, distanceMatrix[index_i][index_j]);
                if (bestPair.i < 0 || bestPair.compareTo(currentPair) > 0)
                    bestPair = currentPair;
                else if (bestPair.compareTo(currentPair) == 0 && Math.random() < 0.5)
                    bestPair = currentPair;
            }
        }

        // Merge the two nearest clusters
        mergePairs.add(bestPair);
        int index_j = aliveIndexes.indexOf(bestPair.j);
        aliveIndexes.remove(index_j);

        // Update Distance Matrix
        for (int i = 0; i < aliveIndexes.size(); i++) {
            int index = aliveIndexes.get(i);
            if (index == bestPair.i)
                continue;
            double dist = Math.min(distanceMatrix[index][bestPair.i], distanceMatrix[index][bestPair.j]);
            if (this.linkage == COMPLETE_LINKAGE)
                dist = Math.max(distanceMatrix[index][bestPair.i], distanceMatrix[index][bestPair.j]);
            distanceMatrix[index][bestPair.i] = dist;
            distanceMatrix[bestPair.i][index] = dist;
        }
    }

    // Construct Cluster
    constuctCluster(numCluster);
}

From source file:aw_cluster.myKMeans.java

@Override
public void buildClusterer(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    Instances instances = new Instances(data);
    instances.setClassIndex(-1);

    if (instances.numInstances() == 0) {
        throw new RuntimeException("Dataset should not be empty");
    }

    assignments = new int[instances.numInstances()];
    centroid = new Instances(instances, numCluster);
    distanceFunction.setInstances(instances);
    squaredError = new double[numCluster];

    // Initialize centroids randomly from the seed
    Random random = new Random(getSeedRandom());
    Instances tempInstances = new Instances(instances);

    int tI = tempInstances.numInstances() - 1;
    while (tI >= 0 && centroid.numInstances() < numCluster) {
        int indexCentroid = random.nextInt(tI + 1);
        centroid.add(tempInstances.instance(indexCentroid));
        tempInstances.swap(tI, indexCentroid);
        tI--;
    }

    tempInstances = null;

    boolean converged = false;
    while (!converged) {
        converged = true;
        numIteration++;
        for (int i = 0; i < instances.numInstances(); ++i) {
            Instance toCluster = instances.instance(i);
            int clusterResult = clusterInstanceProcess(toCluster, true);
            if (clusterResult != assignments[i])
                converged = false;
            assignments[i] = clusterResult;
        }

        // update centroid
        Instances[] TempI = new Instances[numCluster];
        centroid = new Instances(instances, numCluster);
        for (int i = 0; i < TempI.length; ++i) {
            TempI[i] = new Instances(instances, 0);
        }
        for (int i = 0; i < instances.numInstances(); ++i) {
            TempI[assignments[i]].add(instances.instance(i));
        }
        for (int i = 0; i < TempI.length; ++i) {
            moveCentroid(TempI[i]);
        }
        if (converged)
            squaredError = new double[numCluster];
        if (numIteration == maxIteration)
            converged = true;
        sizeEachCluster = new int[numCluster];
        for (int i = 0; i < numCluster; ++i) {
            sizeEachCluster[i] = TempI[i].numInstances();
        }

    }
}

From source file:binarytreesom.clustering.BinaryTreeSOMClustering.java

/**
 * Loads the data set from a given ARFF file.
 * 
 * @param filename path to the ARFF file that contains the data set
 * @return the data set as an Instances object
 * @throws java.io.IOException
 */
public static Instances readArff(String filename) throws IOException {
    Instances data;
    BufferedReader reader = new BufferedReader(new FileReader(filename));
    data = new Instances(reader);
    reader.close();
    //data.setClassIndex(data.numAttributes() - 1);
    return data;
}