List of usage examples for weka.core.Instances: the Instances(Instances) copy constructor
public Instances(Instances dataset)
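Before the project examples below, a minimal sketch of the copy constructor itself (the file path "data.arff" and the class name CopyConstructorDemo are illustrative placeholders): Instances(Instances) copies the full list of instances along with references to the header information, so mutating the copy leaves the original dataset untouched.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instances;

public class CopyConstructorDemo {
    public static void main(String[] args) throws Exception {
        // load any ARFF file (the path is a placeholder)
        BufferedReader reader = new BufferedReader(new FileReader("data.arff"));
        Instances original = new Instances(reader);
        reader.close();

        // the copy constructor duplicates the instance list
        Instances copy = new Instances(original);
        copy.delete(0); // mutate the copy only

        // the original keeps its full size
        System.out.println("original: " + original.numInstances());
        System.out.println("copy:     " + copy.numInstances());
    }
}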
From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java
License:Open Source License
/**
 * Constructor of BPSO.
 *
 * @param fileName  input data set
 * @param iteration number of PSO iterations
 * @param popSize   population size
 * @param detail    printing mode
 */
public BPSO(String fileName, int iteration, int popSize, boolean detail) {
    // initialize PSO parameters
    this.iteration = iteration;
    this.popSize = popSize;
    this.verbose = detail;
    rand = new Random(System.currentTimeMillis());
    avgFitness = 0.0;
    selectedSample = new ArrayList<String>();
    //tournamentSize = 2;

    // class ratio variables
    double c1 = 0.0;
    double c2 = 0.0;
    double ratio = 0.0;

    // load the imbalanced data set
    try {
        dataset = new Instances(new BufferedReader(new FileReader(fileName)));
        dataset.setClassIndex(dataset.numAttributes() - 1);
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    // calculate the imbalance ratio
    for (int i = 0; i < dataset.numInstances(); i++) {
        if (dataset.instance(i).classValue() == 0) {
            c1++;
        } else {
            c2++;
        }
    }

    if (c1 > c2) {
        majorLabel = 0;
        ratio = c2 / (c1 + c2);
    } else {
        majorLabel = 1;
        ratio = c1 / (c1 + c2);
    }

    System.out.println("-------------------- data stats ----------------------");
    System.out.println("samples of class 0: " + c1);
    System.out.println("samples of class 1: " + c2);
    System.out.println("minority class ratio: " + ratio);
}
From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java
License:Open Source License
/**
 * Starts the under-sampling procedure.
 */
public void underSampling() {
    // create a copy of the original data set for cross validation
    Instances randData = new Instances(dataset);

    // divide the data set into 3 folds
    randData.stratify(3);

    for (int fold = 0; fold < 3; fold++) {
        // use the first two folds as the internal training set and the last fold as the internal test set
        internalTrain = randData.trainCV(3, fold);
        internalTest = randData.testCV(3, fold);

        // count the major-class samples in the internal training set
        majorSize = 0;
        for (int i = 0; i < internalTrain.numInstances(); i++) {
            if (internalTrain.instance(i).classValue() == majorLabel) {
                majorSize++;
            }
        }

        // class variable initialization
        dec = new DecimalFormat("##.####");
        localBest = new double[popSize];
        localBestParticles = new int[popSize][majorSize];
        globalBest = Double.MIN_VALUE; // smallest positive double, used as the initial fitness floor
        globalBestParticle = new int[majorSize];
        velocity = new double[popSize][majorSize];
        particles = new int[popSize][majorSize];
        searchSpace = new double[popSize][majorSize];

        System.out.println("-------------------- parameters ----------------------");
        System.out.println("CV fold = " + fold);
        System.out.println("inertia weight = " + w);
        System.out.println("c1,c2 = " + c1);
        System.out.println("iteration time = " + iteration);
        System.out.println("population size = " + popSize);

        // initialize BPSO
        initialization();

        // perform the optimization process
        findMaxFit();

        // save optimization results to the array list
        saveResults();
    }

    // rank the selected samples and build the balanced data set
    try {
        createBalanceData();
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
}
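A note on the stratify/trainCV/testCV pattern above: stratify(3) only reorders the copied instances so that each fold roughly preserves the class proportions, and it applies only when the class attribute is nominal. Weka's own evaluation code randomizes the data (randomize(Random)) before stratifying; this example relies on the original file order instead.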
From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java
License:Open Source License
/**
 * The target function in fitness form.
 *
 * @return classification accuracy
 */
public double ensembleClassify() {
    double fitnessValue = 0.0;
    double classifiersScore = 0.0;

    // load the modified data set
    try {
        Instances reducedSet = new Instances(new BufferedReader(new FileReader("reduced.arff")));
        reducedSet.setClassIndex(reducedSet.numAttributes() - 1);

        // calculate the evaluation values using each classifier in turn
        if (verbose) {
            System.out.println();
            System.out.println(" |----------J4.8-----------|");
            System.out.println(" | | |");
        }
        J48 tree = new J48();
        classifiersScore = classify(tree, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose) {
            System.out.println();
            System.out.println(" |-----3NearestNeighbor----|");
            System.out.println(" | | |");
        }
        IBk nn3 = new IBk(3);
        classifiersScore = classify(nn3, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose) {
            System.out.println();
            System.out.println(" |--------NaiveBayes-------|");
            System.out.println(" | | |");
        }
        NaiveBayes nb = new NaiveBayes();
        classifiersScore = classify(nb, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose) {
            System.out.println();
            System.out.println(" |-------RandomForest------|");
            System.out.println(" | | |");
        }
        RandomForest rf5 = new RandomForest();
        rf5.setNumTrees(5);
        classifiersScore = classify(rf5, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose) {
            System.out.println();
            System.out.println(" |---------Logistic--------|");
            System.out.println(" | | |");
        }
        Logistic log = new Logistic();
        classifiersScore = classify(log, reducedSet, internalTest);
        fitnessValue += classifiersScore;
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    fitnessValue /= 5;

    if (verbose) {
        System.out.println();
        System.out.println("Fitness: " + fitnessValue);
        System.out.println("---------------------------------------------------");
    }

    return fitnessValue;
}
From source file:au.edu.usyd.it.yangpy.snp.GEsnpxPara.java
License:Open Source License
/**
 * Performs the genetic operations.
 *
 * @param saveFlag append/write to the output file
 */
public void performGeneticOperation(int saveFlag) throws Exception {
    // initialize processing components by loading the raw data
    Instances rawData = new Instances(new BufferedReader(new FileReader(file)));
    rawData.setClassIndex(rawData.numAttributes() - 1);

    ParallelGenetic genetic = new ParallelGenetic(rawData, chroLen, popSize, terGener, mode, balance,
            diversity, numThread);

    genetic.initializeParameters();
    genetic.initializeChromosomes();
    genetic.evaluate();

    for (int i = 1; i < genetic.getTerimateGeneration(); i++) {
        genetic.selectElitism();
        genetic.selectUsingTournament();
        genetic.crossover();
        genetic.mutate();
        genetic.generateNewGeneration();
        genetic.evaluate();
    }

    if (saveFlag == 0)
        genetic.saveBestChro(false);
    else
        genetic.saveBestChro(true);
}
From source file:au.edu.usyd.it.yangpy.snp.ParallelGenetic.java
License:Open Source License
public void crossValidate() {
    // create a copy of the original training set for CV
    Instances randData = new Instances(data);

    // stratify the data set into foldSize folds
    randData.stratify(foldSize);

    try {
        cvTrain = randData.trainCV(foldSize, foldIndex);
        cvTest = randData.testCV(foldSize, foldIndex);

        foldIndex++;
        if (foldIndex >= foldSize) {
            foldIndex = 0;
        }
    } catch (Exception e) {
        System.out.println(cvTest.toString());
    }
}
From source file:au.edu.usyd.it.yangpy.snp.ParallelGenetic.java
License:Open Source License
/**
 * Constrains the data set to a given SNP subset and computes its fitness.
 *
 * @param cId chromosome Id
 * @return fitness of the chromosome
 */
public double computeFitess(int cId) throws Exception {
    Instances cTrain = new Instances(cvTrain);
    Instances cTest = new Instances(cvTest);

    int len = 0;
    for (int i = 0; i < chro[cId].length; i++) {
        if (chro[cId][i] != -1) {
            len++;
        }
    }

    int[] deleteList = new int[data.numAttributes() - 1 - len];
    int delId = 0;

    for (int i = 0; i < data.numAttributes() - 1; i++) {
        boolean testContain = false;
        for (int j = 0; j < chro[cId].length; j++) {
            if (i == chro[cId][j]) {
                testContain = true;
            }
        }
        if (!testContain) {
            deleteList[delId] = i;
            delId++;
        }
    }

    Arrays.sort(deleteList);

    // reverse the array so attributes are deleted from the highest index down
    for (int i = 0; i < deleteList.length / 2; ++i) {
        int temp = deleteList[i];
        deleteList[i] = deleteList[deleteList.length - i - 1];
        deleteList[deleteList.length - i - 1] = temp;
    }

    for (int i = 0; i < deleteList.length; i++) {
        cTrain.deleteAttributeAt(deleteList[i]);
        cTest.deleteAttributeAt(deleteList[i]);
    }

    ////////////////////////////////////////////
    // compute fitness
    double fitness = 0.0;
    //boolean useDiversity = false;

    if (mode.equals("v")) {
        System.out.println("---------------------------------------------");
        System.out.println(" subset " + (cId + 1) + ":");
        System.out.println();
    }

    Ensemble classifier = new Ensemble(cTrain, cTest);
    classifier.ensemble(mode);

    double blockScore = classifier.blocking();
    double voteScore = classifier.voting();
    double diversityScore = 0.0;

    if (currGener < (terGener - (terGener / 5))) {
        if (diversity.equals("K")) {
            diversityScore = classifier.kappaDiversity();
        } else {
            diversityScore = classifier.doubleFaultDiversity();
        }
    }

    blockScore = Math.round(blockScore * 10000.0) / 10000.0;
    voteScore = Math.round(voteScore * 10000.0) / 10000.0;

    if (diversityScore != 0.0) {
        diversityScore = Math.round(diversityScore * 10000.0) / 10000.0;
        fitness = blockScore * 0.45 + voteScore * 0.45 + diversityScore * 0.1;
    } else {
        fitness = blockScore * 0.5 + voteScore * 0.5;
    }

    // average accuracy of the five classifiers
    if (mode.equals("v")) {
        System.out.println("block (average) AUC: " + blockScore);
        System.out.println("majority voting AUC: " + voteScore);
        if (diversityScore != 0.0) {
            if (diversity.equals("K")) {
                System.out.println("kappa diversity: " + diversityScore);
            } else {
                System.out.println("double fault diversity: " + diversityScore);
            }
        }
    }

    avgFitness += fitness;
    return fitness;
}
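A note on the reversal step above: Instances.deleteAttributeAt shifts every attribute index above the deleted position down by one, so the sorted delete list is walked from the highest index to the lowest to keep the remaining indices valid.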
From source file:aw_cluster.AW_Cluster.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    Scanner sc = new Scanner(System.in);
    Instances trainingData;
    ClusterEvaluation eval;
    String path;
    int pilihan;
    int jumlahCluster;
    int maxIter;
    int typeLinkage;

    do {
        System.out.println("Enter the algorithm choice: ");
        System.out.println("1. MyKMeans");
        System.out.println("2. MyAgnes");
        System.out.println("3. Exit");
        System.out.print("Choice: ");
        pilihan = sc.nextInt();

        if (pilihan == 1) {
            path = masukanFile(sc);
            System.out.println("Enter the number of clusters: ");
            jumlahCluster = sc.nextInt();
            System.out.println("Enter the maximum number of iterations: ");
            maxIter = sc.nextInt();

            BufferedReader data = new BufferedReader(new FileReader(path));
            trainingData = new Instances(data);

            myKMeans kmeans = new myKMeans();
            kmeans.setNumCluster(jumlahCluster);
            kmeans.setMaxIteration(maxIter);
            kmeans.buildClusterer(trainingData);

            eval = new ClusterEvaluation();
            eval.setClusterer(kmeans);
            eval.evaluateClusterer(trainingData);
            System.out.println("Cluster Evaluation: " + eval.clusterResultsToString());
            System.out.println("");
        } else if (pilihan == 2) {
            path = masukanFile(sc);
            System.out.println("Enter the number of clusters: ");
            jumlahCluster = sc.nextInt();
            typeLinkage = typeLinkage(sc);

            BufferedReader data = new BufferedReader(new FileReader(path));
            trainingData = new Instances(data);

            myAgnes agnes = new myAgnes();
            agnes.setNumCluster(jumlahCluster);
            agnes.setLinkage(typeLinkage);
            agnes.buildClusterer(trainingData);

            eval = new ClusterEvaluation();
            eval.setClusterer(agnes);
            eval.evaluateClusterer(trainingData);
            System.out.println("Cluster Evaluation: " + eval.clusterResultsToString());
            System.out.println("");
        }
    } while (pilihan != 3);
}
From source file:aw_cluster.myAgnes.java
@Override
public void buildClusterer(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    instances = new Instances(data);
    instances.setClassIndex(-1);

    aliveIndexes = new ArrayList();
    for (int i = 0; i < instances.numInstances(); i++)
        aliveIndexes.add(i);
    mergePairs = new ArrayList();

    distanceFunction.setInstances(instances);

    // Distance matrix initialization
    distanceMatrix = new Double[instances.numInstances()][instances.numInstances()];
    for (int i = 0; i < instances.numInstances(); i++) {
        for (int j = 0; j < instances.numInstances(); j++) {
            distanceMatrix[i][j] = distanceFunction.distance(instances.instance(i), instances.instance(j));
        }
    }

    while (aliveIndexes.size() > 1) {
        // Find the two nearest clusters
        MergePair bestPair = new MergePair(-1, -1, 0);
        for (int i = 0; i < aliveIndexes.size(); i++) {
            for (int j = i + 1; j < aliveIndexes.size(); j++) {
                int index_i = aliveIndexes.get(i), index_j = aliveIndexes.get(j);
                MergePair currentPair = new MergePair(index_i, index_j, distanceMatrix[index_i][index_j]);
                if (bestPair.i < 0 || bestPair.compareTo(currentPair) > 0)
                    bestPair = currentPair;
                else if (bestPair.compareTo(currentPair) == 0 && Math.random() < 0.5)
                    bestPair = currentPair;
            }
        }

        // Merge the two nearest clusters
        mergePairs.add(bestPair);
        int index_j = aliveIndexes.indexOf(bestPair.j);
        aliveIndexes.remove(index_j);

        // Update the distance matrix
        for (int i = 0; i < aliveIndexes.size(); i++) {
            int index = aliveIndexes.get(i);
            if (index == bestPair.i)
                continue;
            double dist = Math.min(distanceMatrix[index][bestPair.i], distanceMatrix[index][bestPair.j]);
            if (this.linkage == COMPLETE_LINKAGE)
                dist = Math.max(distanceMatrix[index][bestPair.i], distanceMatrix[index][bestPair.j]);
            distanceMatrix[index][bestPair.i] = dist;
            distanceMatrix[bestPair.i][index] = dist;
        }
    }

    // Construct clusters
    constuctCluster(numCluster);
}
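A note on the matrix update above: after clusters i and j merge, the distance from each remaining cluster to the merged cluster is taken as the minimum (single linkage) or maximum (complete linkage) of its distances to i and j, so the agglomeration runs entirely in place on the original distance matrix.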
From source file:aw_cluster.myKMeans.java
@Override
public void buildClusterer(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    Instances instances = new Instances(data);
    instances.setClassIndex(-1);

    if (instances.numInstances() == 0) {
        throw new RuntimeException("Dataset should not be empty");
    }

    assignments = new int[instances.numInstances()];
    centroid = new Instances(instances, numCluster);
    distanceFunction.setInstances(instances);
    squaredError = new double[numCluster];

    // Initialize centroids randomly from the seed
    Random random = new Random(getSeedRandom());
    Instances tempInstances = new Instances(instances);
    int tI = tempInstances.numInstances() - 1;
    while (tI >= 0 && centroid.numInstances() < numCluster) {
        int indexCentroid = random.nextInt(tI + 1);
        centroid.add(tempInstances.instance(indexCentroid));
        tempInstances.swap(tI, indexCentroid);
        tI--;
    }
    tempInstances = null;

    boolean converged = false;
    while (!converged) {
        converged = true;
        numIteration++;

        for (int i = 0; i < instances.numInstances(); ++i) {
            Instance toCluster = instances.instance(i);
            int clusterResult = clusterInstanceProcess(toCluster, true);
            if (clusterResult != assignments[i])
                converged = false;
            assignments[i] = clusterResult;
        }

        // update centroids
        Instances[] TempI = new Instances[numCluster];
        centroid = new Instances(instances, numCluster);
        for (int i = 0; i < TempI.length; ++i) {
            TempI[i] = new Instances(instances, 0);
        }
        for (int i = 0; i < instances.numInstances(); ++i) {
            TempI[assignments[i]].add(instances.instance(i));
        }
        for (int i = 0; i < TempI.length; ++i) {
            moveCentroid(TempI[i]);
        }

        if (converged)
            squaredError = new double[numCluster];
        if (numIteration == maxIteration)
            converged = true;

        sizeEachCluster = new int[numCluster];
        for (int i = 0; i < numCluster; ++i) {
            sizeEachCluster[i] = TempI[i].numInstances();
        }
    }
}
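A note on the two-argument constructor used above: new Instances(instances, numCluster) creates an empty dataset that shares the header of instances and reserves capacity for numCluster rows, which is why the centroids can then be added to it one by one.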
From source file:binarytreesom.clustering.BinaryTreeSOMClustering.java
/**
 * Loads the data set from a given ARFF file.
 *
 * @param filename path to the file which contains the data set
 * @return the data set as an Instances object
 * @throws java.io.IOException
 */
public static Instances readArff(String filename) throws IOException {
    Instances data;
    BufferedReader reader = new BufferedReader(new FileReader(filename));
    data = new Instances(reader);
    reader.close();
    //data.setClassIndex(data.numAttributes() - 1);
    return data;
}
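The Reader-based constructor used throughout these examples expects ARFF input. As an alternative, Weka's converter utilities pick a loader based on the file extension (ARFF, CSV, and others); a minimal sketch, where the class name ReadWithDataSource is a placeholder:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ReadWithDataSource {
    public static Instances read(String filename) throws Exception {
        // DataSource chooses a loader from the file extension
        DataSource source = new DataSource(filename);
        Instances data = source.getDataSet();
        // data.setClassIndex(data.numAttributes() - 1); // set if a class attribute is needed
        return data;
    }
}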