List of usage examples for the weka.core.Instances add method
@Override public boolean add(Instance instance)
From source file:mx.itesm.web2mexadl.mvc.MvcAnalyzer.java
License:Open Source License
/** * Generate the architecture document associated to the specified web * application data./* w ww. j a v a 2 s.c o m*/ * * @param dependencies * List containing the dependencies for each class to classify. * @param internalPackages * Project's internal packages. * @return Map containing the classification layer for each class. * @throws Exception * If an Exception occurs during classification. */ private static Map<String, Layer> generateArchitecture(final List<ClassDependencies> dependencies, final Map<String, Set<String>> internalPackages, final File outputDir) throws Exception { int viewCount; int modelCount; int instanceLayer; Instance instance; boolean valueFound; int controllerCount; Instances instances; String instanceType; String[] typeValues; Layer componentLayer; String[] suffixValues; Layer dependencyLayer; FastVector attributes; String[] externalApiValues; Map<String, Layer> returnValue; Set<String> currentPackageContent; Map<String, Layer> packagesClassification; Map<String, String[]> externalApiPackages; StringBuilder modelPackages; StringBuilder viewPackages; StringBuilder controllerPackages; // Model variables attributes = new FastVector(); for (Variable variable : Variable.values()) { attributes.addElement(variable.getAttribute()); } // Layer variable attributes.addElement(Layer.attribute); // Set the test instances, the Layer variable is unknown instances = new Instances("mvc", attributes, 0); instances.setClassIndex(Variable.values().length); // Valid suffixes to look for in the class names suffixValues = Util.getPropertyValues(Util.Variable.Suffix.getVariableName()); // Valid file types to look for in the component names typeValues = Util.getPropertyValues(Util.Variable.Type.getVariableName()); // Valid external api packages to look for in the classes dependencies externalApiValues = Util.getPropertyValues(Util.Variable.ExternalAPI.getVariableName()); externalApiPackages = new HashMap<String, String[]>(externalApiValues.length); for (int i = 
0; i < externalApiValues.length; i++) { if (!externalApiValues[i].equals("none")) { externalApiPackages.put(externalApiValues[i], Util.getPropertyValues("externalApi." + externalApiValues[i] + ".packages")); } } returnValue = new HashMap<String, Layer>(dependencies.size()); for (ClassDependencies classDependencies : dependencies) { // Variables + Layer instance = new Instance(Variable.values().length + 1); // Type instanceType = "java"; for (String validType : typeValues) { if (classDependencies.getClassName().endsWith("." + validType)) { instanceType = validType; break; } } instance.setValue(Variable.Type.getAttribute(), instanceType); // ExternalAPI valueFound = false; externalApi: for (String externalApi : externalApiValues) { if (externalApi.equals("none")) { continue; } // Check if any of the class' external dependencies match with // one of the key external dependencies if (classDependencies.getExternalDependencies() != null) { for (String externalDependency : classDependencies.getExternalDependencies()) { for (String externalPackage : externalApiPackages.get(externalApi)) { if (externalDependency.toLowerCase().startsWith(externalPackage)) { valueFound = true; instance.setValue(Variable.ExternalAPI.getAttribute(), externalApi); break externalApi; } } } } } // No key external dependency found if (!valueFound) { instance.setValue(Variable.ExternalAPI.getAttribute(), "none"); } // Suffix valueFound = false; for (String suffix : suffixValues) { if (classDependencies.getClassName().toLowerCase().endsWith(suffix)) { valueFound = true; instance.setValue(Variable.Suffix.getAttribute(), suffix); break; } } // No key suffix found if (!valueFound) { instance.setValue(Variable.Suffix.getAttribute(), "none"); } // Layer, the unknown variable instance.setMissing(Layer.attribute); instances.add(instance); instance.setDataset(instances); try { instanceLayer = (int) Util.classifier.classifyInstance(instance); } catch (Exception e) { // Default value instanceLayer = 0; 
logger.severe("Unable to classify: " + instance); } returnValue.put(classDependencies.getClassName(), Layer.values()[instanceLayer]); logger.info( classDependencies.getClassName() + " : " + returnValue.get(classDependencies.getClassName())); } // Check for any invalid relation viewPackages = new StringBuilder(); modelPackages = new StringBuilder(); controllerPackages = new StringBuilder(); packagesClassification = new HashMap<String, Layer>(internalPackages.size()); for (String currentPackage : internalPackages.keySet()) { modelCount = viewCount = controllerCount = 0; currentPackageContent = internalPackages.get(currentPackage); for (String component : currentPackageContent) { componentLayer = returnValue.get(component); if (componentLayer == Layer.Model) { modelCount++; } else if (componentLayer == Layer.View) { viewCount++; } else if (componentLayer == Layer.Controller) { controllerCount++; } } if ((modelCount > viewCount) && (modelCount > controllerCount)) { packagesClassification.put(currentPackage, Layer.Model); Util.addImplementationPackage(modelPackages, currentPackage); } else if ((viewCount > modelCount) && (viewCount > controllerCount)) { packagesClassification.put(currentPackage, Layer.View); Util.addImplementationPackage(viewPackages, currentPackage); } else if ((controllerCount > viewCount) && (controllerCount > modelCount)) { packagesClassification.put(currentPackage, Layer.Controller); Util.addImplementationPackage(controllerPackages, currentPackage); } else { packagesClassification.put(currentPackage, null); } } for (ClassDependencies classDependencies : dependencies) { // Code relations valueFound = false; componentLayer = returnValue.get(classDependencies.getClassName()); if (classDependencies.getInternalDependencies() != null) { for (String internalDependency : classDependencies.getInternalDependencies()) { dependencyLayer = returnValue.get(internalDependency); if (!componentLayer.isValidRelation(dependencyLayer)) { valueFound = true; 
returnValue.put(classDependencies.getClassName(), Layer.valueOf("Invalid" + componentLayer)); logger.info("Invalid relation detected between: " + classDependencies.getClassName() + " and " + internalDependency); } } } // Package relations if (!valueFound) { dependencyLayer = packagesClassification.get(classDependencies.getPackageName()); if ((dependencyLayer != null) && (componentLayer != dependencyLayer)) { returnValue.put(classDependencies.getClassName(), Layer.valueOf("Invalid" + componentLayer)); } } } // Export MexADL architecture MvcAnalyzer.exportToMexADL(outputDir, modelPackages.toString(), controllerPackages.toString(), viewPackages.toString()); return returnValue; }
From source file:myclusterer.MyKMeans.java
@Override public void buildClusterer(Instances instances) throws Exception { int N = instances.numInstances(); if (K < 1) K = 1;/*from w w w.j a va 2 s .c o m*/ if (N == 0 || N < K) return; getCapabilities().testWithFail(instances); this.instances = instances; distanceFunction.setInstances(instances); // assign first centroids randomly Random rand = new Random(); Set<Integer> centroidIdx = new HashSet<>(); while (centroidIdx.size() < K) { int x = rand.nextInt(N); centroidIdx.add(x); } centroids = new Instances(instances, K); centroidIdx.forEach((idx) -> { centroids.add(instances.instance(idx)); }); int[] prevCluster = new int[N]; for (int i = 0; i < N; ++i) prevCluster[i] = -1; List<Integer>[] tmpCluster = new List[K]; for (int i = 0; i < K; ++i) tmpCluster[i] = new ArrayList<>(); boolean converged = false; iterations = 0; while (!converged && iterations < maxIterations) { ++iterations; converged = true; for (int i = 0; i < K; ++i) tmpCluster[i].clear(); for (int i = 0; i < N; ++i) { int cluster = clusterInstance(instances.instance(i)); if (prevCluster[i] != cluster) { converged = false; prevCluster[i] = cluster; } tmpCluster[cluster].add(i); } // update centroid centroids = new Instances(instances, K); for (int i = 0; i < K; ++i) { Instances members = new Instances(instances, N); for (Integer member : tmpCluster[i]) members.add(instances.instance(member)); centroids.add(createCentroid(members)); } } clusters = new List[K]; for (int i = 0; i < K; ++i) { clusters[i] = new ArrayList<>(); for (Integer member : tmpCluster[i]) clusters[i].add(instances.instance(member)); } }
From source file:NaiveBayes.NaiveBayes.java
public String classify(Instances init) { int nAttributes = init.numAttributes(); Instance ins = new DenseInstance(nAttributes); Instances newData = init; Scanner s = new Scanner(System.in); Double in;//from ww w . j a v a 2 s. c o m System.out.println("Jumlah Atribut : " + (nAttributes - 1)); for (int i = 1; i <= nAttributes - 1; i++) { //Attribute a = train.attribute(i - 1); System.out.print("Attribute " + i + " : "); in = s.nextDouble(); ins.setValue(i, in); //newIns[i] = in; } newData.add(ins); double nomorKelas = classifyInstance(newData.lastInstance()); return init.attribute(init.numAttributes() - 1).value((int) nomorKelas); }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
public boolean getAttribCombos(Instances i, double cv) { List r = getAttribCombos(i.numAttributes(), cv); if (r == null) return false; for (Iterator ii = r.iterator(); ii.hasNext();) { double[] d = (double[]) ii.next(); i.add(new Instance(1., d)); }//from w w w . jav a 2s . c o m return true; }
From source file:net.sf.jclal.sampling.supervised.Resample.java
License:Open Source License
/** * creates the subsample with replacement * * @param dataSet The dataset to extract a percent of instances * @param sampleSize the size to generate * @param actualClasses The actual classes * @param classIndices The indexes of the classes *///w ww.ja v a 2s . c om public void createSubsampleWithReplacement(WekaDataset dataSet, int sampleSize, int actualClasses, int[] classIndices) { int originalSize = dataSet.getNumInstances(); Set<Integer> indexes = new HashSet<Integer>(); Instances labeledInstances = new Instances(dataSet.getDataset(), sampleSize); for (int i = 0; i < sampleSize; i++) { int index = 0; if (getRandgen().uniform(0, 1) < biasToUniformClass) { // Pick a random class (of those classes that actually appear) int cIndex = getRandgen().choose(0, actualClasses); for (int j = 0, k = 0; j < classIndices.length - 1; j++) { if ((classIndices[j] != classIndices[j + 1]) && (k++ >= cIndex)) { // Pick a random instance of the designated class index = classIndices[j] + getRandgen().choose(0, classIndices[j + 1] - classIndices[j]); break; } } } else { index = getRandgen().choose(0, originalSize); } labeledInstances.add((Instance) dataSet.instance(index).copy()); indexes.add(index); } setLabeledData(new WekaDataset(labeledInstances)); ArrayList<Container> indexesArray = new ArrayList<Container>(); for (Integer i : indexes) { indexesArray.add(new Container(i, i)); } //The array is ordered in descendent order OrderUtils.mergeSort(indexesArray, true); //Copy the entire dataset into unlabeled set Instances unlabeledInstances = new Instances(dataSet.getDataset()); //remove the instances that have been selected previously for (Container pair : indexesArray) { unlabeledInstances.remove(Integer.parseInt(pair.getValue().toString())); } setUnlabeledData(new WekaDataset(unlabeledInstances)); //clean up labeledInstances.clear(); unlabeledInstances.clear(); indexes.clear(); indexesArray.clear(); labeledInstances = null; unlabeledInstances = null; indexes = null; indexesArray = 
null; }
From source file:net.sf.jclal.sampling.supervised.Resample.java
License:Open Source License
/** * creates the subsample without replacement * * @param dataSet The dataset to extract a percent of instances * @param sampleSize The size to generate * @param actualClasses The actual classes * @param classIndices The indexes of the classes */// w ww . j a v a 2 s.c om public void createSubsampleWithoutReplacement(WekaDataset dataSet, int sampleSize, int actualClasses, int[] classIndices) { int origSize = dataSet.getNumInstances(); if (sampleSize > origSize) { sampleSize = origSize; System.err.println( "Resampling without replacement can only use percentage <=100% - " + "Using full dataset!"); } List<Integer>[] indices = new ArrayList[classIndices.length - 1]; List<Integer>[] indicesNew = new ArrayList[classIndices.length - 1]; // generate list of all indices to draw from for (int i = 0; i < classIndices.length - 1; i++) { indices[i] = new ArrayList<Integer>(classIndices[i + 1] - classIndices[i]); indicesNew[i] = new ArrayList<Integer>(indices[i].size()); for (int n = classIndices[i]; n < classIndices[i + 1]; n++) { indices[i].add(n); } } // draw X samples int currentSize = origSize; for (int i = 0; i < sampleSize; i++) { int index = 0; if (getRandgen().uniform(0, 1) < biasToUniformClass) { // Pick a random class (of those classes that actually appear) int cIndex = getRandgen().choose(0, actualClasses); for (int j = 0, k = 0; j < classIndices.length - 1; j++) { if ((classIndices[j] != classIndices[j + 1]) && (k++ >= cIndex)) { // no more indices for this class left, try again if (indices[j].isEmpty()) { i--; break; } // Pick a random instance of the designated class index = getRandgen().choose(0, indices[j].size()); indicesNew[j].add(indices[j].get(index)); indices[j].remove(index); break; } } } else { index = getRandgen().choose(0, currentSize); for (int n = 0; n < actualClasses; n++) { if (index < indices[n].size()) { indicesNew[n].add(indices[n].get(index)); indices[n].remove(index); break; } else { index -= indices[n].size(); } } currentSize--; } } // sort 
indices if (isInvertSelection()) { //Copy indicesNew into indicesNewTemp List<Integer>[] indicesNewTemp = new ArrayList[indicesNew.length]; int index = 0; for (List<Integer> list : indicesNew) { indicesNewTemp[index++] = new ArrayList(list); } //Copy indices into indicesNew indicesNew = new ArrayList[indices.length]; index = 0; for (List<Integer> list : indices) { indicesNew[index++] = new ArrayList(list); } //Copy indicesNewTemp into indices indices = indicesNewTemp; } else { for (int i = 0; i < indicesNew.length; i++) { Collections.sort(indicesNew[i]); } } Instances labeledInstances = new Instances(dataSet.getDataset(), sampleSize); // addAll to ouput for (int i = 0; i < indicesNew.length; i++) { for (int n = 0; n < indicesNew[i].size(); n++) { labeledInstances.add((Instance) dataSet.instance(indicesNew[i].get(n)).copy()); } } setLabeledData(new WekaDataset(labeledInstances)); Instances unlabeledInstances = new Instances(dataSet.getDataset(), origSize - sampleSize); // addAll to ouput for (int i = 0; i < indices.length; i++) { for (int n = 0; n < indices[i].size(); n++) { unlabeledInstances.add((Instance) dataSet.instance(indices[i].get(n)).copy()); } } setUnlabeledData(new WekaDataset(unlabeledInstances)); // clean up for (int i = 0; i < indices.length; i++) { indices[i].clear(); indicesNew[i].clear(); } indices = null; indicesNew = null; labeledInstances.clear(); unlabeledInstances.clear(); labeledInstances = null; unlabeledInstances = null; }
From source file:net.sf.jclal.sampling.unsupervised.Resample.java
License:Open Source License
/** * creates the subsample with replacement * * @param dataSet The dataset to extract a percent of instances * @param sampleSize the size to generate *///from w w w. j a v a2s. c o m public void createSubsampleWithReplacement(IDataset dataSet, int sampleSize) { int origSize = dataSet.getNumInstances(); Set<Integer> indexes = new HashSet<Integer>(); Instances labeledInstances = new Instances(dataSet.getDataset(), sampleSize); //Fill the labeled set for (int i = 0; i < sampleSize; i++) { int index = getRandgen().choose(0, origSize); labeledInstances.add((Instance) dataSet.instance(index).copy()); indexes.add(index); } if (dataSet instanceof WekaDataset) { setLabeledData(new WekaDataset(labeledInstances)); } if (dataSet instanceof MulanDataset) { setLabeledData(new MulanDataset(labeledInstances, ((MulanDataset) dataSet).getLabelsMetaData())); } ArrayList<Container> indexesArray = new ArrayList<Container>(); for (Integer i : indexes) { indexesArray.add(new Container(i, i)); } //The array is ordered in descendent order OrderUtils.mergeSort(indexesArray, true); //Copy the entire dataset into unlabeled set Instances unlabeledInstances = new Instances(dataSet.getDataset()); //remove the instances that have been selected previously for (Container pair : indexesArray) { unlabeledInstances.remove(Integer.parseInt(pair.getValue().toString())); } if (dataSet instanceof WekaDataset) { setUnlabeledData(new WekaDataset(unlabeledInstances)); } if (dataSet instanceof MulanDataset) { setUnlabeledData(new MulanDataset(unlabeledInstances, ((MulanDataset) dataSet).getLabelsMetaData())); } // clean up unlabeledInstances.clear(); labeledInstances.clear(); unlabeledInstances = null; labeledInstances = null; indexes.clear(); indexesArray.clear(); indexes = null; indexesArray = null; }
From source file:net.sf.jclal.sampling.unsupervised.Resample.java
License:Open Source License
/** * creates the subsample without replacement * * @param dataSet The dataset to extract a percent of instances * @param sampleSize the size to generate *//*from w ww . ja v a2s.co m*/ public void createSubsampleWithoutReplacement(IDataset dataSet, int sampleSize) { int origSize = dataSet.getNumInstances(); if (sampleSize > origSize) { sampleSize = origSize; System.err.println( "Resampling with replacement can only use percentage <=100% - " + "Using full dataset!"); } List<Integer> indixes = new ArrayList<Integer>(origSize); List<Integer> indixesNew = new ArrayList<Integer>(sampleSize); // generate list of all indices to draw from for (int i = 0; i < origSize; i++) { indixes.add(i); } // draw X random indices (selected ones get removed before next draw) for (int i = 0; i < sampleSize; i++) { int index = getRandgen().choose(0, indixes.size()); indixesNew.add(indixes.get(index)); indixes.remove(index); } if (isInvertSelection()) { List<Integer> indixesNewTemp = new ArrayList<Integer>(indixesNew); indixesNew = indixes; indixes = new ArrayList<Integer>(indixesNewTemp); } else { Collections.sort(indixesNew); } Instances labeledInstances = new Instances(dataSet.getDataset(), sampleSize); //Fill the labeled set for (int i = 0; i < indixesNew.size(); i++) { labeledInstances.add((Instance) dataSet.instance(indixesNew.get(i)).copy()); } if (dataSet instanceof WekaDataset) { setLabeledData(new WekaDataset(labeledInstances)); } if (dataSet instanceof MulanDataset) { setLabeledData(new MulanDataset(labeledInstances, ((MulanDataset) dataSet).getLabelsMetaData())); } Instances unlabeledInstances = new Instances(dataSet.getDataset(), origSize - sampleSize); //Fill the unlabeled set for (int i = 0; i < indixes.size(); i++) { unlabeledInstances.add((Instance) dataSet.instance(indixes.get(i)).copy()); } if (dataSet instanceof WekaDataset) { setUnlabeledData(new WekaDataset(unlabeledInstances)); } if (dataSet instanceof MulanDataset) { setUnlabeledData(new 
MulanDataset(unlabeledInstances, ((MulanDataset) dataSet).getLabelsMetaData())); } // clean up labeledInstances.clear(); unlabeledInstances.clear(); indixes.clear(); indixesNew.clear(); labeledInstances = null; unlabeledInstances = null; indixes = null; indixesNew = null; }
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.AbstractClusteringStrategy.java
License:Apache License
/** * This method creates a new instance set based on the available * behaviorModelsAbsolute.// www.j a v a2s . c o m * * @param behaviorModelsAbsolute * @return instance set */ protected Instances getInstances(BehaviorModelAbsolute[] behaviorModelsAbsolute) throws Exception { // init the fastVector with attributesNames from the first // behaviorModel. FastVector fastVector = getFastVector(behaviorModelsAbsolute[0]); // create empty instance set with the number of behaviorModelsRelative. Instances instances = new Instances("BehaviorModelAbsoluteInstanceSet", fastVector, behaviorModelsAbsolute.length); // set the last attribute as class index instances.setClassIndex(instances.numAttributes() - 1); // Each behaviorModelsRelative will be transformed to an instance. To do // that, that transition matrix will be // transformed in a vector. Set number of attributes of instance: n x (n // +1) exit state // Matrix. for (BehaviorModelAbsolute behaviorModelAbsolute : behaviorModelsAbsolute) { // retieve instance from behaviorModelRelative Instance instance = getInstance(behaviorModelAbsolute, instances); // add instance to instanceset, at the end of the set instances.add(instance); } // save input data as arff file. This arff file can be opened with weka // application. 
ArffSaver saver = new ArffSaver(); saver.setInstances(instances); saver.setFile(new File(CommandLineArgumentsHandler.getOutputDirectory() + "/data_clustering.arff")); saver.writeBatch(); // Remove UseLess // weka.filters.unsupervised.attribute.RemoveUseless filterUseLess = new // weka.filters.unsupervised.attribute.RemoveUseless(); // filterUseLess.setInputFormat(instances); // Instances returnInstances = Filter.useFilter(instances, // filterUseLess); // filter instances weka.filters.unsupervised.attribute.Remove filter = new weka.filters.unsupervised.attribute.Remove(); filter.setAttributeIndices("" + (instances.classIndex() + 1)); filter.setInputFormat(instances); Instances filteredInstances = Filter.useFilter(instances, filter); return filteredInstances; }
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.ClusteringTask.java
License:Open Source License
/**
 * Creates the weka data set for clustering of samples.
 *
 * <p>
 * One numeric attribute {@code Var<i>} is created per column of the first
 * row. When the clustering module is a {@code HierarClusterer}, a trailing
 * string attribute {@code name} is added and filled with the name of the raw
 * data file that corresponds to each row.
 *
 * @param rawData
 *            Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
    FastVector columns = new FastVector();

    int column = 0;
    while (column < rawData[0].length) {
        columns.addElement(new Attribute("Var" + column));
        column++;
    }

    if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
        // A null value list makes this a string attribute
        columns.addElement(new Attribute("name", (FastVector) null));
    }

    Instances dataset = new Instances("Dataset", columns, 0);

    for (int row = 0; row < rawData.length; row++) {
        double[] rowValues = new double[dataset.numAttributes()];
        System.arraycopy(rawData[row], 0, rowValues, 0, rawData[0].length);

        if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
            // The last slot stores the value returned for the registered file name
            rowValues[dataset.numAttributes() - 1] = dataset.attribute("name")
                    .addStringValue(this.selectedRawDataFiles[row].getName());
        }

        dataset.add(new SparseInstance(1.0, rowValues));
    }

    return dataset;
}