List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:DiversifyQuery.DivTopK.java
/** * Sets the format of the filtered instances that are output. I.e. will * include k attributes each shapelet distance and a class value * * @param inputFormat the format of the input data * @return a new Instances object in the desired output format * @throws Exception if all required parameters of the filter are not * initialised correctly//from ww w. j a v a2 s .co m */ protected Instances determineOutputFormat(Instances inputFormat, ArrayList<LegacyShapelet> shapelets) throws Exception { //Set up instances size and format. //int length = this.numShapelets; int length = shapelets.size(); FastVector atts = new FastVector(); String name; for (int i = 0; i < length; i++) { name = "Shapelet_" + i; atts.addElement(new Attribute(name)); } if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); FastVector vals = new FastVector(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.addElement(target.value(i)); } atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/** * Sets the format of the filtered instances that are output. I.e. will * include k attributes each shapelet distance and a class value * * @param inputFormat the format of the input data * @return a new Instances object in the desired output format * @throws Exception if all required parameters of the filter are not * initialised correctly/*from ww w .j av a 2s . c o m*/ */ @Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { if (this.numShapelets < 1) { throw new Exception( "ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1"); } //Set up instances size and format. //int length = this.numShapelets; int length = this.shapelets.size(); FastVector atts = new FastVector(); String name; for (int i = 0; i < length; i++) { name = "Shapelet_" + i; atts.addElement(new Attribute(name)); } if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); FastVector vals = new FastVector(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.addElement(target.value(i)); } atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
@Override public Instances process(Instances data) throws Exception { if (this.numShapelets < 1) { throw new Exception( "Number of shapelets initialised incorrectly - please select value of k greater than or equal to 1 (Usage: setNumberOfShapelets"); }/*w w w . j a va 2 s . c om*/ int maxPossibleLength = data.instance(0).numAttributes() - 1; if (data.classIndex() < 0) { throw new Exception("Require that the class be set for the ShapeletTransform"); } if (this.minShapeletLength < 1 || this.maxShapeletLength < 1 || this.maxShapeletLength < this.minShapeletLength || this.maxShapeletLength > maxPossibleLength) { throw new Exception("Shapelet length parameters initialised incorrectly"); } //Sort data in round robin order dataSourceIDs = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { dataSourceIDs[i] = i; } // data = roundRobinData(data, dataSourceIDs); if (this.shapeletsTrained == false) { // shapelets discovery has not yet been caried out, so do so this.shapelets = findDiversityTopKShapelets(this.numShapelets, data, this.minShapeletLength, this.maxShapeletLength); // get k shapelets ATTENTION this.shapeletsTrained = true; if (!supressOutput) { System.out.println(shapelets.size() + " Shapelets have been generated"); } } Instances output = determineOutputFormat(data); // for each data, get distance to each shapelet and create new instance for (int i = 0; i < data.numInstances(); i++) { // for each data Instance toAdd = new Instance(this.shapelets.size() + 1); int shapeletNum = 0; for (LegacyShapelet s : this.shapelets) { double dist = subseqDistance(s.content, data.instance(i)); toAdd.setValue(shapeletNum++, dist); } toAdd.setValue(this.shapelets.size(), data.instance(i).classValue()); output.add(toAdd); } return output; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
public ArrayList<LegacyShapelet> findDiversityTopKShapelets(int numShapelets, Instances data, int minShaepeletLength, int maxShapeletLength) throws Exception { ArrayList<LegacyShapelet> kShapelets = new ArrayList<LegacyShapelet>(); //store up to k shapeles overall ArrayList<LegacyShapelet> tempKShapelets; //store temporary k shapelets each iteration ArrayList<LegacyShapelet> seriesShapelets = new ArrayList<LegacyShapelet>(); //store all temporary k shapelets each itreration for diversifying process int saxLENGTH = 15; int w = 4;/*from w w w . j a v a 2 s . co m*/ int R = 10; double percentMask = 0.25; int topK = 10; TreeMap<Double, Integer> classDistributions = getClassDistributions(data); //calc info gain//calc info gain//calc info gain//calc info gain int numClass = classDistributions.size(); if (!supressOutput) { System.out.println("Processing data: "); } int numInstances = data.numInstances(); for (int length = minShaepeletLength; length <= maxShapeletLength; length++) { createSAXList(length, saxLENGTH, w, data); randomProjection(R, percentMask, saxLENGTH); scoreAllSAX(R, numClass, data); tempKShapelets = findBestTopKSAX(length, topK, data, numClass); for (int i = 0; i < tempKShapelets.size(); i++) { seriesShapelets.add(tempKShapelets.get(i)); } USAXMap.clear(); scoreList.clear(); } ArrayList<GraphNode> Graph = new ArrayList<GraphNode>(); Graph = constructShapeletGraph(seriesShapelets, data); kShapelets = DiversifyTopKQuery(Graph, numShapelets); return kShapelets; // return seriesShapelets; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
public ArrayList<LegacyShapelet> findBestTopKSAX(int subsequenceLength, int top_k, Instances data, int numClass) { int numObject = data.numInstances(); ArrayList<Point> Dist = new ArrayList<>(numObject); int word;//from w w w . j a v a2 s . c o m int kk; double gain, distanceThreshold, gap; int qObject, qPosition; USAXElmentType usax; TreeMap<Double, Integer> classDistributions = getClassDistributions(data); // used to calc info gain double[] candidate = new double[subsequenceLength]; ArrayList<LegacyShapelet> shapelets = new ArrayList<LegacyShapelet>(); if (top_k > 0) { Collections.sort(scoreList, new Comparator<Map.Entry<Integer, Double>>() { @Override public int compare(Map.Entry<Integer, Double> a, Map.Entry<Integer, Double> b) { return ((Double) b.getValue()).compareTo((Double) a.getValue()); } }); } for (int k = 0; k < Math.min(top_k, (int) scoreList.size()); k++) { word = scoreList.get(k).getKey(); usax = USAXMap.get(word); for (kk = 0; kk < Math.min((int) usax.SAXIdArrayList.size(), 1); kk++) { qObject = usax.SAXIdArrayList.get(kk).x; qPosition = usax.SAXIdArrayList.get(kk).y; for (int i = 0; i < subsequenceLength; i++) { candidate[i] = data.instance(qObject).value(qPosition + i); } candidate = zNorm(candidate, false); LegacyShapelet candidateShapelet = checkCandidate(candidate, data, qObject, qPosition, classDistributions, null); shapelets.add(candidateShapelet); } } return shapelets; }
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/**
 * Builds the SAX word map (USAXMap) for all subsequences of the given length,
 * using an incremental sliding window: running sums (ex, ex2) give the mean/std
 * of the current window, and per-segment sums (sumSegment) are updated in O(saxLength)
 * per shift instead of being recomputed.
 *
 * NOTE(review): w and saxLength are recomputed from subsequenceLength on entry,
 * so the passed-in values only seed the calculation.
 *
 * @param subsequenceLength length of the sliding window
 * @param saxLength desired number of SAX segments (recomputed to fit w)
 * @param w segment width (recomputed as ceil(subsequenceLength / saxLength))
 * @param data the data set whose series are scanned
 */
protected void createSAXList(int subsequenceLength, int saxLength, int w, Instances data) {
    // Re-derive segment width and word length so that saxLength * w covers the window.
    w = (int) Math.ceil((double) subsequenceLength / saxLength);
    saxLength = (int) Math.ceil((double) subsequenceLength / w);
    double ex, ex2, mean, std;
    // Per-segment running sums of the current window.
    double[] sumSegment = new double[saxLength];
    // Number of points in each segment (last segment may be shorter).
    int[] elementSegment = new int[saxLength];
    int j, jSt, k, slot, objectId;
    double dataPoint;
    int word, previousWord;
    for (k = 0; k < saxLength; k++) {
        elementSegment[k] = w;
    }
    // The final segment absorbs the remainder when w does not divide evenly.
    elementSegment[saxLength - 1] = subsequenceLength - (saxLength - 1) * w;
    for (objectId = 0; objectId < data.numInstances(); objectId++) {
        ex = ex2 = 0;
        previousWord = -1;
        for (k = 0; k < saxLength; k++) {
            sumSegment[k] = 0;
        }
        // toDoubleArray includes the class value as the last element;
        // the loops below stop before it (length - 1).
        double[] timeSeriesObject = data.instance(objectId).toDoubleArray();
        // Case 1: fill the initial window of the series.
        for (j = 0; (j < timeSeriesObject.length - 1) && (j < subsequenceLength); j++) {
            dataPoint = timeSeriesObject[j];
            ex += dataPoint;
            ex2 += dataPoint * dataPoint;
            slot = (int) Math.floor(j / w); // segment index for position j
            sumSegment[slot] += dataPoint;
        }
        // Case 2: slide the window one step at a time, updating sums incrementally.
        for (j = j; j <= timeSeriesObject.length - 1; j++) {
            jSt = j - subsequenceLength; // start index of the current window
            mean = ex / subsequenceLength;
            std = Math.sqrt(ex2 / subsequenceLength - mean * mean);
            // Create the SAX word from the per-segment sums.
            word = createSAXWord(sumSegment, elementSegment, mean, std, saxLength);
            // Numerosity reduction: only record a word when it differs from the previous one.
            if (word != previousWord) {
                previousWord = word;
                if (!(USAXMap.containsKey(word))) {
                    USAXMap.put(word, null);
                    USAXElmentType usax = new USAXElmentType();
                    usax.objectHashSet.add(objectId);
                    usax.SAXIdArrayList.add(new Point(objectId, jSt));
                    USAXMap.put(word, usax);
                } else {
                    USAXMap.get(word).objectHashSet.add(objectId);
                    USAXMap.get(word).SAXIdArrayList.add(new Point(objectId, jSt));
                }
            }
            // Prepare for the next shift: drop the outgoing point, add the incoming one,
            // and shift each segment's sum by one segment width.
            if (j < timeSeriesObject.length - 1) {
                ex -= timeSeriesObject[jSt];
                ex2 -= timeSeriesObject[jSt] * timeSeriesObject[jSt];
                for (k = 0; k < saxLength - 1; k++) {
                    sumSegment[k] -= timeSeriesObject[jSt + k * w];
                    sumSegment[k] += timeSeriesObject[jSt + (k + 1) * w];
                }
                // Last (possibly shorter) segment is clamped to the window end.
                sumSegment[k] -= timeSeriesObject[jSt + k * w];
                sumSegment[k] += timeSeriesObject[jSt + Math.min((k + 1) * w, subsequenceLength)];
                dataPoint = timeSeriesObject[j];
                ex += dataPoint;
                ex2 += dataPoint * dataPoint;
            }
        }
    }
}
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/**
 * Protected method to check a candidate shapelet. Functions by passing in the
 * raw data, and returning an assessed ShapeletTransform object.
 *
 * @param candidate the data from the candidate ShapeletTransform
 * @param data the entire data set to compare the candidate to
 * @param seriesId series id from the dataset that the candidate came from
 * @param startPos start position in the series where the candidate came from
 * @param classDistribution a TreeMap&lt;Double, Integer&gt; in the form of
 *            &lt;Class Value, Frequency&gt; to describe the dataset composition
 * @param qualityBound optional pruning bound; pass null to disable pruning
 * @return a fully-computed ShapeletTransform, including the quality of this
 *         candidate, or null if the candidate was pruned by the quality bound
 */
protected LegacyShapelet checkCandidate(double[] candidate, Instances data, int seriesId, int startPos,
        TreeMap classDistribution, QualityBound.ShapeletQualityBound qualityBound) {
    // create orderline by looping through data set and calculating the subsequence
    // distance from candidate to all data, inserting in order.
    ArrayList<OrderLineObj> orderline = new ArrayList<OrderLineObj>();
    boolean pruned = false;
    for (int i = 0; i < data.numInstances(); i++) {
        // Check if it is possible to prune the candidate before computing more distances.
        if (qualityBound != null) {
            if (qualityBound.pruneCandidate()) {
                pruned = true;
                break;
            }
        }
        double distance = 0.0;
        // The candidate's own series is assigned distance 0 by definition.
        if (i != seriesId) {
            distance = subseqDistance(candidate, data.instance(i));
        }
        double classVal = data.instance(i).classValue();
        // without early abandon, it is faster to just add and sort at the end
        orderline.add(new OrderLineObj(distance, classVal));
        // Update qualityBound - presumably each bounding method for different quality
        // measures will have a different update procedure.
        if (qualityBound != null) {
            qualityBound.updateOrderLine(orderline.get(orderline.size() - 1));
        }
    }
    // note: early abandon entropy pruning would appear here, but has been omitted
    // in favour of a clear multi-class information gain calculation. Could be added
    // in this method in the future for speed up, but distance early abandon is more important.
    // If the shapelet was pruned, it should no longer be considered in further processing.
    if (pruned) {
        return null;
    } else {
        // create a shapelet object to store all necessary info, i.e. content,
        // origin, and the assessed quality under the configured measure.
        LegacyShapelet shapelet = new LegacyShapelet(candidate, seriesId, startPos, this.qualityMeasure);
        shapelet.calculateQuality(orderline, classDistribution);
        shapelet.calcInfoGainAndThreshold(orderline, classDistribution);
        return shapelet;
    }
}
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/**
 * Calculates the class distribution of a dataset. Main purpose is for computing
 * shapelet qualities.
 *
 * Fix over the previous version: the old code linearly scanned the map's entire
 * key set for every instance (O(n * k), plus a fragile boxed-Double == comparison
 * against a primitive); a direct map lookup is equivalent and O(n log k).
 *
 * @param data the input data set that the class distribution is to be derived from
 * @return a TreeMap&lt;Double, Integer&gt; in the form of &lt;Class Value, Frequency&gt;
 */
public static TreeMap<Double, Integer> getClassDistributions(Instances data) {
    TreeMap<Double, Integer> classDistribution = new TreeMap<Double, Integer>();
    for (int i = 0; i < data.numInstances(); i++) {
        double classValue = data.instance(i).classValue();
        Integer count = classDistribution.get(classValue);
        // First occurrence starts at 1; otherwise increment the stored frequency.
        classDistribution.put(classValue, count == null ? 1 : count + 1);
    }
    return classDistribution;
}
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
/**
 * Runs 10-fold cross-validation of the given classifier on the dataset's ARFF model
 * file, prints the evaluation summary/confusion matrix, and writes per-instance
 * predicted labels (CSV), the predicted ARFF, and a meta-information file.
 *
 * Instances are tagged with an ID attribute (first attribute) so shuffled
 * predictions can be mapped back to their original row; the ID is stripped during
 * training/prediction via a Remove filter inside a FilteredClassifier.
 *
 * @param wekaClassifier which classifier to evaluate
 * @param dataset the dataset whose ARFF file under MODELS_DIR is used
 * @throws Exception on any Weka or I/O failure
 */
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator (time-seeded: folds differ between runs)
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances so predictions can be traced back to source rows
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter (strips the ID attribute inside the FilteredClassifier)
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier so each fold trains from scratch
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate (the FilteredClassifier strips the ID before predicting)
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        // NOTE(review): AddClassification is given the inner classifier and
        // Filter.useFilter(train, filter) retrains it on the UNfiltered train set
        // (ID attribute included) — presumably intentional for prediction output,
        // but verify it matches the evaluation above.
        AddClassification filter = new AddClassification();
        filter.setClassifier(classifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier

        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores: index each prediction by its (1-based) ID attribute
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;
        // Predicted label is the second-to-last attribute (last is the error flag)
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".csv"), sb.toString());

    // Output prediction arff
    DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/"
            + dataset.toString() + ".predicted.arff", predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".meta.txt"), sb.toString());
}
From source file:dkpro.similarity.experiments.sts2013.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception { for (Dataset dataset : datasets) { // Set parameters int folds = 10; Classifier baseClassifier = new LinearRegression(); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" }); Instances data = DataSource.read( MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff"); data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random);//from w w w . j ava 2s .c o m // Perform cross-validation Instances predictedData = null; Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter Filter logFilter = new LogFilter(); logFilter.setInputFormat(train); train = Filter.useFilter(train, logFilter); logFilter.setInputFormat(test); test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(classifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); 
filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) { predictedData = new Instances(pred, 0); } for (int j = 0; j < pred.numInstances(); j++) { predictedData.add(pred.instance(j)); } } // Prepare output scores double[] scores = new double[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; double value = predInst.value(predInst.attribute(valueIdx)); scores[id] = value; // Limit to interval [0;5] if (scores[id] > 5.0) { scores[id] = 5.0; } if (scores[id] < 0.0) { scores[id] = 0.0; } } // Output StringBuilder sb = new StringBuilder(); for (Double score : scores) { sb.append(score.toString() + LF); } FileUtils.writeStringToFile( new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"), sb.toString()); } }