List of usage examples for weka.core.Instances.instance
public Instance instance(int index)
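Instances.instance(int) returns the Instance stored at the given zero-based index, which is the access pattern every example below relies on. A minimal self-contained sketch, assuming a local ARFF file named data.arff (a placeholder path):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstanceAccessExample {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);
        // Index-based access to each row of the dataset.
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            System.out.println("instance " + i + ": class = " + inst.classValue());
        }
    }
}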
From source file:DiversifyQuery.DivTopK.java
public Instances transformData(Instances data) throws Exception {
    ArrayList<LegacyShapelet> shapelets = new ArrayList<>();
    // Collect the shapelets gathered for each result-set size. (The original
    // loop condition "i <= 1" was never true for i = 5, so the body was
    // unreachable; a decrementing loop needs "i >= 1".)
    for (int i = 5; i >= 1; i--) {
        if (DResultSet.get(i).result.size() == i) {
            shapelets.addAll(DResultSet.get(i).result);
        }
    }

    if (shapelets.size() < 1) {
        throw new Exception("Number of shapelets initialised incorrectly - please select a value of k greater than or equal to 1 (Usage: setNumberOfShapelets)");
    }

    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    Instances output = determineOutputFormat(data, shapelets);

    // For each instance, compute the distance to each shapelet and build a new instance.
    for (int i = 0; i < data.numInstances(); i++) {
        Instance toAdd = new Instance(shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : shapelets) {
            double dist = subsequenceDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}
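Note that new Instance(int) in the loop above only compiles against Weka 3.6 and earlier; from Weka 3.7 on, Instance is an interface and DenseInstance is the concrete class. A minimal sketch of the equivalent construction on the newer API:

import weka.core.DenseInstance;
import weka.core.Instance;

// Weka 3.7+ replacement for "new Instance(numAttributes)":
int numAttributes = 3; // placeholder
Instance toAdd = new DenseInstance(numAttributes);
toAdd.setValue(0, 0.5); // values are set by index exactly as before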
From source file:DiversifyQuery.DivTopK.java
public static ArrayList<LegacyShapelet> readShapelets(String fileName, Instances data) {
    ArrayList<LegacyShapelet> shapeletsList = new ArrayList<>();
    LegacyShapelet shapelet;
    try {
        // The original opened the hard-coded "shapeletsResult.txt" and ignored
        // the fileName parameter; the parameter is used here instead.
        Scanner sc = new Scanner(new File(fileName));
        while (sc.hasNext()) {
            // Fields per line: seriesId, startPos, length, gain, gap, distanceThreshold.
            shapelet = new LegacyShapelet(sc.nextInt(), sc.nextInt(), sc.nextInt(),
                    sc.nextDouble(), sc.nextDouble(), sc.nextDouble());
            double[] contentValue = new double[shapelet.length];
            for (int i = 0; i < shapelet.length; i++) {
                contentValue[i] = data.instance(shapelet.seriesId).value(shapelet.startPos + i);
            }
            shapelet.content = contentValue;
            shapeletsList.add(shapelet);
        }
        sc.close();
    } catch (Exception e) {
        System.out.println("Failed to read shapelets!");
        e.printStackTrace();
    }
    return shapeletsList;
}
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
@Override
public Instances process(Instances data) throws Exception {
    if (this.numShapelets < 1) {
        throw new Exception("Number of shapelets initialised incorrectly - please select a value of k greater than or equal to 1 (Usage: setNumberOfShapelets)");
    }

    int maxPossibleLength = data.instance(0).numAttributes() - 1;

    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    if (this.minShapeletLength < 1 || this.maxShapeletLength < 1
            || this.maxShapeletLength < this.minShapeletLength
            || this.maxShapeletLength > maxPossibleLength) {
        throw new Exception("Shapelet length parameters initialised incorrectly");
    }

    // Sort data in round-robin order
    dataSourceIDs = new int[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
        dataSourceIDs[i] = i;
    }
    // data = roundRobinData(data, dataSourceIDs);

    if (!this.shapeletsTrained) {
        // Shapelet discovery has not yet been carried out, so do it now.
        this.shapelets = findDiversityTopKShapelets(this.numShapelets, data,
                this.minShapeletLength, this.maxShapeletLength); // get k shapelets
        this.shapeletsTrained = true;
        if (!supressOutput) {
            System.out.println(shapelets.size() + " shapelets have been generated");
        }
    }

    Instances output = determineOutputFormat(data);

    // For each instance, compute the distance to each shapelet and build a new instance.
    for (int i = 0; i < data.numInstances(); i++) {
        Instance toAdd = new Instance(this.shapelets.size() + 1); // Weka 3.6-style construction (see the DenseInstance note above)
        int shapeletNum = 0;
        for (LegacyShapelet s : this.shapelets) {
            double dist = subseqDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(this.shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}
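A hypothetical call site for process(), sketched from the checks the method itself performs; the setter name comes from the exception message, the file path is a placeholder, and the shapelet length parameters would also need to be initialised through whatever API the class exposes:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

DiversifyTopKShaepelet transform = new DiversifyTopKShaepelet();
transform.setNumberOfShapelets(10); // setter named in the exception message above (assumed)
// minShapeletLength / maxShapeletLength must also be set; those setters are not shown in the source.
Instances train = DataSource.read("train.arff"); // placeholder path
train.setClassIndex(train.numAttributes() - 1);  // process() requires the class index to be set
Instances shapeletFeatures = transform.process(train);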
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
public ArrayList<LegacyShapelet> findBestTopKSAX(int subsequenceLength, int top_k, Instances data, int numClass) {
    int word;
    int kk;
    int qObject, qPosition;
    USAXElmentType usax;
    TreeMap<Double, Integer> classDistributions = getClassDistributions(data); // used to calculate info gain
    double[] candidate = new double[subsequenceLength];
    ArrayList<LegacyShapelet> shapelets = new ArrayList<LegacyShapelet>();

    // Sort the SAX words by score, highest first.
    if (top_k > 0) {
        Collections.sort(scoreList, new Comparator<Map.Entry<Integer, Double>>() {
            @Override
            public int compare(Map.Entry<Integer, Double> a, Map.Entry<Integer, Double> b) {
                return b.getValue().compareTo(a.getValue());
            }
        });
    }

    for (int k = 0; k < Math.min(top_k, scoreList.size()); k++) {
        word = scoreList.get(k).getKey();
        usax = USAXMap.get(word);
        for (kk = 0; kk < Math.min(usax.SAXIdArrayList.size(), 1); kk++) {
            qObject = usax.SAXIdArrayList.get(kk).x;
            qPosition = usax.SAXIdArrayList.get(kk).y;
            // Extract and z-normalise the candidate subsequence.
            for (int i = 0; i < subsequenceLength; i++) {
                candidate[i] = data.instance(qObject).value(qPosition + i);
            }
            candidate = zNorm(candidate, false);
            LegacyShapelet candidateShapelet = checkCandidate(candidate, data, qObject, qPosition,
                    classDistributions, null);
            shapelets.add(candidateShapelet);
        }
    }
    return shapelets;
}
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
protected void createSAXList(int subsequenceLength, int saxLength, int w, Instances data) {
    w = (int) Math.ceil((double) subsequenceLength / saxLength);
    saxLength = (int) Math.ceil((double) subsequenceLength / w);

    double ex, ex2, mean, std;
    double[] sumSegment = new double[saxLength]; // running sum of each SAX segment
    int[] elementSegment = new int[saxLength];   // number of points in each segment
    int j, jSt, k, slot, objectId;
    double dataPoint;
    int word, previousWord;

    for (k = 0; k < saxLength; k++) {
        elementSegment[k] = w;
    }
    // The last segment may hold fewer than w points.
    elementSegment[saxLength - 1] = subsequenceLength - (saxLength - 1) * w;

    for (objectId = 0; objectId < data.numInstances(); objectId++) {
        ex = ex2 = 0;
        previousWord = -1;
        for (k = 0; k < saxLength; k++) {
            sumSegment[k] = 0;
        }

        double[] timeSeriesObject = data.instance(objectId).toDoubleArray();

        // Case 1: fill the initial window (length - 1: the last attribute is the class value).
        for (j = 0; (j < timeSeriesObject.length - 1) && (j < subsequenceLength); j++) {
            dataPoint = timeSeriesObject[j];
            ex += dataPoint;
            ex2 += dataPoint * dataPoint;
            slot = j / w; // segment that this point falls into
            sumSegment[slot] += dataPoint;
        }

        // Case 2: slide the window one point at a time, updating the sums incrementally.
        for (; j <= timeSeriesObject.length - 1; j++) {
            jSt = j - subsequenceLength;
            mean = ex / subsequenceLength;
            std = Math.sqrt(ex2 / subsequenceLength - mean * mean);

            // Create a SAX word from the segment sums.
            word = createSAXWord(sumSegment, elementSegment, mean, std, saxLength);

            if (word != previousWord) {
                previousWord = word;
                if (!USAXMap.containsKey(word)) {
                    USAXElmentType usax = new USAXElmentType();
                    usax.objectHashSet.add(objectId);
                    usax.SAXIdArrayList.add(new Point(objectId, jSt));
                    USAXMap.put(word, usax);
                } else {
                    USAXMap.get(word).objectHashSet.add(objectId);
                    USAXMap.get(word).SAXIdArrayList.add(new Point(objectId, jSt));
                }
            }

            // Prepare the next update: drop the outgoing point, add the incoming one.
            if (j < timeSeriesObject.length - 1) {
                ex -= timeSeriesObject[jSt];
                ex2 -= timeSeriesObject[jSt] * timeSeriesObject[jSt];
                for (k = 0; k < saxLength - 1; k++) {
                    sumSegment[k] -= timeSeriesObject[jSt + k * w];
                    sumSegment[k] += timeSeriesObject[jSt + (k + 1) * w];
                }
                sumSegment[k] -= timeSeriesObject[jSt + k * w];
                sumSegment[k] += timeSeriesObject[jSt + Math.min((k + 1) * w, subsequenceLength)];
                dataPoint = timeSeriesObject[j];
                ex += dataPoint;
                ex2 += dataPoint * dataPoint;
            }
        }
    }
}
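The "case 2" loop above relies on the standard rolling-sum trick: keep the running sum and sum of squares of the current window and update both in O(1) as the window slides, so the mean and standard deviation never need recomputing from scratch. A minimal self-contained sketch of just that idea (not from the source):

double[] ts = {1, 2, 3, 4, 5, 6}; // toy series
int m = 3;                        // window length
double ex = 0, ex2 = 0;
for (int i = 0; i < m; i++) {     // fill the initial window
    ex += ts[i];
    ex2 += ts[i] * ts[i];
}
for (int start = 0; start + m <= ts.length; start++) {
    double mean = ex / m;
    double std = Math.sqrt(ex2 / m - mean * mean);
    System.out.printf("window %d: mean=%.3f std=%.3f%n", start, mean, std);
    if (start + m < ts.length) {  // slide: drop outgoing point, add incoming point
        ex += ts[start + m] - ts[start];
        ex2 += ts[start + m] * ts[start + m] - ts[start] * ts[start];
    }
}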
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
public double calcScore(USAXElmentType usax, int R, int numClass, Instances data) {
    double score; // declared here; the original had this declaration commented out
    int cid, count;
    Iterator objectIt = usax.getObjectCountHashMap().entrySet().iterator();

    // Per-class counts of matching (cIn) and non-matching (cOut) windows.
    ArrayList<Double> cIn = new ArrayList<>();
    ArrayList<Double> cOut = new ArrayList<>();
    for (int i = 0; i < numClass; i++) {
        cIn.add(0.0);
        cOut.add(0.0);
    }

    while (objectIt.hasNext()) {
        Map.Entry entry = (Map.Entry) objectIt.next();
        cid = (int) data.instance((int) entry.getKey()).classValue();
        count = (int) entry.getValue();
        cIn.set(cid, cIn.get(cid) + count);
        cOut.set(cid, cOut.get(cid) + (R - count));
    }

    score = calScoreFromObjectCount(cIn, cOut, numClass);
    return score;
}
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/**
 * Protected method to check a candidate shapelet. Functions by passing in
 * the raw data, and returning an assessed ShapeletTransform object.
 *
 * @param candidate the data from the candidate ShapeletTransform
 * @param data the entire data set to compare the candidate to
 * @param seriesId series id from the dataset that the candidate came from
 * @param startPos start position in the series where the candidate came from
 * @param classDistribution a TreeMap<Double, Integer> in the form of
 *        <Class Value, Frequency> to describe the dataset composition
 * @param qualityBound
 * @return a fully-computed ShapeletTransform, including the quality of this candidate
 */
protected LegacyShapelet checkCandidate(double[] candidate, Instances data, int seriesId, int startPos,
        TreeMap classDistribution, QualityBound.ShapeletQualityBound qualityBound) {
    // Create the orderline by looping through the data set, calculating the
    // subsequence distance from the candidate to every series and inserting in order.
    ArrayList<OrderLineObj> orderline = new ArrayList<OrderLineObj>();
    boolean pruned = false;

    for (int i = 0; i < data.numInstances(); i++) {
        // Check whether the candidate can be pruned.
        if (qualityBound != null && qualityBound.pruneCandidate()) {
            pruned = true;
            break;
        }

        double distance = 0.0;
        if (i != seriesId) {
            distance = subseqDistance(candidate, data.instance(i));
        }
        double classVal = data.instance(i).classValue();

        // Without early abandon, it is faster to just add and sort at the end.
        orderline.add(new OrderLineObj(distance, classVal));

        // Update the quality bound - presumably each bounding method for a given
        // quality measure will have its own update procedure.
        if (qualityBound != null) {
            qualityBound.updateOrderLine(orderline.get(orderline.size() - 1));
        }
    }

    // Note: early-abandon entropy pruning would appear here, but has been omitted
    // in favour of a clear multi-class information gain calculation. It could be
    // added in this method in future for speed-up, but distance early abandon is
    // more important.

    // If the shapelet was pruned, it should no longer be considered.
    if (pruned) {
        return null;
    }

    // Create a shapelet object to store all necessary info.
    LegacyShapelet shapelet = new LegacyShapelet(candidate, seriesId, startPos, this.qualityMeasure);
    shapelet.calculateQuality(orderline, classDistribution);
    shapelet.calcInfoGainAndThreshold(orderline, classDistribution);
    return shapelet;
}
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
/**
 * Private method to calculate the class distributions of a dataset. Its main
 * purpose is computing shapelet qualities.
 *
 * @param data the input data set that the class distributions are to be derived from
 * @return a TreeMap<Double, Integer> in the form of <Class Value, Frequency>
 */
public static TreeMap<Double, Integer> getClassDistributions(Instances data) {
    TreeMap<Double, Integer> classDistribution = new TreeMap<Double, Integer>();
    for (int i = 0; i < data.numInstances(); i++) {
        double classValue = data.instance(i).classValue();
        // Increment the count for this class, starting at 1 if unseen.
        Integer count = classDistribution.get(classValue);
        classDistribution.put(classValue, count == null ? 1 : count + 1);
    }
    return classDistribution;
}
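A minimal sketch of calling this helper on a loaded dataset (the file path is a placeholder):

import java.util.Map;
import java.util.TreeMap;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

Instances data = DataSource.read("data.arff"); // placeholder path
data.setClassIndex(data.numAttributes() - 1);  // classValue() requires a class index
TreeMap<Double, Integer> dist = DiversifyTopKShaepelet.getClassDistributions(data);
for (Map.Entry<Double, Integer> e : dist.entrySet()) {
    System.out.println("class " + e.getKey() + ": " + e.getValue() + " instances");
}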
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(classifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier
        Instances pred = Filter.useFilter(test, filter); // performs predictions on the test set

        if (predictedData == null) {
            predictedData = new Instances(pred, 0);
        }
        for (int j = 0; j < pred.numInstances(); j++) {
            predictedData.add(pred.instance(j));
        }
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = (int) predInst.value(predInst.attribute(0)) - 1;
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores) {
        sb.append(score + LF);
    }
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".csv"), sb.toString());

    // Output prediction arff
    DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/"
            + dataset.toString() + ".predicted.arff", predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".meta.txt"), sb.toString());
}
From source file:dkpro.similarity.experiments.sts2013.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception {
    for (Dataset dataset : datasets) {
        // Set parameters
        int folds = 10;
        Classifier baseClassifier = new LinearRegression();

        // Set up the random number generator
        long seed = new Date().getTime();
        Random random = new Random(seed);

        // Add IDs to the instances
        AddID.main(new String[] { "-i",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" });
        Instances data = DataSource.read(
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Instantiate the Remove filter
        Remove removeIDFilter = new Remove();
        removeIDFilter.setAttributeIndices("first");

        // Randomize the data
        data.randomize(random);

        // Perform cross-validation
        Instances predictedData = null;
        Evaluation eval = new Evaluation(data);

        for (int n = 0; n < folds; n++) {
            Instances train = data.trainCV(folds, n, random);
            Instances test = data.testCV(folds, n);

            // Apply log filter
            Filter logFilter = new LogFilter();
            logFilter.setInputFormat(train);
            train = Filter.useFilter(train, logFilter);
            logFilter.setInputFormat(test);
            test = Filter.useFilter(test, logFilter);

            // Copy the classifier
            Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

            // Instantiate the FilteredClassifier
            FilteredClassifier filteredClassifier = new FilteredClassifier();
            filteredClassifier.setFilter(removeIDFilter);
            filteredClassifier.setClassifier(classifier);

            // Build the classifier
            filteredClassifier.buildClassifier(train);

            // Evaluate. (The original evaluated the bare "classifier" here, which had
            // only been trained on ID-stripped data; evaluating the FilteredClassifier
            // matches the companion runClassifierCV example above.)
            eval.evaluateModel(filteredClassifier, test);

            // Add predictions
            AddClassification filter = new AddClassification();
            filter.setClassifier(classifier);
            filter.setOutputClassification(true);
            filter.setOutputDistribution(false);
            filter.setOutputErrorFlag(true);
            filter.setInputFormat(train);
            Filter.useFilter(train, filter); // trains the classifier
            Instances pred = Filter.useFilter(test, filter); // performs predictions on the test set

            if (predictedData == null) {
                predictedData = new Instances(pred, 0);
            }
            for (int j = 0; j < pred.numInstances(); j++) {
                predictedData.add(pred.instance(j));
            }
        }

        // Prepare output scores
        double[] scores = new double[predictedData.numInstances()];
        for (Instance predInst : predictedData) {
            int id = (int) predInst.value(predInst.attribute(0)) - 1;
            int valueIdx = predictedData.numAttributes() - 2;
            double value = predInst.value(predInst.attribute(valueIdx));
            scores[id] = value;

            // Limit to the interval [0;5]
            if (scores[id] > 5.0) {
                scores[id] = 5.0;
            }
            if (scores[id] < 0.0) {
                scores[id] = 0.0;
            }
        }

        // Output
        StringBuilder sb = new StringBuilder();
        for (double score : scores) {
            sb.append(score + LF);
        }
        FileUtils.writeStringToFile(
                new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"),
                sb.toString());
    }
}