List of usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:com.actelion.research.orbit.imageAnalysis.tasks.ObjectTrainWorker.java
License:Open Source License
@Override protected void doWork() { if (dontRun) { dontRun = false;/*from w ww. jav a 2 s . c o m*/ return; } trainSet = null; if (modelToBuild != null && modelToBuild.getClassifier() != null) modelToBuild.getClassifier().setBuild(false); List<double[]> trainData = new ArrayList<double[]>(); int mipLayer = -1; // used for checking if all iFrames (with trainData) have the same mapLayer (otherwise the model cannot be trained) for (ImageFrame iFrame : iFrames) { int sampleSize = Math.min(3, iFrame.recognitionFrame.bimg.getImage().getSampleModel().getNumBands()); // was always 1 before! (max 3 because alpha should be ignored) for (int i = 0; i < iFrame.recognitionFrame.getClassShapes().size(); i++) { checkPaused(); List<Shape> shapes = iFrame.recognitionFrame.getClassShapes().get(i).getShapeList(); if (shapes != null && shapes.size() > 0) { if (mipLayer < 0) { mipLayer = iFrame.getMipLayer(); logger.trace("iFrame candidate mipLayer {} from iFrame with width {}", mipLayer, iFrame.recognitionFrame.bimg.getWidth()); } else { if (mipLayer != iFrame.getMipLayer()) { logger.error( "Cell classifier cannot be trained on different image layers. Please use only training data of the same image layer."); return; } } if (mipLayer != modelToBuild.getMipLayer()) { // only same layer as segmentation allowed. Otherwise the cell features must be scaled, too (which is not yet the case). logger.error("Cell classifier must be trained on same layer as segmentation"); return; } } trainData.addAll(new ObjectFeatureBuilderTiled(null).buildFeatures(shapes, i + 1, iFrame.recognitionFrame, iFrame.recognitionFrame.getClassImage(), sampleSize, 0, 0)); // classes 1.0, 2.0, ... 
} } logger.trace("train levelNum: {}", mipLayer); if (trainData.size() == 0) { logger.error("trainset is empty, classifier cannot be trained."); trainSet = null; return; } if (isCancelled()) { cleanUp(); return; } timeEst = 1000 * 10L; setProgress(10); logger.debug("trainData contains " + trainData.size() + " samples"); Attribute classAttr = null; // create the first time a new trainSet. All further trainings will append new instances. if (trainSet == null) { // build traindata header double[] firstRowAll = trainData.get(0); double[] firstRow = Arrays.copyOfRange(firstRowAll, 0, firstRowAll.length - ObjectFeatureBuilderTiled.SkipTailForClassification); ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(firstRow.length); for (int a = 0; a < firstRow.length - 1; a++) { Attribute attr = new Attribute("a" + a); // if (a<firstRow.length-2) attr.setWeight(0.1d); else attr.setWeight(1.0d); attrInfo.add(attr); } List<String> classValues = new ArrayList<String>( iFrames.get(0).recognitionFrame.getClassShapes().size()); for (int i = 0; i < iFrames.get(0).recognitionFrame.getClassShapes().size(); i++) { classValues.add((i + 1) + ".0"); // "1.0", "2.0", ... 
} classAttr = new Attribute("class", classValues); attrInfo.add(classAttr); trainSet = new Instances("trainSet pattern classes", attrInfo, trainData.size()); trainSet.setClassIndex(firstRow.length - 1); } else classAttr = trainSet.attribute("class"); // add instances for (double[] valsAll : trainData) { // skip some non relevant attributes like centerX/Y double[] vals = Arrays.copyOfRange(valsAll, 0, valsAll.length - ObjectFeatureBuilderTiled.SkipTailForClassification); vals[vals.length - 1] = valsAll[valsAll.length - 1]; // class value double classV = classAttr.indexOfValue(Double.toString(vals[vals.length - 1])); vals[vals.length - 1] = classV; Instance inst = new DenseInstance(1.0d, vals); trainSet.add(inst); } // trainSet = trainSet.resample(rand); logger.debug("trainSet contains " + trainSet.numInstances() + " instances"); if (logger.isTraceEnabled()) logger.trace(trainSet.toString()); // building classifier if (isCancelled()) { cleanUp(); return; } checkPaused(); timeEst = 1000 * 5L; setProgress(20); logger.info("Start training classifier... 
"); classifier = new ClassifierWrapper(new weka.classifiers.functions.SMO()); try { classifier.buildClassifier(trainSet); classifier.setBuild(true); modelToBuild.setClassifier(classifier); modelToBuild.setStructure(trainSet.stringFreeStructure()); modelToBuild.setCellClassification(true); modelToBuild.setMipLayer(mipLayer); setProgress(85); // evaluation StringBuilder cnamesInfo = new StringBuilder( "Evaluation for object classification model with classes: "); for (int i = 0; i < modelToBuild.getClassShapes().size(); i++) { cnamesInfo.append(modelToBuild.getClassShapes().get(i).getName()); if (i < modelToBuild.getClassShapes().size() - 1) cnamesInfo.append(", "); } logger.info(cnamesInfo.toString()); Evaluation evaluation = new Evaluation(trainSet); evaluation.evaluateModel(classifier.getClassifier(), trainSet); logger.info(evaluation.toSummaryString()); if (evaluation.pctCorrect() < OrbitUtils.ACCURACY_WARNING) { String w = "Warning: The model classifies the training objects only with an accuracy of " + evaluation.pctCorrect() + "%.\nThat means that the marked objects are not diverse enough.\nYou might want to remove some marked objects and mark some more representative ones.\nHowever, you can still use this model if you want (check the object classification)."; logger.warn(w); if (withGUI && !ScaleoutMode.SCALEOUTMODE.get()) { JOptionPane.showMessageDialog(null, w, "Warning: Low accuracy", JOptionPane.WARNING_MESSAGE); } } } catch (Exception e) { classifier = null; logger.error("error training classifier: ", e); } logger.info("training done."); timeEst = 0L; setProgress(100); }
From source file:com.actelion.research.orbit.imageAnalysis.tasks.TrainWorker.java
License:Open Source License
private void trainClassifier() throws OrbitImageServletException { logger.debug("start trainClassifier"); if (modelToBuild != null && modelToBuild.getClassifier() != null) modelToBuild.getClassifier().setBuild(false); trainSet = null;//from ww w . j av a 2s . c om List<double[]> trainData = new ArrayList<double[]>(); int mipLayer = -1; // used for checking if all iFrames (with trainData) have the same mapLayer (otherwise the model cannot be trained) for (ImageFrame iFrame : iFrames) { if (logger.isTraceEnabled()) logger.trace( iFrame.getTitle() + ": #ClassShapes: " + iFrame.recognitionFrame.getClassShapes().size()); for (int i = 0; i < iFrame.recognitionFrame.getClassShapes().size(); i++) { // checkPaused(); if (iFrame.recognitionFrame.getClassShapes().get(i).getShapeList().size() > 0) { // set and check mip level only for iFrames with shapes (training data) if (mipLayer < 0) { mipLayer = iFrame.getMipLayer(); logger.trace("iFrame candidate mipLayer {} from iFrame with width {}", mipLayer, iFrame.recognitionFrame.bimg.getWidth()); } else { if (mipLayer != iFrame.getMipLayer()) { logger.error( "Model cannot be trained on different image layers. Please use only training data of the same image layer."); return; } } } List<Shape> shapes = iFrame.recognitionFrame.getClassShapes().get(i).getShapeList(); trainData.addAll(getFeatures(shapes, i + 1, iFrame.recognitionFrame.bimg)); // classes 1.0, 2.0, ... 
} } logger.trace("train levelNum: {}", mipLayer); if (trainData.size() == 0) { logger.error("trainset is empty, classifier cannot be trained."); trainSet = null; return; } if (isCancelled()) { logger.debug("canceled"); cleanUp(); return; } timeEst = 1000 * 10L; setProgress(10); logger.debug("trainData contains " + trainData.size() + " samples"); // limit training instances if (trainData.size() > MAXINST) { Collections.shuffle(trainData, rand); trainData = trainData.subList(0, MAXINST); logger.debug("trainSet shirked to " + trainData.size() + " instances"); } Attribute classAttr = null; // create the first time a new trainSet. All further trainings will append new instances. if (trainSet == null) { // build traindata header double[] firstRow = trainData.get(0); ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(firstRow.length); for (int a = 0; a < firstRow.length - 1; a++) { Attribute attr = new Attribute("a" + a); // if (a<firstRow.length-2) attr.setWeight(0.1d); else attr.setWeight(1.0d); attrInfo.add(attr); } List<String> classValues = new ArrayList<String>( iFrames.get(0).recognitionFrame.getClassShapes().size()); for (int i = 0; i < iFrames.get(0).recognitionFrame.getClassShapes().size(); i++) { classValues.add((i + 1) + ".0"); // "1.0", "2.0", ... 
} classAttr = new Attribute("class", classValues); attrInfo.add(classAttr); trainSet = new Instances("trainSet pattern classes", attrInfo, trainData.size()); trainSet.setClassIndex(firstRow.length - 1); } else classAttr = trainSet.attribute("class"); // add instances for (double[] vals : trainData) { double classV = classAttr.indexOfValue(Double.toString(vals[vals.length - 1])); vals[vals.length - 1] = classV; //Instance inst = new Instance(1.0d, vals); Instance inst = new DenseInstance(1.0d, vals); trainSet.add(inst); } trainSet = trainSet.resample(rand); logger.debug("trainSet contains " + trainSet.numInstances() + " instances"); // building classifier if (isCancelled()) { cleanUp(); return; } checkPaused(); timeEst = 1000 * 5L; setProgress(20); logger.info("Start training classifier... "); Classifier c; /* // experiments with deep learning... do not use in production. if (AparUtils.DEEPORBIT) { FeatureDescription fd = modelToBuild!=null? modelToBuild.getFeatureDescription(): new FeatureDescription(); TissueFeatures tissueFeaturre = AparUtils.createTissueFeatures(fd, null); int numOutNeurons = modelToBuild.getClassShapes().size(); int numInNeurons = tissueFeaturre.prepareDoubleArray().length-1; logger.debug("numNeuronsIn:"+numInNeurons+" numNeuronsOut:"+numOutNeurons); MultiLayerPerceptron neuralNet = new MultiLayerPerceptron(numInNeurons,100, numOutNeurons); for (int a=0; a<numOutNeurons; a++) { neuralNet.getOutputNeurons()[a].setLabel("class"+a); } neuralNet.connectInputsToOutputs(); MomentumBackpropagation mb = new MomentumBackpropagation(); mb.setLearningRate(0.2d); mb.setMomentum(0.7d); //mb.setMaxIterations(20); mb.setMaxError(0.12); neuralNet.setLearningRule(mb); c = new WekaNeurophClassifier(neuralNet); } else { c = new weka.classifiers.functions.SMO(); } */ c = new weka.classifiers.functions.SMO(); //weka.classifiers.functions.LibSVM c = new weka.classifiers.functions.LibSVM(); //Classifier c = new weka.classifiers.trees.J48(); classifier = new 
ClassifierWrapper(c); //classifier = new weka.classifiers.bayes.BayesNet(); //classifier = new weka.classifiers.functions.MultilayerPerceptron(); //((weka.classifiers.functions.SMO)classifier).setKernel(new weka.classifiers.functions.supportVector.RBFKernel()); try { classifier.buildClassifier(trainSet); classifier.setBuild(true); modelToBuild.setClassifier(classifier); modelToBuild.setStructure(trainSet.stringFreeStructure()); modelToBuild.setCellClassification(false); modelToBuild.setMipLayer(mipLayer); logger.debug("training done"); // evaluation StringBuilder cnamesInfo = new StringBuilder("Evaluation for model with classes: "); for (int i = 0; i < modelToBuild.getClassShapes().size(); i++) { cnamesInfo.append(modelToBuild.getClassShapes().get(i).getName()); if (i < modelToBuild.getClassShapes().size() - 1) cnamesInfo.append(", "); } logger.info(cnamesInfo.toString()); Evaluation evaluation = new Evaluation(trainSet); evaluation.evaluateModel(classifier.getClassifier(), trainSet); logger.info(evaluation.toSummaryString()); if (evaluation.pctCorrect() < OrbitUtils.ACCURACY_WARNING) { final String w = "Warning: The model classifies the training shapes only with an accuracy of " + evaluation.pctCorrect() + "%.\nThat means that the drawn class shapes are not diverse enough.\nYou might want to remove some class shapes and mark some more representative regions.\nHowever, you can still use this model if you want (check the classification)."; logger.warn(w); if (withGUI && !ScaleoutMode.SCALEOUTMODE.get()) { SwingUtilities.invokeLater(new Runnable() { @Override public void run() { JOptionPane.showMessageDialog(null, w, "Warning: Low accuracy", JOptionPane.WARNING_MESSAGE); } }); } } } catch (Exception e) { classifier = null; logger.error("error training classifier", e); } // logger.trace(classifier.toString()); }
From source file:com.actelion.research.orbit.imageAnalysis.tasks.TrainWorker.java
License:Open Source License
private void createClusterer() { int MAX_TILES_CLUSTERING = 50; if (iFrames == null || iFrames.size() < 1) { logger.error("cannot build clusterer, no open image frames."); return;/* www .ja va 2 s . c o m*/ } if (modelToBuild != null && modelToBuild.getClassifier() != null) modelToBuild.getClassifier().setBuild(false); int windowSize = modelToBuild.getFeatureDescription().getWindowSize(); List<double[]> trainData = new ArrayList<double[]>(); for (ImageFrame iFrame : iFrames) { PlanarImage image = iFrame.recognitionFrame.bimg.getImage(); TissueFeatures tissueFeatures = new TissueFeatures(modelToBuild.getFeatureDescription(), iFrame.recognitionFrame.bimg); Point[] tileArr = image.getTileIndices(null); if (tileArr.length > MAX_TILES_CLUSTERING) { logger.trace("number of tiles for clustering: " + tileArr.length); List<Point> pList = new ArrayList<Point>(tileArr.length); for (Point p : tileArr) pList.add(p); Collections.shuffle(pList); pList = pList.subList(0, MAX_TILES_CLUSTERING); tileArr = pList.toArray(new Point[0]); logger.trace("number of tiles after tile limit: " + tileArr.length); } for (Point tileNum : tileArr) { Raster r = image.getTile(tileNum.x, tileNum.y); for (int x = image.tileXToX(tileNum.x); x < Math .min(image.tileXToX(tileNum.x) + image.getTileWidth(), image.getWidth()); x++) { for (int y = image.tileYToY(tileNum.y); y < Math .min(image.tileYToY(tileNum.y) + image.getTileHeight(), image.getHeight()); y++) { if ((x < r.getMinX() + windowSize) || (y < r.getMinY() + windowSize) || (x > r.getMinX() + r.getWidth() - windowSize - 1) || (y > r.getMinY() + r.getHeight() - windowSize - 1)) continue; double[] feats = null; try { feats = tissueFeatures.buildFeatures(r, x, y, Double.NaN); } catch (Throwable t) { System.out.println(t.getMessage()); t.printStackTrace(); } trainData.add(feats); } // y checkPaused(); if (isCancelled()) { cleanUp(); return; } } // x } // tileNum } // iFrames timeEst = 1000 * 60L; setProgress(20); // trainData -> instances 
checkPaused(); if (isCancelled()) { cleanUp(); return; } trainSet = null; Attribute classAttr = null; // create the first time a new trainSet. All further trainings will append new instances. if (trainSet == null) { // build traindata header double[] firstRow = trainData.get(0); ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(firstRow.length); for (int a = 0; a < firstRow.length - 1; a++) { Attribute attr = new Attribute("a" + a); // if (a<firstRow.length-2) attr.setWeight(0.1d); else attr.setWeight(1.0d); attrInfo.add(attr); } List<String> classValues = new ArrayList<String>( iFrames.get(0).recognitionFrame.getClassShapes().size()); for (int i = 0; i < iFrames.get(0).recognitionFrame.getClassShapes().size(); i++) { classValues.add((i + 1) + ".0"); // "1.0", "2.0", ... } classAttr = new Attribute("class", classValues); attrInfo.add(classAttr); trainSet = new Instances("trainSet pattern classes", attrInfo, trainData.size()); trainSet.setClassIndex(firstRow.length - 1); } else classAttr = trainSet.attribute("class"); timeEst = 1000 * 45L; setProgress(25); // add instances checkPaused(); if (isCancelled()) { cleanUp(); return; } for (double[] vals : trainData) { double classV = Double.NaN; vals[vals.length - 1] = classV; Instance inst = new DenseInstance(1.0d, vals); trainSet.add(inst); } trainSet = trainSet.resample(rand); trainSet.setClassIndex(-1); Instances ts = new Instances(trainSet, 0); ts.addAll(trainSet.subList(0, Math.min(MAX_CLUSTERING_EXAMPLES, trainSet.size() - 1))); trainSet = null; trainSet = ts; logger.debug("trainSet contains " + trainSet.numInstances() + " instances, class Attribute: " + trainSet.classIndex()); logger.info("start building clusterer..."); timeEst = 1000 * 40L; setProgress(30); // build clusterer checkPaused(); if (isCancelled()) { cleanUp(); return; } // Clusterer clusterer = new weka.clusterers.SimpleKMeans(); //Clusterer clusterer = new MakeDensityBasedClusterer(new SimpleKMeans()); Clusterer clusterer = new EM(); try { 
//((weka.clusterers.SimpleKMeans)clusterer).setNumClusters(iFrames.get(0).recognitionFrame.getClassShapes().size()); // ((MakeDensityBasedClusterer)clusterer).setNumClusters(iFrames.get(0).recognitionFrame.getClassShapes().size()); ((EM) clusterer).setNumClusters(iFrames.get(0).recognitionFrame.getClassShapes().size()); clusterer.buildClusterer(trainSet); } catch (Exception e) { logger.error( "cannot build clusterer or cannot set number of clusters (classShapes not correctly initialized?)"); e.printStackTrace(); } logger.info( "done. (clusterer is densityBasedClusterer: " + (clusterer instanceof DensityBasedClusterer) + ")"); // sort class labels according to priors classifier = new ClassifierWrapper(clusterer); classifier.setBuild(true); this.trainSet = trainSet.stringFreeStructure(); modelToBuild.setClassifier(classifier); modelToBuild.setStructure(trainSet.stringFreeStructure()); }
From source file:com.davidmascharka.lips.TrackerActivity.java
License:Open Source License
/** * Adds the attributes to the x classification, sets up the xInstance to * allow the classifier to predict x position from these attributes *///from ww w. j a v a 2 s.c o m private void setUpXInstances() { xClass.add(attrAccelX); xClass.add(attrAccelY); xClass.add(attrAccelZ); xClass.add(attrMagneticX); xClass.add(attrMagneticY); xClass.add(attrMagneticZ); xClass.add(attrLight); xClass.add(attrRotationX); xClass.add(attrRotationY); xClass.add(attrRotationZ); xClass.add(attrOrientationX); xClass.add(attrOrientationY); xClass.add(attrOrientationZ); xClass.add(attrBSSID1); xClass.add(attrBSSID2); xClass.add(attrBSSID3); xClass.add(attrBSSID4); xClass.add(attrBSSID5); xClass.add(attrBSSID6); xClass.add(attrBSSID7); xClass.add(attrBSSID8); xClass.add(attrBSSID9); xClass.add(attrBSSID10); xClass.add(attrBSSID11); xClass.add(attrBSSID12); xClass.add(attrBSSID13); xClass.add(attrBSSID14); xClass.add(attrBSSID15); xClass.add(attrBSSID16); xClass.add(attrBSSID17); xClass.add(attrBSSID18); xClass.add(attrBSSID19); xClass.add(attrBSSID20); xClass.add(attrBSSID21); xClass.add(attrBSSID22); xClass.add(attrBSSID23); xClass.add(attrBSSID24); xClass.add(attrBSSID25); xClass.add(attrBSSID26); xClass.add(attrBSSID27); xClass.add(attrBSSID28); xClass.add(attrBSSID29); xClass.add(attrBSSID30); xClass.add(attrBSSID31); xClass.add(attrBSSID32); xClass.add(attrBSSID33); xClass.add(attrBSSID34); xClass.add(attrBSSID35); xClass.add(attrBSSID36); xClass.add(attrBSSID37); xClass.add(attrBSSID38); xClass.add(attrBSSID39); xClass.add(attrBSSID40); xClass.add(attrBSSID41); xClass.add(attrBSSID42); xClass.add(attrBSSID43); xClass.add(attrBSSID44); xClass.add(attrBSSID45); xClass.add(attrBSSID46); xClass.add(attrBSSID47); xClass.add(attrBSSID48); xClass.add(attrBSSID49); xClass.add(attrBSSID50); xClass.add(attrBSSID51); xClass.add(attrBSSID52); xClass.add(attrBSSID53); xClass.add(attrBSSID54); xClass.add(attrBSSID55); xClass.add(attrBSSID56); xClass.add(attrBSSID57); xClass.add(attrBSSID58); 
xClass.add(attrBSSID59); xClass.add(attrBSSID60); xClass.add(attrBSSID61); xClass.add(attrBSSID62); xClass.add(attrBSSID63); xClass.add(attrBSSID64); xClass.add(attrBSSID65); xClass.add(attrBSSID66); xClass.add(attrBSSID67); xClass.add(attrBSSID68); xClass.add(attrBSSID69); xClass.add(attrBSSID70); xClass.add(attrBSSID71); xClass.add(attrBSSID72); xClass.add(attrBSSID73); xClass.add(attrBSSID74); xClass.add(attrBSSID75); xClass.add(attrBSSID76); xClass.add(attrBSSID77); xClass.add(attrBSSID78); xClass.add(attrBSSID79); xClass.add(attrBSSID80); xClass.add(attrBSSID81); xClass.add(attrBSSID82); xClass.add(attrBSSID83); xClass.add(attrBSSID84); xClass.add(attrBSSID85); xClass.add(attrBSSID86); xClass.add(attrBSSID87); xClass.add(attrBSSID88); xClass.add(attrBSSID89); xClass.add(attrBSSID90); xClass.add(attrBSSID91); xClass.add(attrBSSID92); xClass.add(attrBSSID93); xClass.add(attrBSSID94); xClass.add(attrBSSID95); xClass.add(attrBSSID96); xClass.add(attrBSSID97); xClass.add(attrBSSID98); xClass.add(attrBSSID99); xClass.add(attrBSSID100); xClass.add(attrBSSID101); xClass.add(attrBSSID102); xClass.add(attrBSSID103); xClass.add(attrBSSID104); xClass.add(attrBSSID105); xClass.add(attrBSSID106); xClass.add(attrBSSID107); xClass.add(attrBSSID108); xClass.add(attrBSSID109); xClass.add(attrBSSID110); xClass.add(attrBSSID111); xClass.add(attrBSSID112); xClass.add(attrBSSID113); xClass.add(attrBSSID114); xClass.add(attrBSSID115); xClass.add(attrBSSID116); xClass.add(attrBSSID117); xClass.add(attrBSSID118); xClass.add(attrBSSID119); xClass.add(attrBSSID120); xClass.add(attrBSSID121); xClass.add(attrBSSID122); xClass.add(attrBSSID123); xClass.add(attrBSSID124); xClass.add(attrBSSID125); xClass.add(attrBSSID126); xClass.add(attrBSSID127); xClass.add(attrBSSID128); xClass.add(attrBSSID129); xClass.add(attrBSSID130); xClass.add(attrBSSID131); xClass.add(attrBSSID132); xClass.add(attrBSSID133); xClass.add(attrBSSID134); xClass.add(attrBSSID135); xClass.add(attrBSSID136); 
xClass.add(attrBSSID137); xClass.add(attrBSSID138); xClass.add(attrBSSID139); xClass.add(attrBSSID140); xClass.add(attrBSSID141); xClass.add(attrBSSID142); xClass.add(attrBSSID143); xClass.add(attrBSSID144); xClass.add(attrBSSID145); xClass.add(attrBSSID146); xClass.add(attrBSSID147); xClass.add(attrBSSID148); xClass.add(attrBSSID149); xClass.add(attrBSSID150); xClass.add(attrBSSID151); xClass.add(attrBSSID152); xClass.add(attrBSSID153); xClass.add(attrBSSID154); xClass.add(attrBSSID155); xClass.add(attrBSSID156); xClass.add(attrLatitude); xClass.add(attrLongitude); xClass.add(attrLocationAccuracy); xClass.add(attrXPosition); xInstances = new Instances("xPos", xClass, 1); xInstances.setClassIndex(172); xInstances.add(new DenseInstance(173)); }
From source file:com.davidmascharka.lips.TrackerActivity.java
License:Open Source License
/**
 * Adds the attributes to the y classification and sets up {@code yInstances} so that the
 * classifier can predict the y position from these attributes.
 * <p>
 * The attribute order is fixed: accelerometer, magnetic field, light, rotation and orientation
 * readings, then {@code attrBSSID1..attrBSSID156}, then latitude, longitude and location
 * accuracy, and finally the target {@code attrYPosition}. The class index and the width of the
 * single placeholder instance are derived from the dataset rather than hard-coded (172 / 173),
 * so they cannot drift when the attribute list changes.
 */
private void setUpYInstances() {
    java.util.Collections.addAll(yClass,
            attrAccelX, attrAccelY, attrAccelZ,
            attrMagneticX, attrMagneticY, attrMagneticZ,
            attrLight,
            attrRotationX, attrRotationY, attrRotationZ,
            attrOrientationX, attrOrientationY, attrOrientationZ,
            attrBSSID1, attrBSSID2, attrBSSID3, attrBSSID4, attrBSSID5, attrBSSID6,
            attrBSSID7, attrBSSID8, attrBSSID9, attrBSSID10, attrBSSID11, attrBSSID12,
            attrBSSID13, attrBSSID14, attrBSSID15, attrBSSID16, attrBSSID17, attrBSSID18,
            attrBSSID19, attrBSSID20, attrBSSID21, attrBSSID22, attrBSSID23, attrBSSID24,
            attrBSSID25, attrBSSID26, attrBSSID27, attrBSSID28, attrBSSID29, attrBSSID30,
            attrBSSID31, attrBSSID32, attrBSSID33, attrBSSID34, attrBSSID35, attrBSSID36,
            attrBSSID37, attrBSSID38, attrBSSID39, attrBSSID40, attrBSSID41, attrBSSID42,
            attrBSSID43, attrBSSID44, attrBSSID45, attrBSSID46, attrBSSID47, attrBSSID48,
            attrBSSID49, attrBSSID50, attrBSSID51, attrBSSID52, attrBSSID53, attrBSSID54,
            attrBSSID55, attrBSSID56, attrBSSID57, attrBSSID58, attrBSSID59, attrBSSID60,
            attrBSSID61, attrBSSID62, attrBSSID63, attrBSSID64, attrBSSID65, attrBSSID66,
            attrBSSID67, attrBSSID68, attrBSSID69, attrBSSID70, attrBSSID71, attrBSSID72,
            attrBSSID73, attrBSSID74, attrBSSID75, attrBSSID76, attrBSSID77, attrBSSID78,
            attrBSSID79, attrBSSID80, attrBSSID81, attrBSSID82, attrBSSID83, attrBSSID84,
            attrBSSID85, attrBSSID86, attrBSSID87, attrBSSID88, attrBSSID89, attrBSSID90,
            attrBSSID91, attrBSSID92, attrBSSID93, attrBSSID94, attrBSSID95, attrBSSID96,
            attrBSSID97, attrBSSID98, attrBSSID99, attrBSSID100, attrBSSID101, attrBSSID102,
            attrBSSID103, attrBSSID104, attrBSSID105, attrBSSID106, attrBSSID107, attrBSSID108,
            attrBSSID109, attrBSSID110, attrBSSID111, attrBSSID112, attrBSSID113, attrBSSID114,
            attrBSSID115, attrBSSID116, attrBSSID117, attrBSSID118, attrBSSID119, attrBSSID120,
            attrBSSID121, attrBSSID122, attrBSSID123, attrBSSID124, attrBSSID125, attrBSSID126,
            attrBSSID127, attrBSSID128, attrBSSID129, attrBSSID130, attrBSSID131, attrBSSID132,
            attrBSSID133, attrBSSID134, attrBSSID135, attrBSSID136, attrBSSID137, attrBSSID138,
            attrBSSID139, attrBSSID140, attrBSSID141, attrBSSID142, attrBSSID143, attrBSSID144,
            attrBSSID145, attrBSSID146, attrBSSID147, attrBSSID148, attrBSSID149, attrBSSID150,
            attrBSSID151, attrBSSID152, attrBSSID153, attrBSSID154, attrBSSID155, attrBSSID156,
            attrLatitude, attrLongitude, attrLocationAccuracy,
            attrYPosition);

    yInstances = new Instances("yPos", yClass, 1);
    // The target is the last attribute; the placeholder instance has one slot per attribute.
    final int attributeCount = yInstances.numAttributes();
    yInstances.setClassIndex(attributeCount - 1);
    yInstances.add(new DenseInstance(attributeCount));
}
From source file:com.davidmascharka.lips.TrackerActivity.java
License:Open Source License
/**
 * Adds the attributes to the partition classification and sets up {@code partitionInstances} so
 * that the classifier can predict the partition from these attributes.
 * <p>
 * The attribute order is fixed: accelerometer, magnetic field, light, rotation and orientation
 * readings, then {@code attrBSSID1..attrBSSID156}, then latitude, longitude and location
 * accuracy, and finally the target {@code attrPartition}. The class index and the width of the
 * single placeholder instance are derived from the dataset rather than hard-coded (172 / 173),
 * so they cannot drift when the attribute list changes.
 */
private void setUpPartitionInstances() {
    java.util.Collections.addAll(partitionClass,
            attrAccelX, attrAccelY, attrAccelZ,
            attrMagneticX, attrMagneticY, attrMagneticZ,
            attrLight,
            attrRotationX, attrRotationY, attrRotationZ,
            attrOrientationX, attrOrientationY, attrOrientationZ,
            attrBSSID1, attrBSSID2, attrBSSID3, attrBSSID4, attrBSSID5, attrBSSID6,
            attrBSSID7, attrBSSID8, attrBSSID9, attrBSSID10, attrBSSID11, attrBSSID12,
            attrBSSID13, attrBSSID14, attrBSSID15, attrBSSID16, attrBSSID17, attrBSSID18,
            attrBSSID19, attrBSSID20, attrBSSID21, attrBSSID22, attrBSSID23, attrBSSID24,
            attrBSSID25, attrBSSID26, attrBSSID27, attrBSSID28, attrBSSID29, attrBSSID30,
            attrBSSID31, attrBSSID32, attrBSSID33, attrBSSID34, attrBSSID35, attrBSSID36,
            attrBSSID37, attrBSSID38, attrBSSID39, attrBSSID40, attrBSSID41, attrBSSID42,
            attrBSSID43, attrBSSID44, attrBSSID45, attrBSSID46, attrBSSID47, attrBSSID48,
            attrBSSID49, attrBSSID50, attrBSSID51, attrBSSID52, attrBSSID53, attrBSSID54,
            attrBSSID55, attrBSSID56, attrBSSID57, attrBSSID58, attrBSSID59, attrBSSID60,
            attrBSSID61, attrBSSID62, attrBSSID63, attrBSSID64, attrBSSID65, attrBSSID66,
            attrBSSID67, attrBSSID68, attrBSSID69, attrBSSID70, attrBSSID71, attrBSSID72,
            attrBSSID73, attrBSSID74, attrBSSID75, attrBSSID76, attrBSSID77, attrBSSID78,
            attrBSSID79, attrBSSID80, attrBSSID81, attrBSSID82, attrBSSID83, attrBSSID84,
            attrBSSID85, attrBSSID86, attrBSSID87, attrBSSID88, attrBSSID89, attrBSSID90,
            attrBSSID91, attrBSSID92, attrBSSID93, attrBSSID94, attrBSSID95, attrBSSID96,
            attrBSSID97, attrBSSID98, attrBSSID99, attrBSSID100, attrBSSID101, attrBSSID102,
            attrBSSID103, attrBSSID104, attrBSSID105, attrBSSID106, attrBSSID107, attrBSSID108,
            attrBSSID109, attrBSSID110, attrBSSID111, attrBSSID112, attrBSSID113, attrBSSID114,
            attrBSSID115, attrBSSID116, attrBSSID117, attrBSSID118, attrBSSID119, attrBSSID120,
            attrBSSID121, attrBSSID122, attrBSSID123, attrBSSID124, attrBSSID125, attrBSSID126,
            attrBSSID127, attrBSSID128, attrBSSID129, attrBSSID130, attrBSSID131, attrBSSID132,
            attrBSSID133, attrBSSID134, attrBSSID135, attrBSSID136, attrBSSID137, attrBSSID138,
            attrBSSID139, attrBSSID140, attrBSSID141, attrBSSID142, attrBSSID143, attrBSSID144,
            attrBSSID145, attrBSSID146, attrBSSID147, attrBSSID148, attrBSSID149, attrBSSID150,
            attrBSSID151, attrBSSID152, attrBSSID153, attrBSSID154, attrBSSID155, attrBSSID156,
            attrLatitude, attrLongitude, attrLocationAccuracy,
            attrPartition);

    partitionInstances = new Instances("partition", partitionClass, 1);
    // The target is the last attribute; the placeholder instance has one slot per attribute.
    final int attributeCount = partitionInstances.numAttributes();
    partitionInstances.setClassIndex(attributeCount - 1);
    partitionInstances.add(new DenseInstance(attributeCount));
}
From source file:com.deafgoat.ml.prognosticator.ARFFWriter.java
License:Apache License
/**
 * Creates the dataset object once all dump and user-defined features have been added.
 * <p>
 * The dataset is named after {@code _config._relation}, uses {@code _attributes} as its header
 * and starts with an initial capacity of zero.
 */
private void initializeInstances() {
    _logger.info("Initializing instances object");
    final int initialCapacity = 0;
    _data = new Instances(_config._relation, _attributes, initialCapacity);
}
From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetClassifier.java
License:Apache License
/**
 * Sets up an empty dataset with two attributes, "content" and "Class",
 * and marks the last attribute as the class attribute.
 */
public TweetClassifier() {
    // Labels of the nominal "Class" attribute, in index order.
    final String[] labels = {"", "neutral", "negative", "positive"};
    FastVector classValues = new FastVector(labels.length);
    for (String label : labels) {
        classValues.addElement(label);
    }

    FastVector attributes = new FastVector(2);
    // Per the Weka Attribute API, a null value list declares a string attribute.
    attributes.addElement(new Attribute("content", (FastVector) null));
    attributes.addElement(new Attribute("Class", classValues));

    m_Data = new Instances("MessageClassificationProblem", attributes, 100);
    final int lastAttributeIndex = m_Data.numAttributes() - 1;
    m_Data.setClassIndex(lastAttributeIndex);
}
From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetFeatureExtractor.java
License:Apache License
/**
 * Constructs the ARFF file for Weka from the training data.
 * <p>
 * Reads {@code ../classified data/traindata.csv}, taking the class id from
 * column 0 and the tweet text from column 5 of every record. The collected
 * dataset is passed through a {@code StringToWordVector} filter and written
 * to {@code ./data/train2data.arff}. Any exception raised along the way is
 * logged at SEVERE level and not propagated to the caller.
 */
public static void constructModel() {
    try {
        FastVector atts = new FastVector();
        atts.addElement(new Attribute("content", (FastVector) null));

        FastVector fvClassVal = new FastVector(4);
        fvClassVal.addElement("");
        fvClassVal.addElement("neutral");
        fvClassVal.addElement("negative");
        fvClassVal.addElement("positive");
        atts.addElement(new Attribute("Class", fvClassVal));

        Instances instdata = new Instances("tweetData", atts, 0);

        CsvReader data = new CsvReader("../classified data/traindata.csv");
        try {
            int i = 0;
            while (data.readRecord()) {
                String class_id = data.get(0);
                switch (Integer.parseInt(class_id)) {
                    case 0:
                        class_id = "negative";
                        break;
                    case 2:
                        class_id = "neutral";
                        break;
                    case 4:
                        class_id = "positive";
                        break;
                    default:
                        // Unmapped ids keep their numeric text.
                        // NOTE(review): setValue below presumably rejects labels that
                        // are not declared on the nominal attribute - confirm.
                        break;
                }
                String tweet_content = data.get(5);

                Instance iInst = new Instance(2);
                iInst.setValue((Attribute) atts.elementAt(0), tweet_content);
                iInst.setValue((Attribute) atts.elementAt(1), class_id);
                instdata.add(iInst);

                System.out.println("[" + i + "] " + class_id + ":" + tweet_content);
                i++;
            }
        } finally {
            // Close the reader even when a record fails to parse, so the
            // underlying file handle is not leaked.
            data.close();
        }

        StringToWordVector filter = new StringToWordVector();
        instdata.setClassIndex(instdata.numAttributes() - 1);
        filter.setInputFormat(instdata);
        Instances newdata = Filter.useFilter(instdata, filter);

        ArffSaver saver = new ArffSaver();
        saver.setInstances(newdata);
        saver.setFile(new File("./data/train2data.arff"));
        saver.writeBatch();
    } catch (Exception ex) {
        Logger.getLogger(TweetFeatureExtractor.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:com.entopix.maui.filters.MauiFilter.java
License:Open Source License
/** * Builds the classifier.//from www. j a v a 2 s .c om * @throws MauiFilterException */ private void buildClassifier() throws MauiFilterException { // Generate input format for classifier FastVector atts = new FastVector(); for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (i == documentAtt) { atts.addElement(new Attribute("Term_frequency")); // 0 atts.addElement(new Attribute("IDF")); // 1 atts.addElement(new Attribute("TFxIDF")); // 2 atts.addElement(new Attribute("First_occurrence")); // 3 atts.addElement(new Attribute("Last_occurrence")); // 4 atts.addElement(new Attribute("Spread")); // 5 atts.addElement(new Attribute("Domain_keyphraseness")); // 6 atts.addElement(new Attribute("Length")); // 7 atts.addElement(new Attribute("Generality")); // 8 atts.addElement(new Attribute("Node_degree")); // 9 atts.addElement(new Attribute("Wikipedia_keyphraseness")); // 10 atts.addElement(new Attribute("Wikipedia_inlinks")); // 11 atts.addElement(new Attribute("Wikipedia_generality")); // 12 } else if (i == keyphrasesAtt) { if (nominalClassValue) { FastVector vals = new FastVector(2); vals.addElement("False"); vals.addElement("True"); atts.addElement(new Attribute("Keyphrase?", vals)); } else { atts.addElement(new Attribute("Keyphrase?")); } } } classifierData = new Instances("ClassifierData", atts, 0); classifierData.setClassIndex(numFeatures); if (debugMode) { log.info("--- Converting instances for classifier"); } int totalDocuments = getInputFormat().numInstances(); // Convert pending input instances into data for classifier for (int i = 0; i < totalDocuments; i++) { Instance current = getInputFormat().instance(i); // Get the key phrases for the document String keyphrases = current.stringValue(keyphrasesAtt); HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases); // Get the phrases for the document HashMap<String, Candidate> candidateList = allCandidates.get(current); // Compute the feature values for each phrase and // add the 
instance to the data for the classifier int countPos = 0; int countNeg = 0; if (debugMode) { log.info("--- Computing features for document " + i + " out of " + totalDocuments + "..."); } for (Candidate candidate : candidateList.values()) { // ignore all candidates that appear less than a threshold if (candidate.getFrequency() < minOccurFrequency) { continue; } // compute feature values double[] vals = computeFeatureValues(candidate, true, hashKeyphrases, candidateList); if (vals[vals.length - 1] == 0) { countNeg++; } else { countPos++; } Instance inst = new Instance(current.weight(), vals); // log.info(candidate + "\t" + inst); classifierData.add(inst); } log.debug(countPos + " positive; " + countNeg + " negative instances"); } log.debug("--- Building classifier"); if (classifier == null) { // Build classifier if (nominalClassValue) { // FilteredClassifier fclass = new FilteredClassifier(); // fclass.setClassifier(new NaiveBayesSimple()); // fclass.setFilter(new Discretize()); // classifier = fclass; classifier = new Bagging(); // try also // try { classifier.setOptions( Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2")); } catch (Exception e) { log.warn("Exception while loading classifier's options " + e.getMessage()); } } else { classifier = new Bagging(); // try also // classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W // weka.classifiers.trees.J48 -- -U -M 2")) ; try { String optionsString = "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0"; String[] options = Utils.splitOptions(optionsString); classifier.setOptions(options); } catch (Exception e) { log.warn("Exception while loading classifier's options " + e.getMessage()); } } } try { classifier.buildClassifier(classifierData); } catch (Exception e) { throw new MauiFilterException("Exception while building classifier " + e.getMessage()); } if (debugMode) { log.info(classifier.toString()); } // Save space classifierData = new Instances(classifierData, 0); }