List of usage examples for the weka.core.Instance method stringValue
// Signature of the API method illustrated by the examples below (Attribute-argument overload).
// Note: every example in this listing calls stringValue with an int attribute index instead.
public String stringValue(Attribute att);
From source file:kea.NumbersFilter.java
License:Open Source License
/** * Converts an instance. A phrase boundary is inserted where * a number is found./*from w w w.j av a 2s . c o m*/ */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if ((!instance.attribute(i).isString()) || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { String str = instance.stringValue(i); StringBuffer resultStr = new StringBuffer(); StringTokenizer tok = new StringTokenizer(str, " \t\n", true); while (tok.hasMoreTokens()) { String token = tok.nextToken(); // Everything that doesn't contain at least // one letter is considered to be a number boolean isNumber = true; for (int j = 0; j < token.length(); j++) { if (Character.isLetter(token.charAt(j))) { isNumber = false; break; } } if (!isNumber) { resultStr.append(token); } else { if (token.equals(" ") || token.equals("\t") || token.equals("\n")) { resultStr.append(token); } else { resultStr.append(" \n "); } } } int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString()); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:knn.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) { // set dist to 0 double distance = 0; // from index 0 to left and right side's number of attributes - 1... for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { // if left and right side's attributes are numeric, set the distance equal // to the value of left value - right value all squared if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2); } else {// www .j a v a 2s .co m // else add 5 to the distance unless left and right side's string converted // values are equal to one another, in which case set dist back to zero if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance += 0; } distance += 5; } } return distance; }
From source file:knn.KNNClassifier.java
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) { // set dist to 0 double distance = 0; // from index 0 to number of attributes - 1 on both sides... for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { // if left and right side's attributes are numbers set distance equal // to absolute value of left's value - right's value if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += abs(instanceLHS.value(i) - instanceRHS.value(i)); } else {//from w w w . j a va 2s. c o m // else add 5 to distance unless left and right are equal, in which // case set the distance back to 0 if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance = 0; } distance += 5; } } return distance; }
From source file:knnclassifier.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) { double distance = 0; for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2); } else {//from w w w . j a v a2 s . com if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance += 0; } distance += 5; } } return distance; }
From source file:knnclassifier.KNNClassifier.java
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) { double distance = 0; for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += abs(instanceLHS.value(i) - instanceRHS.value(i)); } else {//www . ja v a 2 s .c o m if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance = 0; } distance += 5; } } return distance; }
From source file:lattice.Lattice.java
License:Open Source License
/** * Constructor of a lattice over the given variables of the dataset. * /* w w w . j a v a2s. c o m*/ * @param dataset */ public Lattice(Instances dataset) { // ~ initialise internal structure for counting (TID sets) this.nbInstances = dataset.numInstances(); this.nbVariables = dataset.numAttributes(); BitSet[][] presence = new BitSet[nbVariables][]; TreeSet<Integer> allAttributesNumbers = new TreeSet<Integer>(); int[] nbValuesForAttribute = new int[nbVariables]; for (int a = 0; a < nbVariables; a++) { nbValuesForAttribute[a] = dataset.numDistinctValues(a) + 1; //+1 for missing presence[a] = new BitSet[nbValuesForAttribute[a]]; allAttributesNumbers.add(a); for (int v = 0; v < presence[a].length; v++) { presence[a][v] = new BitSet(); } } for (int i = 0; i < nbInstances; i++) { Instance row = dataset.instance(i); for (int a = 0; a < nbVariables; a++) { int indexOfValue; if (row.isMissing(a)) { // indexOfValue = (int) dataset.meanOrMode(a); indexOfValue = dataset.numDistinctValues(a); //missing at the end } else { String value = row.stringValue(a); indexOfValue = row.attribute(a).indexOfValue(value); } presence[a][indexOfValue].set(i); } } // initialise the first nodes of the lattice (i.e., the ones // corresponding to single variables this.all = new LatticeNode(this, nbValuesForAttribute); this.singleNodes = new LatticeNode[nbVariables]; for (int a = 0; a < nbVariables; a++) { int[] variablesNumbers = { a }; LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all); singleNodes[a] = node; } }
From source file:lattice.Lattice.java
License:Open Source License
public Lattice(Instances structure, ArffReader loader) throws IOException { // ~ initialise internal structure for counting (TID sets) this.nbInstances = 0; this.nbVariables = structure.numAttributes(); BitSet[][] presence = new BitSet[nbVariables][]; TreeSet<Integer> allAttributesNumbers = new TreeSet<Integer>(); int[] nbValuesForAttribute = new int[nbVariables]; for (int a = 0; a < nbVariables; a++) { nbValuesForAttribute[a] = structure.numDistinctValues(a) + 1;//+1 for missing presence[a] = new BitSet[nbValuesForAttribute[a]]; allAttributesNumbers.add(a);// w ww . ja va2 s. co m for (int v = 0; v < presence[a].length; v++) { presence[a][v] = new BitSet(); } } Instance row; while ((row = loader.readInstance(structure)) != null) { for (int a = 0; a < nbVariables; a++) { int indexOfValue; if (row.isMissing(a)) { indexOfValue = structure.numDistinctValues(a);//missing at the end } else { String value = row.stringValue(a); indexOfValue = row.attribute(a).indexOfValue(value); } presence[a][indexOfValue].set(this.nbInstances); } this.nbInstances++; } // initialise the first nodes of the lattice (i.e., the ones // corresponding to single variables this.all = new LatticeNode(this, nbValuesForAttribute); this.singleNodes = new LatticeNode[nbVariables]; for (int a = 0; a < nbVariables; a++) { int[] variablesNumbers = { a }; LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all); singleNodes[a] = node; } }
From source file:lu.lippmann.cdb.common.gui.dataset.InstanceFormatter.java
License:Open Source License
/**
 * Renders an instance as an HTML fragment with one
 * {@code name = <b>value</b><br/>} entry per attribute.
 *
 * <p>Attribute names and nominal/string values are HTML-escaped. Date values
 * are formatted with {@code FormatterUtil.DATE_FORMAT}, other numeric values
 * are appended as numbers. Missing values are rendered as {@code ?} for every
 * attribute type; without that check a missing date would be cast to 0 and
 * shown as the epoch date, and a missing number would be shown as "NaN".
 * Attributes of any other type produce an empty bold element.
 *
 * @param inst the instance to render
 * @param withHTMLHeader whether to wrap the fragment in {@code <html><body>...</body></html>}
 * @return the HTML text
 */
public static String htmlFormat(final Instance inst, final boolean withHTMLHeader) {
    final StringBuilder sb = new StringBuilder();
    if (withHTMLHeader) {
        sb.append("<html><body>");
    }
    for (int i = 0; i < inst.numAttributes(); i++) {
        sb.append(StringEscapeUtils.escapeHtml(inst.attribute(i).name())).append(" = ");
        sb.append("<b>");
        if (inst.isMissing(i)) {
            sb.append("?");
        } else if (inst.attribute(i).isNominal() || inst.attribute(i).isString()) {
            sb.append(StringEscapeUtils.escapeHtml(inst.stringValue(i)));
        } else if (inst.attribute(i).isDate()) {
            // Must be tested before isNumeric(): the date branch is only
            // reachable if it comes first.
            final Calendar cal = Calendar.getInstance();
            cal.setTimeInMillis((long) inst.value(i));
            sb.append(FormatterUtil.DATE_FORMAT.format(cal.getTime()));
        } else if (inst.attribute(i).isNumeric()) {
            sb.append(inst.value(i));
        }
        sb.append("</b>");
        sb.append("<br/>");
    }
    if (withHTMLHeader) {
        sb.append("</body></html>");
    }
    return sb.toString();
}
From source file:lu.lippmann.cdb.ext.hydviga.data.StationsDataProvider.java
License:Open Source License
private ChartPanel buildMapPanel(final Instances dataSet, final int xidx, final int yidx, final boolean withLegend) { final XYSeriesCollection data = new XYSeriesCollection(); final Map<Integer, java.util.List<Instance>> filteredInstances = new HashMap<Integer, java.util.List<Instance>>(); final int classIndex = dataSet.classIndex(); if (classIndex < 0) { final XYSeries series = new XYSeries("Serie", false); for (int i = 0; i < dataSet.numInstances(); i++) { series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx)); }/*from www . j av a2 s. co m*/ data.addSeries(series); } else { final Set<String> pvs = new TreeSet<String>( WekaDataStatsUtil.getPresentValuesForNominalAttribute(dataSet, classIndex)); int p = 0; for (final String pv : pvs) { final XYSeries series = new XYSeries(pv, false); for (int i = 0; i < dataSet.numInstances(); i++) { if (dataSet.instance(i).stringValue(classIndex).equals(pv)) { if (!filteredInstances.containsKey(p)) { filteredInstances.put(p, new ArrayList<Instance>()); } filteredInstances.get(p).add(dataSet.instance(i)); series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx)); } } data.addSeries(series); p++; } } final JFreeChart chart = ChartFactory.createScatterPlot(null, // chart title dataSet.attribute(xidx).name(), // x axis label dataSet.attribute(yidx).name(), // y axis label data, // data PlotOrientation.VERTICAL, withLegend, // include legend true, // tooltips false // urls ); final XYPlot xyPlot = (XYPlot) chart.getPlot(); xyPlot.setBackgroundImage(shapeImage); final XYItemRenderer renderer = xyPlot.getRenderer(); final XYToolTipGenerator gen = new XYToolTipGenerator() { @Override public String generateToolTip(XYDataset dataset, int series, int item) { if (classIndex < 0) { return InstanceFormatter.htmlFormat(dataSet.instance(item), true); } else { return InstanceFormatter.htmlFormat(filteredInstances.get(series).get(item), true); } } }; xyPlot.getRangeAxis().setVisible(false); 
xyPlot.getDomainAxis().setVisible(false); xyPlot.getRangeAxis().setLowerBound(60000); xyPlot.getRangeAxis().setUpperBound(135000); xyPlot.getDomainAxis().setLowerBound(45000); xyPlot.getDomainAxis().setUpperBound(110000); xyPlot.setDomainGridlinesVisible(false); xyPlot.setRangeGridlinesVisible(false); xyPlot.setBackgroundPaint(Color.white); int nbSeries; if (classIndex < 0) { nbSeries = 1; } else { nbSeries = filteredInstances.keySet().size(); } for (int i = 0; i < nbSeries; i++) { renderer.setSeriesToolTipGenerator(i, gen); } final XYItemLabelGenerator lg = new XYItemLabelGenerator() { @Override public String generateLabel(final XYDataset ds, final int series, final int item) { final Instance iii = filteredInstances.get(series).get(item); if (iii.stringValue(3).equals(SELECTED_STATUS)) { final String label = iii.stringValue(0); return label.substring(0, label.length() - 4); } else return null; } }; xyPlot.getRenderer().setBaseItemLabelGenerator(lg); xyPlot.getRenderer().setBaseItemLabelsVisible(true); xyPlot.getRenderer().setBaseItemLabelFont(new Font("Tahoma", Font.PLAIN, 12)); xyPlot.getRenderer().setSeriesPaint(1, Color.BLUE); xyPlot.getRenderer().setSeriesPaint(0, new Color(210, 210, 210)); xyPlot.getRenderer().setSeriesPaint(2, Color.DARK_GRAY); //System.out.println("shape -> "+xyPlot.getRenderer().getSeriesStroke(0)); final ChartPanel cp = new ChartPanel(chart); cp.setDomainZoomable(false); cp.setRangeZoomable(false); return cp; }
From source file:lu.lippmann.cdb.ext.hydviga.ui.GapsTabView.java
License:Open Source License
/** * {@inheritDoc}//w ww .j av a 2 s . c o m */ @Override public void update0(final Instances dataSet) throws Exception { if (this.dateAttributeField != null) { this.jxp.remove(this.dateAttributeField); this.dateAttributeField = null; this.jxp.updateUI(); } final java.util.List<String> dateAttributeNames = WekaDataStatsUtil.getDateAttributeNames(dataSet); final boolean hasDateAttributes = (!dateAttributeNames.isEmpty()) /*&&(WekaDataStatsUtil.getNumericAttributesIndexes(dataSet).size()>0)*/; if (hasDateAttributes) { this.dateAttributeField = new JXComboBox(dateAttributeNames.toArray()); this.dateAttributeField.setBorder(new TitledBorder("Date attribute")); this.jxp.add(this.dateAttributeField, BorderLayout.SOUTH); this.dateAttributeField.addActionListener(new ActionListener() { @Override public void actionPerformed(ActionEvent e) { fillTabs(dataSet); } }); new AbstractSimpleAsync<Void>(true) { @Override public Void execute() throws Exception { fillTabs(dataSet); return null; } @Override public void onSuccess(Void result) { } @Override public void onFailure(Throwable caught) { caught.printStackTrace(); } }.start(); this.actionsForFictiveGapsPanel.removeAll(); //final JComboBox seriesWithoutGapCB=new JComboBox(WekaTimeSeriesUtil.getNamesOfAttributesWithoutGap(dataSet).toArray()); final JComboBox seriesWithoutGapCB = new JComboBox( WekaDataStatsUtil.getAttributeNames(dataSet).toArray()); seriesWithoutGapCB.setBorder(new TitledBorder("Fictive gap in")); this.actionsForFictiveGapsPanel.add(seriesWithoutGapCB); final JComboBox sizeGapCB = new JComboBox(new Object[] { 10, 50, 100, 200, 400, 500 }); sizeGapCB.setBorder(new TitledBorder("Size of the fictive gap")); this.actionsForFictiveGapsPanel.add(sizeGapCB); final Object[] partChoice = new Object[PARTS_COUNT]; for (int iii = 0; iii < PARTS_COUNT; iii++) { partChoice[iii] = iii + "/" + PARTS_COUNT; } final JComboBox positionGapCB = new JComboBox(partChoice); positionGapCB.setBorder(new TitledBorder("Position of the 
fictive gap")); this.actionsForFictiveGapsPanel.add(positionGapCB); this.fictiveGapButton = new JXButton("Create a fictive gap"); this.actionsForFictiveGapsPanel.add(this.fictiveGapButton); this.fictiveGapButton.addActionListener(new ActionListener() { @Override public void actionPerformed(ActionEvent e) { /* create a fake gap */ final Attribute attr = dataSet.attribute(seriesWithoutGapCB.getSelectedItem().toString()); final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet); final int position = ((positionGapCB.getSelectedIndex() + 1) * dataSet.numInstances()) / PARTS_COUNT; final int gapsize = Integer.valueOf(sizeGapCB.getSelectedItem().toString()); /* show it */ final GapFillingFrame jxf = new GapFillingFrame(getAbstractTabView(), new Instances(dataSet), attr, dateIdx, GapsUtil.getCountOfValuesBeforeAndAfter(gapsize), position, gapsize, gcp, false); //jxf.setSize(new Dimension(900,700)); //jxf.setExtendedState(Frame.MAXIMIZED_BOTH); jxf.setLocationRelativeTo(jxp); jxf.setVisible(true); //jxf.setResizable(false); } }); this.showKnowledgeDBButton = new JXButton("Show KDB"); this.actionsForFictiveGapsPanel.add(this.showKnowledgeDBButton); this.showKnowledgeDBButton.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { try { final DatasetView view = new DatasetView("KnowledgeDB", eventPublisher, commandDispatcher, applicationContext); view.setDataSet(GapFillingKnowledgeDB.getKnowledgeDB()).setAsVisible(true); } catch (final Exception ee) { ee.printStackTrace(); } } }); this.inspectKnowledgeDBButton = new JXButton("Inspect KDB"); this.actionsForFictiveGapsPanel.add(this.inspectKnowledgeDBButton); this.inspectKnowledgeDBButton.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { try { final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet); new GapFillingKnowledgeDBExplorerFrame(dataSet, dateIdx, gcp); } catch (final Exception ee) { 
ee.printStackTrace(); } } }); this.showKnowledgeDBWithTrueCasesButton = new JXButton("Show KDB with true cases"); this.actionsForFictiveGapsPanel.add(this.showKnowledgeDBWithTrueCasesButton); this.showKnowledgeDBWithTrueCasesButton.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { try { final StringBuilder sb = new StringBuilder("@relation blabla\n"); sb.append("@attribute serieName string\n"); sb.append("@attribute serieX numeric\n"); sb.append("@attribute serieY numeric\n"); sb.append("@attribute year numeric\n"); sb.append("@attribute season {Winter,Spring,Summer,Autumn}\n"); sb.append("@attribute gapSize numeric\n"); sb.append("@attribute gapPosition numeric\n"); sb.append("@attribute isDuringRising {true,false}\n"); sb.append("@attribute flow string\n"); sb.append("@attribute hasDownstream {false,true}\n"); sb.append("@attribute hasUpstream {false,true}\n"); sb.append("@attribute isReal {false,true}\n"); sb.append( "@attribute algo {Interpolation,EM,REG,REPTREE,M5P,ZeroR,ANN,NEARESTNEIGHBOUR}\n"); sb.append("@attribute useDiscretizedTime {false,true}\n"); sb.append("@attribute useMostSimilar {false,true}\n"); sb.append("@attribute useNearest {true,false}\n"); sb.append("@attribute useDownstream {false,true}\n"); sb.append("@attribute useUpstream {true,false}\n"); sb.append("@attribute mae numeric\n"); sb.append("@attribute rmse numeric\n"); sb.append("@attribute rsr numeric\n"); sb.append("@attribute pbias numeric\n"); sb.append("@attribute ns numeric\n"); sb.append("@attribute ioa numeric\n"); sb.append("@attribute wasTheBestSolution {true,false}\n"); sb.append("@data\n"); /* true cases */ final Calendar cal = Calendar.getInstance(); final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet); final Instances gapsDescriptionsDataset = GapsUtil.buildGapsDescription(gcp, dataSet, dateIdx); final int gddc = gapsDescriptionsDataset.numInstances(); for (int i = 0; i < gddc; i++) { final Instance 
trueCase = gapsDescriptionsDataset.instance(i); sb.append(trueCase.stringValue(0)); // serie sb.append(","); sb.append(gcp.getCoordinates(trueCase.stringValue(0))[0]); // x sb.append(","); sb.append(gcp.getCoordinates(trueCase.stringValue(0))[1]); // y sb.append(","); cal.setTime(FormatterUtil.DATE_FORMAT.parse(trueCase.stringValue(1))); // year sb.append(cal.get(Calendar.YEAR)); sb.append(","); sb.append(trueCase.stringValue(2).split("/")[0]); // season sb.append(","); sb.append(trueCase.value(4)); //gapsize sb.append(","); sb.append(trueCase.value(5)); //gap position sb.append(","); sb.append(trueCase.stringValue(10).equals("true")); //rising sb.append(","); sb.append(trueCase.stringValue(11)); // flow sb.append(","); sb.append(!trueCase.stringValue(9).equals("n/a")); //downstream sb.append(","); sb.append(!trueCase.stringValue(8).equals("n/a")); // upstream sb.append(","); sb.append("true"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append(","); sb.append("?"); sb.append("\n"); } /* the simulated cases from the knowledge DB */ final Instances knowledgeDB = GapFillingKnowledgeDB.getKnowledgeDB(); final int kni = knowledgeDB.numInstances(); for (int i = 0; i < kni; i++) { final Instance simulatedCase = knowledgeDB.instance(i); sb.append(simulatedCase.stringValue(0)); // name sb.append(","); sb.append(simulatedCase.value(1)); //x sb.append(","); sb.append(simulatedCase.value(2)); //y sb.append(","); sb.append(simulatedCase.value(6)); //year sb.append(","); sb.append(simulatedCase.stringValue(5)); //season sb.append(","); sb.append(simulatedCase.value(3)); // size sb.append(","); sb.append(simulatedCase.value(4)); // position 
sb.append(","); sb.append(simulatedCase.stringValue(7)); // rising sb.append(","); sb.append(simulatedCase.stringValue(8)); //flow sb.append(","); sb.append(simulatedCase.stringValue(9)); //downstream sb.append(","); sb.append(simulatedCase.stringValue(10)); // upstream sb.append(","); sb.append("false"); // real sb.append(","); sb.append(simulatedCase.stringValue(11)); //algo sb.append(","); sb.append(simulatedCase.stringValue(12)); // discr time sb.append(","); sb.append(simulatedCase.stringValue(13)); // most similar sb.append(","); sb.append(simulatedCase.stringValue(14)); // nearest sb.append(","); sb.append(simulatedCase.stringValue(15)); //downstream sb.append(","); sb.append(simulatedCase.stringValue(16)); //upstream sb.append(","); sb.append(simulatedCase.value(17)); //mae sb.append(","); sb.append(simulatedCase.value(18)); //rmse sb.append(","); sb.append(simulatedCase.value(19)); //rsr sb.append(","); sb.append(simulatedCase.value(20)); //pbias sb.append(","); sb.append(simulatedCase.value(21)); //ns sb.append(","); sb.append(simulatedCase.value(22)); //ioa sb.append(","); sb.append(simulatedCase.stringValue(23)); // best sb.append("\n"); } //System.out.println(sb.toString()); final Instances newds = WekaDataAccessUtil.loadInstancesFromARFFString(sb.toString(), false, false); final DatasetView view = new DatasetView("KnowledgeDB with true cases", eventPublisher, commandDispatcher, applicationContext); view.setDataSet(newds).setAsVisible(true); } catch (final Exception ee) { ee.printStackTrace(); } } }); this.rebuildKnowledgeDBButton = new JXButton("Rebuild KDB"); this.actionsForFictiveGapsPanel.add(this.rebuildKnowledgeDBButton); this.rebuildKnowledgeDBButton.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { rebuildKnowledgeDB(dataSet); } }); } else { throw new Exception("No date attributes in the dataset."); } }