List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:adams.flow.transformer.WekaExperimentEvaluation.java
License:Open Source License
/** * Sets up the testing algorithm and returns it. * * @param data the experimental data//from www . ja v a 2 s.c o m * @return the configured testing algorithm * @throws Exception If something goes wrong, like testing algorithm of * result matrix cannot be instantiated */ protected Tester getTester(Instances data) throws Exception { Tester ttester; ResultMatrix matrix; String tmpStr; weka.core.Attribute att; List<String> rows; List<String> cols; String selectedList; String selectedListDataset; boolean comparisonFieldSet; int i; String name; Range generatorRange; ttester = (Tester) Utils.deepCopy(m_Tester); matrix = (ResultMatrix) Utils.deepCopy(m_OutputFormat); ttester.setInstances(data); ttester.setSignificanceLevel(m_Significance); ttester.setShowStdDevs(matrix.getShowStdDev()); ttester.setSortColumn(-1); if (!m_SwapRowsAndColumns) { rows = determineColumnNames(m_Row, ExperimenterDefaults.getRow()); cols = determineColumnNames(m_Column, ExperimenterDefaults.getColumn()); } else { cols = determineColumnNames(m_Row, ExperimenterDefaults.getRow()); rows = determineColumnNames(m_Column, ExperimenterDefaults.getColumn()); } selectedList = ""; selectedListDataset = ""; comparisonFieldSet = false; for (i = 0; i < data.numAttributes(); i++) { name = data.attribute(i).name(); if (rows.contains(name.toLowerCase())) { selectedListDataset += "," + (i + 1); } else if (name.toLowerCase().equals("key_run")) { ttester.setRunColumn(i); } else if (name.toLowerCase().equals("key_fold")) { ttester.setFoldColumn(i); } else if (cols.contains(name.toLowerCase())) { selectedList += "," + (i + 1); } else if (name.toLowerCase().contains(ExperimenterDefaults.getComparisonField())) { comparisonFieldSet = true; } else if ((name.toLowerCase().contains("root_relative_squared_error")) && (!comparisonFieldSet)) { comparisonFieldSet = true; } } generatorRange = new Range(); if (selectedList.length() != 0) { try { generatorRange.setRanges(selectedList); } catch (Exception ex) { 
handleException("Failed to set ranges: " + selectedList, ex); } } ttester.setResultsetKeyColumns(generatorRange); generatorRange = new Range(); if (selectedListDataset.length() != 0) { try { generatorRange.setRanges(selectedListDataset); } catch (Exception ex) { handleException("Failed to set dataset ranges: " + selectedListDataset, ex); } } ttester.setDatasetKeyColumns(generatorRange); tmpStr = m_ComparisonField.getField(); att = data.attribute(tmpStr); if (att == null) throw new Exception("Cannot find comparison field '" + tmpStr + "' in data!"); ttester.setDisplayedResultsets(null); // all ttester.setResultMatrix(matrix); return ttester; }
From source file:adams.flow.transformer.WekaExperimentEvaluation.java
License:Open Source License
/** * Evaluates the experiment data.//from ww w.j av a 2 s .c o m * * @param data the data to evaluate * @throws Exception If something goes wrong, like loading * data fails or comparison field invalid */ protected void evaluateExperiment(Instances data) throws Exception { Tester ttester; StringBuilder outBuff; int compareCol; int tType; String tmpStr; weka.core.Attribute att; // setup testing algorithm ttester = getTester(data); // evaluate experiment tmpStr = m_ComparisonField.getField(); att = data.attribute(tmpStr); if (att == null) throw new Exception("Cannot find comparison field '" + tmpStr + "' in data!"); compareCol = att.index(); tType = m_TestBase; outBuff = new StringBuilder(); if (m_OutputHeader) { outBuff.append(ttester.header(compareCol)); outBuff.append("\n"); } try { if (tType < ttester.getNumResultsets()) outBuff.append(ttester.multiResultsetFull(tType, compareCol)); else if (tType == ttester.getNumResultsets()) outBuff.append(ttester.multiResultsetSummary(compareCol)); else outBuff.append(ttester.multiResultsetRanking(compareCol)); outBuff.append("\n"); } catch (Exception ex) { outBuff.append(ex.getMessage() + "\n"); } // broadcast evaluation m_OutputToken = new Token(outBuff.toString()); }
From source file:adams.flow.transformer.WekaExtractArray.java
License:Open Source License
/**
 * Extracts a single row or column as a Double array from the incoming
 * dataset or spreadsheet and forwards it as output token.
 * <p>
 * Columns must be numeric. For spreadsheets, absent or missing cells leave
 * the corresponding array element as null.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String error = null;
  Double[] extracted = null;
  final boolean byColumn = (m_Type == ExtractionType.COLUMN);
  final Object payload = m_InputToken.getPayload();

  if (payload instanceof Instances) {
    final Instances dataset = (Instances) payload;
    m_Index.setMax(byColumn ? dataset.numAttributes() : dataset.numInstances());
    final int pos = m_Index.getIntIndex();

    if (pos == -1) {
      error = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
    }
    else if (byColumn && !dataset.attribute(pos).isNumeric()) {
      error = "Column " + m_Index + " is not numeric!";
    }

    if (error == null) {
      if (byColumn) {
        extracted = new Double[dataset.numInstances()];
        for (int r = 0; r < extracted.length; r++) {
          extracted[r] = dataset.instance(r).value(pos);
        }
      }
      else {
        extracted = new Double[dataset.numAttributes()];
        for (int c = 0; c < extracted.length; c++) {
          extracted[c] = dataset.instance(pos).value(c);
        }
      }
    }
  }
  else {
    final SpreadSheet sheet = (SpreadSheet) payload;
    m_Index.setMax(byColumn ? sheet.getColumnCount() : sheet.getRowCount());
    final int pos = m_Index.getIntIndex();

    if (pos == -1) {
      error = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
    }
    else if (byColumn && !sheet.isNumeric(pos, true)) {
      error = "Column " + m_Index + " is not numeric!";
    }

    if (error == null) {
      if (byColumn) {
        extracted = new Double[sheet.getRowCount()];
        for (int r = 0; r < extracted.length; r++) {
          final Cell cell = sheet.getCell(r, pos);
          if ((cell != null) && !cell.isMissing()) {
            extracted[r] = cell.toDouble();
          }
        }
      }
      else {
        extracted = new Double[sheet.getColumnCount()];
        for (int c = 0; c < extracted.length; c++) {
          final Cell cell = sheet.getCell(pos, c);
          if ((cell != null) && !cell.isMissing()) {
            extracted[c] = cell.toDouble();
          }
        }
      }
    }
  }

  if (extracted != null) {
    m_OutputToken = new Token(extracted);
  }

  return error;
}
From source file:adams.flow.transformer.WekaGetInstancesValue.java
License:Open Source License
/** * Executes the flow item./*from w w w. ja v a 2s. c om*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances inst; int index; int row; result = null; inst = (Instances) m_InputToken.getPayload(); m_Column.setData(inst); m_Row.setMax(inst.numInstances()); index = m_Column.getIntIndex(); row = m_Row.getIntIndex(); if (row == -1) result = "Failed to retrieve row: " + m_Row.getIndex(); else if (index == -1) result = "Failed to retrieve column: " + m_Column.getIndex(); if (result == null) { try { if (inst.instance(row).isMissing(index)) { m_OutputToken = new Token("?"); } else { switch (inst.attribute(index).type()) { case Attribute.NUMERIC: m_OutputToken = new Token(inst.instance(row).value(index)); break; case Attribute.DATE: case Attribute.NOMINAL: case Attribute.STRING: case Attribute.RELATIONAL: m_OutputToken = new Token(inst.instance(row).stringValue(index)); break; default: result = "Unhandled attribute type: " + inst.attribute(index).type(); } } } catch (Exception e) { result = handleException("Failed to obtain value from dataset:", e); } } return result; }
From source file:adams.flow.transformer.WekaInstanceDumper.java
License:Open Source License
/** * Turns the dataset header into the appropriate format. * * @param header the header to convert * @return the generated output/*from ww w .j av a 2s . c om*/ */ protected String createHeader(Instances header) { StringBuilder result; int i; result = new StringBuilder(); switch (m_OutputFormat) { case ARFF: result.append(new Instances(header, 0).toString()); break; case CSV: for (i = 0; i < header.numAttributes(); i++) { if (i > 0) result.append(","); result.append(Utils.quote(header.attribute(i).name())); } break; case TAB: for (i = 0; i < header.numAttributes(); i++) { if (i > 0) result.append("\t"); result.append(Utils.quote(header.attribute(i).name())); } break; default: throw new IllegalStateException("Unhandled output format: " + m_OutputFormat); } return result.toString(); }
From source file:adams.flow.transformer.WekaInstanceEvaluator.java
License:Open Source License
/** * Determines the name of the evaluation attribute. * * @param data the original input data * @return the generated name/*from www . j av a 2 s . co m*/ * @see #m_AttributeName */ protected String determineAttributeName(Instances data) { String result; int i; result = ATTRIBUTE_NAME; i = 0; while (data.attribute(result) != null) { i++; result = ATTRIBUTE_NAME + i; } m_AttributeName = result; if (isLoggingEnabled()) getLogger().info("Chosen attribute name: " + m_AttributeName); return result; }
From source file:adams.flow.transformer.WekaInstancesAppend.java
License:Open Source License
/** * Executes the flow item./*from w w w . ja v a 2 s . c o m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; String[] filesStr; File[] files; int i; int n; Instances[] inst; Instances full; String msg; StringBuilder relation; double[] values; result = null; // get filenames files = null; inst = null; if (m_InputToken.getPayload() instanceof String[]) { filesStr = (String[]) m_InputToken.getPayload(); files = new File[filesStr.length]; for (i = 0; i < filesStr.length; i++) files[i] = new PlaceholderFile(filesStr[i]); } else if (m_InputToken.getPayload() instanceof File[]) { files = (File[]) m_InputToken.getPayload(); } else if (m_InputToken.getPayload() instanceof Instances[]) { inst = (Instances[]) m_InputToken.getPayload(); } else { throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass()); } // load data? if (files != null) { inst = new Instances[files.length]; for (i = 0; i < files.length; i++) { try { inst[i] = DataSource.read(files[i].getAbsolutePath()); } catch (Exception e) { result = handleException("Failed to load dataset: " + files[i], e); break; } } } // test compatibility if (result == null) { for (i = 0; i < inst.length - 1; i++) { for (n = i + 1; n < inst.length; n++) { if ((msg = inst[i].equalHeadersMsg(inst[n])) != null) { result = "Dataset #" + (i + 1) + " and #" + (n + 1) + " are not compatible:\n" + msg; break; } } if (result != null) break; } } // append if (result == null) { full = new Instances(inst[0]); relation = new StringBuilder(inst[0].relationName()); for (i = 1; i < inst.length; i++) { relation.append("+" + inst[i].relationName()); for (Instance row : inst[i]) { values = row.toDoubleArray(); for (n = 0; n < values.length; n++) { if (row.attribute(n).isString()) values[n] = full.attribute(n).addStringValue(row.stringValue(n)); else if (row.attribute(n).isRelationValued()) values[n] = 
full.attribute(n).addRelation(row.relationalValue(n)); } if (row instanceof SparseInstance) row = new SparseInstance(row.weight(), values); else row = new DenseInstance(row.weight(), values); full.add(row); } } full.setRelationName(relation.toString()); m_OutputToken = new Token(full); } return result; }
From source file:adams.flow.transformer.WekaInstancesHistogramRanges.java
License:Open Source License
/**
 * Computes histogram ranges for the configured locations (rows by index,
 * columns by index or columns by regular expression) of the incoming
 * dataset and queues the column names of the resulting spreadsheet.
 * <p>
 * For regular expression locations only the first matching attribute is
 * added.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String error = null;
  SpreadSheet ranges = null;

  m_Queue.clear();

  try {
    final Instances dataset = (Instances) m_InputToken.getPayload();

    final ArrayHistogram histogram = new ArrayHistogram();
    histogram.setBinCalculation(m_BinCalculation);
    histogram.setNumBins(m_NumBins);
    histogram.setBinWidth(m_BinWidth);
    histogram.setNormalize(m_Normalize);
    histogram.setUseFixedMinMax(m_UseFixedMinMax);
    histogram.setManualMin(m_ManualMin);
    histogram.setManualMax(m_ManualMax);
    histogram.setDisplayRanges(true);
    histogram.setNumDecimals(m_NumDecimals);

    for (int loc = 0; loc < m_Locations.length; loc++) {
      switch (m_DataType) {
        case ROW_BY_INDEX: {
          final Index rowIndex = new Index(m_Locations[loc].stringValue());
          rowIndex.setMax(dataset.numInstances());
          histogram.add(StatUtils.toNumberArray(dataset.instance(rowIndex.getIntIndex()).toDoubleArray()));
          break;
        }
        case COLUMN_BY_INDEX: {
          final WekaAttributeIndex colIndex = new WekaAttributeIndex(m_Locations[loc].stringValue());
          colIndex.setData(dataset);
          histogram.add(StatUtils.toNumberArray(dataset.attributeToDoubleArray(colIndex.getIntIndex())));
          break;
        }
        case COLUMN_BY_REGEXP: {
          for (int att = 0; att < dataset.numAttributes(); att++) {
            if (!dataset.attribute(att).name().matches(m_Locations[loc].stringValue())) {
              continue;
            }
            histogram.add(StatUtils.toNumberArray(dataset.attributeToDoubleArray(att)));
            // stop at the first matching attribute
            break;
          }
          break;
        }
        default:
          throw new IllegalStateException("Unhandled data type: " + m_DataType);
      }
    }

    ranges = histogram.calculate().toSpreadSheet();
  }
  catch (Exception e) {
    error = handleException("Error generating the ranges: ", e);
  }

  // only reached with a sheet if the calculation succeeded
  if (ranges != null) {
    for (int col = 0; col < ranges.getColumnCount(); col++) {
      m_Queue.add(ranges.getColumnName(col));
    }
  }

  return error;
}
From source file:adams.flow.transformer.WekaInstancesInfo.java
License:Open Source License
/** * Generates attributes statistics.//from www.j a va2s.c om * * @param data the dataset to use * @param index the 0-based index of the attribute */ protected SpreadSheet getAttributeStats(Instances data, int index) { SpreadSheet result; Attribute att; AttributeStats stats; Row row; int i; result = new DefaultSpreadSheet(); result.setName("Attribute statistics - #" + (index + 1) + " " + data.attribute(index).name()); // header row = result.getHeaderRow(); row.addCell("S").setContent("Statistic"); row.addCell("V").setContent("Value"); // data att = data.attribute(index); if (att.isNominal()) { stats = data.attributeStats(index); addStatistic(result, "Total", stats.totalCount); addStatistic(result, "Missing", stats.missingCount); addStatistic(result, "Unique", stats.uniqueCount); addStatistic(result, "Distinct", stats.distinctCount); addStatistic(result, "Integer-like", stats.intCount); addStatistic(result, "Float-like", stats.realCount); for (i = 0; i < stats.nominalCounts.length; i++) addStatistic(result, "Label-" + (i + 1) + "-" + att.value(i), stats.nominalCounts[i]); for (i = 0; i < stats.nominalWeights.length; i++) addStatistic(result, "Weight-" + (i + 1) + "-" + att.value(i), stats.nominalWeights[i]); } else if (att.isDate()) { if (m_DateFormat == null) m_DateFormat = DateUtils.getTimestampFormatter(); stats = data.attributeStats(index); addStatistic(result, "Count", stats.numericStats.count); addStatistic(result, "Min", formatDate(stats.numericStats.min)); addStatistic(result, "Max", formatDate(stats.numericStats.max)); addStatistic(result, "Mean", formatDate(stats.numericStats.mean)); addStatistic(result, "StdDev (in days)", stats.numericStats.stdDev / 1000 / 60 / 60 / 24); } else if (att.isNumeric()) { stats = data.attributeStats(index); addStatistic(result, "Count", stats.numericStats.count); addStatistic(result, "Min", stats.numericStats.min); addStatistic(result, "Max", stats.numericStats.max); addStatistic(result, "Mean", stats.numericStats.mean); 
addStatistic(result, "StdDev", stats.numericStats.stdDev); addStatistic(result, "Sum", stats.numericStats.sum); addStatistic(result, "Sum^2", stats.numericStats.sumSq); } return result; }
From source file:adams.flow.transformer.WekaInstancesInfo.java
License:Open Source License
/**
 * Collects the requested piece of information about the incoming dataset
 * (or the dataset of the incoming instance) and places it in the output
 * queue.
 * <p>
 * Information that depends on an attribute, class attribute or label is
 * silently skipped (nothing is queued) if the corresponding index could
 * not be resolved.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result = null;
  final Instances inst;
  final int index;
  int labelIndex;
  Enumeration<?> enm;
  int i;

  if (m_InputToken.getPayload() instanceof Instance)
    inst = ((Instance) m_InputToken.getPayload()).dataset();
  else
    inst = (Instances) m_InputToken.getPayload();

  m_AttributeIndex.setData(inst);
  index = m_AttributeIndex.getIntIndex();

  m_Queue.clear();

  switch (m_Type) {
    case FULL:
      m_Queue.add(inst.toSummaryString());
      break;

    case FULL_ATTRIBUTE:
      // same guard as all other attribute-based types: no valid index, no output
      if (index != -1)
        m_Queue.add(getAttributeStats(inst, index));
      break;

    case FULL_CLASS:
      if (inst.classIndex() > -1)
        m_Queue.add(getAttributeStats(inst, inst.classIndex()));
      break;

    case HEADER:
      m_Queue.add(new Instances(inst, 0).toString());
      break;

    case RELATION_NAME:
      m_Queue.add(inst.relationName());
      break;

    case ATTRIBUTE_NAME:
      if (index != -1)
        m_Queue.add(inst.attribute(index).name());
      break;

    case ATTRIBUTE_NAMES:
      for (i = 0; i < inst.numAttributes(); i++)
        m_Queue.add(inst.attribute(i).name());
      break;

    case LABELS:
      if (index != -1) {
        enm = inst.attribute(index).enumerateValues();
        // no enumeration is available for attributes without labels
        while ((enm != null) && enm.hasMoreElements())
          m_Queue.add(enm.nextElement());
      }
      break;

    case CLASS_LABELS:
      if (inst.classIndex() > -1) {
        enm = inst.classAttribute().enumerateValues();
        while ((enm != null) && enm.hasMoreElements())
          m_Queue.add(enm.nextElement());
      }
      break;

    case LABEL_COUNT:
      if (index > -1) {
        m_LabelIndex.setData(inst.attribute(index));
        labelIndex = m_LabelIndex.getIntIndex();
        // an unresolved label index must not be used as array index
        if (labelIndex != -1)
          m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]);
      }
      break;

    case LABEL_COUNTS:
      if (index > -1)
        m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts));
      break;

    case LABEL_DISTRIBUTION:
      if (index > -1)
        m_Queue.add(StatUtils.toNumberArray(computeLabelDistribution(inst, index)));
      break;

    case CLASS_LABEL_COUNT:
      if (inst.classIndex() > -1) {
        m_LabelIndex.setData(inst.classAttribute());
        labelIndex = m_LabelIndex.getIntIndex();
        if (labelIndex != -1)
          m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]);
      }
      break;

    case CLASS_LABEL_COUNTS:
      if (inst.classIndex() > -1)
        m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts));
      break;

    case CLASS_LABEL_DISTRIBUTION:
      if (inst.classIndex() > -1)
        m_Queue.add(StatUtils.toNumberArray(computeLabelDistribution(inst, inst.classIndex())));
      break;

    case NUM_ATTRIBUTES:
      m_Queue.add(inst.numAttributes());
      break;

    case NUM_INSTANCES:
      m_Queue.add(inst.numInstances());
      break;

    case NUM_CLASS_LABELS:
      if ((inst.classIndex() != -1) && inst.classAttribute().isNominal())
        m_Queue.add(inst.classAttribute().numValues());
      break;

    case NUM_LABELS:
      if ((index != -1) && inst.attribute(index).isNominal())
        m_Queue.add(inst.attribute(index).numValues());
      break;

    case NUM_DISTINCT_VALUES:
      if (index != -1)
        m_Queue.add(inst.attributeStats(index).distinctCount);
      break;

    case NUM_UNIQUE_VALUES:
      if (index != -1)
        m_Queue.add(inst.attributeStats(index).uniqueCount);
      break;

    case NUM_MISSING_VALUES:
      if (index != -1)
        m_Queue.add(inst.attributeStats(index).missingCount);
      break;

    case MIN:
      if ((index != -1) && inst.attribute(index).isNumeric())
        m_Queue.add(inst.attributeStats(index).numericStats.min);
      break;

    case MAX:
      if ((index != -1) && inst.attribute(index).isNumeric())
        m_Queue.add(inst.attributeStats(index).numericStats.max);
      break;

    case MEAN:
      if ((index != -1) && inst.attribute(index).isNumeric())
        m_Queue.add(inst.attributeStats(index).numericStats.mean);
      break;

    case STDEV:
      if ((index != -1) && inst.attribute(index).isNumeric())
        m_Queue.add(inst.attributeStats(index).numericStats.stdDev);
      break;

    case ATTRIBUTE_TYPE:
      if (index != -1)
        m_Queue.add(Attribute.typeToString(inst.attribute(index)));
      break;

    case CLASS_TYPE:
      if (inst.classIndex() != -1)
        m_Queue.add(Attribute.typeToString(inst.classAttribute()));
      break;

    default:
      result = "Unhandled info type: " + m_Type;
  }

  return result;
}

/**
 * Computes the normalized label counts of an attribute. The attribute
 * statistics are obtained once rather than once per label.
 *
 * @param data the dataset to obtain the statistics from
 * @param attIndex the 0-based index of the attribute
 * @return the normalized distribution
 */
private double[] computeLabelDistribution(Instances data, int attIndex) {
  final int[] counts = data.attributeStats(attIndex).nominalCounts;
  final double[] dist = new double[counts.length];
  for (int i = 0; i < dist.length; i++)
    dist[i] = counts[i];
  Utils.normalize(dist);
  return dist;
}