List of usage examples for weka.core.Instances.instance
public Instance instance(int index)
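Before the per-project examples, here is a minimal, self-contained sketch of the method itself (the file name iris.arff is a placeholder for any dataset Weka can read):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstanceAccess {
  public static void main(String[] args) throws Exception {
    // "iris.arff" is a placeholder; any file Weka can read works
    Instances data = DataSource.read("iris.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // instance(int) returns the row at the given zero-based index,
    // as a reference into the dataset (no copy is made)
    Instance first = data.instance(0);
    System.out.println("first row: " + first);

    // typical loop over all rows
    for (int i = 0; i < data.numInstances(); i++)
      System.out.println(i + ": " + data.instance(i));
  }
}

Because instance(int) hands back a reference rather than a copy, several of the examples below call copy() on the returned row before reusing it in another dataset.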
From source file:adams.data.outlier.AbstractInstanceOutlierDetectorTestCase.java
License:Open Source License
/**
 * Loads the data to process.
 *
 * @param filename the filename to load (without path)
 * @return the first instance in the dataset, or null if it failed
 *         to load; class attribute is always the last
 */
protected Instance load(String filename) {
  Instance result;
  Instances data;

  // Instance here is adams.data.instance.Instance (it supplies the no-arg
  // constructor and set(...) used below), not weka.core.Instance
  result = new Instance();
  try {
    m_TestHelper.copyResourceToTmp(filename);
    data = DataSource.read(new TmpFile(filename).getAbsolutePath());
    data.setClassIndex(data.numAttributes() - 1);
    result.set(data.instance(0));
  }
  catch (Exception e) {
    e.printStackTrace();
    result = null;
  }
  finally {
    m_TestHelper.deleteFileFromTmp(filename);
  }

  return result;
}
From source file:adams.flow.transformer.WekaBootstrapping.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  SpreadSheet sheet;
  Row row;
  Evaluation evalAll;
  Evaluation eval;
  WekaEvaluationContainer cont;
  TIntList indices;
  Random random;
  int i;
  int iteration;
  int size;
  List<Prediction> preds;
  Instances header;
  Instances data;
  ArrayList<Attribute> atts;
  Instance inst;
  boolean numeric;
  int classIndex;
  Double[] errors;
  Double[] errorsRev;
  Percentile<Double> perc;
  Percentile<Double> percRev;
  TIntList subset;

  result = null;

  if (m_InputToken.getPayload() instanceof Evaluation) {
    evalAll = (Evaluation) m_InputToken.getPayload();
  }
  else {
    cont    = (WekaEvaluationContainer) m_InputToken.getPayload();
    evalAll = (Evaluation) cont.getValue(WekaEvaluationContainer.VALUE_EVALUATION);
  }

  if ((evalAll.predictions() == null) || (evalAll.predictions().size() == 0))
    result = "No predictions available!";

  if (result == null) {
    // init spreadsheet
    sheet = new DefaultSpreadSheet();
    row   = sheet.getHeaderRow();
    row.addCell("S").setContentAsString("Subsample");
    for (EvaluationStatistic s : m_StatisticValues)
      row.addCell(s.toString()).setContentAsString(s.toString());
    for (i = 0; i < m_Percentiles.length; i++) {
      switch (m_ErrorCalculation) {
        case ACTUAL_MINUS_PREDICTED:
          row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]);
          break;
        case PREDICTED_MINUS_ACTUAL:
          row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]);
          break;
        case ABSOLUTE:
          row.addCell("perc-Abs-" + i).setContentAsString("Percentile-Abs-" + m_Percentiles[i]);
          break;
        case BOTH:
          row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]);
          row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]);
          break;
        default:
          throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation);
      }
    }

    // set up bootstrapping
    preds   = evalAll.predictions();
    random  = new Random(m_Seed);
    indices = new TIntArrayList();
    size    = (int) Math.round(preds.size() * m_Percentage);
    header  = evalAll.getHeader();
    numeric = header.classAttribute().isNumeric();
    m_ClassIndex.setData(header.classAttribute());
    if (numeric)
      classIndex = -1;
    else
      classIndex = m_ClassIndex.getIntIndex();
    for (i = 0; i < preds.size(); i++)
      indices.add(i);

    // create fake evaluations
    subset = new TIntArrayList();
    for (iteration = 0; iteration < m_NumSubSamples; iteration++) {
      if (isStopped()) {
        sheet = null;
        break;
      }

      // determine subset
      subset.clear();
      if (m_WithReplacement) {
        for (i = 0; i < size; i++)
          subset.add(indices.get(random.nextInt(preds.size())));
      }
      else {
        indices.shuffle(random);
        for (i = 0; i < size; i++)
          subset.add(indices.get(i));
      }

      // create dataset from predictions
      errors    = new Double[size];
      errorsRev = new Double[size];
      atts      = new ArrayList<>();
      atts.add(header.classAttribute().copy("Actual"));
      data = new Instances(header.relationName() + "-" + (iteration + 1), atts, size);
      data.setClassIndex(0);
      for (i = 0; i < subset.size(); i++) {
        inst = new DenseInstance(
            preds.get(subset.get(i)).weight(),
            new double[]{ preds.get(subset.get(i)).actual() });
        data.add(inst);
        switch (m_ErrorCalculation) {
          case ACTUAL_MINUS_PREDICTED:
            errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted();
            break;
          case PREDICTED_MINUS_ACTUAL:
            errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual();
            break;
          case ABSOLUTE:
            errors[i] = Math.abs(preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted());
            break;
          case BOTH:
            errors[i]    = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted();
            errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual();
            break;
          default:
            throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation);
        }
      }

      // perform "fake" evaluation
      try {
        eval = new Evaluation(data);
        for (i = 0; i < subset.size(); i++) {
          if (numeric)
            eval.evaluateModelOnceAndRecordPrediction(
                new double[]{ preds.get(subset.get(i)).predicted() }, data.instance(i));
          else
            eval.evaluateModelOnceAndRecordPrediction(
                ((NominalPrediction) preds.get(subset.get(i))).distribution().clone(), data.instance(i));
        }
      }
      catch (Exception e) {
        result = handleException(
            "Failed to create 'fake' Evaluation object (iteration: " + (iteration + 1) + ")!", e);
        break;
      }

      // add row
      row = sheet.addRow();
      row.addCell("S").setContent(iteration + 1);
      for (EvaluationStatistic s : m_StatisticValues) {
        try {
          row.addCell(s.toString()).setContent(EvaluationHelper.getValue(eval, s, classIndex));
        }
        catch (Exception e) {
          getLogger().log(Level.SEVERE,
              "Failed to calculate statistic in iteration #" + (iteration + 1) + ": " + s, e);
          row.addCell(s.toString()).setMissing();
        }
      }
      for (i = 0; i < m_Percentiles.length; i++) {
        perc = new Percentile<>();
        perc.addAll(errors);
        percRev = new Percentile<>();
        percRev.addAll(errorsRev);
        switch (m_ErrorCalculation) {
          case ACTUAL_MINUS_PREDICTED:
            row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue()));
            break;
          case PREDICTED_MINUS_ACTUAL:
            row.addCell("perc-PmA-" + i).setContent(percRev.getPercentile(m_Percentiles[i].doubleValue()));
            break;
          case ABSOLUTE:
            row.addCell("perc-Abs-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue()));
            break;
          case BOTH:
            row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue()));
            row.addCell("perc-PmA-" + i).setContent(percRev.getPercentile(m_Percentiles[i].doubleValue()));
            break;
          default:
            throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation);
        }
      }
    }

    if ((result == null) && (sheet != null))
      m_OutputToken = new Token(sheet);
  }

  return result;
}
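The "fake" Evaluation trick used above, building a one-attribute dataset of actual values and replaying stored predictions against it, can be reduced to a few lines. A sketch with made-up numbers and a numeric class:

import java.util.ArrayList;
import weka.classifiers.Evaluation;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class FakeEvaluation {
  public static void main(String[] args) throws Exception {
    // single numeric attribute holding the actual values
    ArrayList<Attribute> atts = new ArrayList<>();
    atts.add(new Attribute("Actual"));
    Instances data = new Instances("fake", atts, 3);
    data.setClassIndex(0);

    // {actual, predicted} pairs, values invented for the example
    double[][] pairs = { {1.0, 1.1}, {2.0, 1.8}, {3.0, 3.2} };
    Evaluation eval = new Evaluation(data);
    for (double[] p : pairs) {
      data.add(new DenseInstance(1.0, new double[]{ p[0] }));
      // for a numeric class the "distribution" is just the predicted value
      eval.evaluateModelOnceAndRecordPrediction(
          new double[]{ p[1] }, data.instance(data.numInstances() - 1));
    }
    System.out.println("RMSE: " + eval.rootMeanSquaredError());
  }
}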
From source file:adams.flow.transformer.WekaExtractArray.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  Double[] array;
  Instances inst;
  SpreadSheet sheet;
  int i;
  int index;
  Cell cell;

  result = null;
  array  = null;

  if (m_InputToken.getPayload() instanceof Instances) {
    inst = (Instances) m_InputToken.getPayload();
    if (m_Type == ExtractionType.COLUMN)
      m_Index.setMax(inst.numAttributes());
    else
      m_Index.setMax(inst.numInstances());
    index = m_Index.getIntIndex();
    if (index == -1)
      result = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
    else if ((m_Type == ExtractionType.COLUMN) && !inst.attribute(index).isNumeric())
      result = "Column " + m_Index + " is not numeric!";
    if (result == null) {
      if (m_Type == ExtractionType.COLUMN) {
        array = new Double[inst.numInstances()];
        for (i = 0; i < array.length; i++)
          array[i] = inst.instance(i).value(index);
      }
      else {
        array = new Double[inst.numAttributes()];
        for (i = 0; i < array.length; i++)
          array[i] = inst.instance(index).value(i);
      }
    }
  }
  else {
    sheet = (SpreadSheet) m_InputToken.getPayload();
    if (m_Type == ExtractionType.COLUMN)
      m_Index.setMax(sheet.getColumnCount());
    else
      m_Index.setMax(sheet.getRowCount());
    index = m_Index.getIntIndex();
    if (index == -1)
      result = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")";
    else if ((m_Type == ExtractionType.COLUMN) && !sheet.isNumeric(index, true))
      result = "Column " + m_Index + " is not numeric!";
    if (result == null) {
      if (m_Type == ExtractionType.COLUMN) {
        array = new Double[sheet.getRowCount()];
        for (i = 0; i < array.length; i++) {
          cell = sheet.getCell(i, index);
          if ((cell != null) && !cell.isMissing())
            array[i] = cell.toDouble();
        }
      }
      else {
        array = new Double[sheet.getColumnCount()];
        for (i = 0; i < array.length; i++) {
          cell = sheet.getCell(index, i);
          if ((cell != null) && !cell.isMissing())
            array[i] = cell.toDouble();
        }
      }
    }
  }

  if (array != null)
    m_OutputToken = new Token(array);

  return result;
}
From source file:adams.flow.transformer.WekaFilter.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  weka.core.Instances data;
  weka.core.Instances filteredData;
  weka.core.Instance inst;
  adams.data.instance.Instance instA;
  weka.core.Instance filteredInst;
  String relation;

  result = null;

  data = null;
  inst = null;
  if (m_InputToken.hasPayload(weka.core.Instance.class))
    inst = m_InputToken.getPayload(weka.core.Instance.class);
  else if (m_InputToken.hasPayload(adams.data.instance.Instance.class))
    inst = m_InputToken.getPayload(adams.data.instance.Instance.class).toInstance();
  else if (m_InputToken.hasPayload(weka.core.Instances.class))
    data = m_InputToken.getPayload(weka.core.Instances.class);
  else
    result = m_InputToken.unhandledData();

  if (result == null) {
    try {
      // initialize filter?
      if (!m_Initialized || !m_InitializeOnce) {
        if (data == null) {
          data = new weka.core.Instances(inst.dataset(), 0);
          data.add(inst);
        }
        initActualFilter(data);
      }

      synchronized (m_ActualFilter) {
        if (!m_FlowContextUpdated) {
          m_FlowContextUpdated = true;
          if (m_ActualFilter instanceof FlowContextHandler)
            ((FlowContextHandler) m_ActualFilter).setFlowContext(this);
        }

        // filter data
        filteredData = null;
        filteredInst = null;
        if (data != null) {
          relation     = data.relationName();
          filteredData = weka.filters.Filter.useFilter(data, m_ActualFilter);
          if (m_KeepRelationName) {
            filteredData.setRelationName(relation);
            if (isLoggingEnabled())
              getLogger().info("Setting relation name: " + relation);
          }
          m_Initialized = true;
        }
        else {
          relation = inst.dataset().relationName();
          m_ActualFilter.input(inst);
          m_ActualFilter.batchFinished();
          filteredInst = m_ActualFilter.output();
          if (m_KeepRelationName) {
            filteredInst.dataset().setRelationName(relation);
            if (isLoggingEnabled())
              getLogger().info("Setting relation name: " + relation);
          }
        }
      }

      // build output token
      if (inst != null) {
        if (filteredInst != null) {
          if (m_InputToken.getPayload() instanceof weka.core.Instance) {
            m_OutputToken = new Token(filteredInst);
          }
          else {
            instA = new adams.data.instance.Instance();
            instA.set(filteredInst);
            m_OutputToken = createToken(m_InputToken.getPayload(), instA);
          }
        }
        else if ((filteredData != null) && (filteredData.numInstances() > 0)) {
          m_OutputToken = createToken(m_InputToken.getPayload(), filteredData.instance(0));
        }
      }
      else {
        m_OutputToken = createToken(m_InputToken.getPayload(), filteredData);
      }
    }
    catch (Exception e) {
      result = handleException("Failed to filter data: ", e);
    }
  }

  if (m_OutputToken != null)
    updateProvenance(m_OutputToken);

  return result;
}
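Behind the ADAMS wrapper, batch filtering boils down to two stock Weka calls: setInputFormat() on the configured filter, then Filter.useFilter(). A minimal stand-alone sketch (the Remove filter and the file name are arbitrary choices for illustration):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

public class FilterSketch {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("iris.arff");  // placeholder path
    Remove remove = new Remove();
    remove.setAttributeIndices("1");   // drop the first attribute (1-based range)
    remove.setInputFormat(data);       // must be called before useFilter
    Instances filtered = Filter.useFilter(data, remove);
    System.out.println(filtered.instance(0));
  }
}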
From source file:adams.flow.transformer.WekaGetInstancesValue.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  Instances inst;
  int index;
  int row;

  result = null;

  inst = (Instances) m_InputToken.getPayload();
  m_Column.setData(inst);
  m_Row.setMax(inst.numInstances());
  index = m_Column.getIntIndex();
  row   = m_Row.getIntIndex();
  if (row == -1)
    result = "Failed to retrieve row: " + m_Row.getIndex();
  else if (index == -1)
    result = "Failed to retrieve column: " + m_Column.getIndex();

  if (result == null) {
    try {
      if (inst.instance(row).isMissing(index)) {
        m_OutputToken = new Token("?");
      }
      else {
        switch (inst.attribute(index).type()) {
          case Attribute.NUMERIC:
            m_OutputToken = new Token(inst.instance(row).value(index));
            break;
          case Attribute.DATE:
          case Attribute.NOMINAL:
          case Attribute.STRING:
          case Attribute.RELATIONAL:
            m_OutputToken = new Token(inst.instance(row).stringValue(index));
            break;
          default:
            result = "Unhandled attribute type: " + inst.attribute(index).type();
        }
      }
    }
    catch (Exception e) {
      result = handleException("Failed to obtain value from dataset:", e);
    }
  }

  return result;
}
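The switch on attribute type above is a reusable pattern: numeric values are read with value(), everything else through its string form. A condensed sketch (class and method names are hypothetical), using only the stock Weka API:

import weka.core.Attribute;
import weka.core.Instances;

public class CellReader {
  /** Returns the cell at (row, col) as a display string, "?" if missing. */
  public static String cellToString(Instances data, int row, int col) {
    if (data.instance(row).isMissing(col))
      return "?";
    // numeric cells come back as double, everything else via stringValue()
    if (data.attribute(col).type() == Attribute.NUMERIC)
      return Double.toString(data.instance(row).value(col));
    else
      return data.instance(row).stringValue(col);
  }
}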
From source file:adams.flow.transformer.WekaInstancesHistogramRanges.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  SpreadSheet sheet;
  Instances data;
  int i;
  int n;
  Index index;
  ArrayHistogram stat;

  result = null;
  m_Queue.clear();

  try {
    sheet = null;
    data  = (Instances) m_InputToken.getPayload();
    stat  = new ArrayHistogram();
    stat.setBinCalculation(m_BinCalculation);
    stat.setNumBins(m_NumBins);
    stat.setBinWidth(m_BinWidth);
    stat.setNormalize(m_Normalize);
    stat.setUseFixedMinMax(m_UseFixedMinMax);
    stat.setManualMin(m_ManualMin);
    stat.setManualMax(m_ManualMax);
    stat.setDisplayRanges(true);
    stat.setNumDecimals(m_NumDecimals);
    for (i = 0; i < m_Locations.length; i++) {
      switch (m_DataType) {
        case ROW_BY_INDEX:
          index = new Index(m_Locations[i].stringValue());
          index.setMax(data.numInstances());
          stat.add(StatUtils.toNumberArray(data.instance(index.getIntIndex()).toDoubleArray()));
          break;
        case COLUMN_BY_INDEX:
          index = new WekaAttributeIndex(m_Locations[i].stringValue());
          ((WekaAttributeIndex) index).setData(data);
          stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(index.getIntIndex())));
          break;
        case COLUMN_BY_REGEXP:
          for (n = 0; n < data.numAttributes(); n++) {
            if (data.attribute(n).name().matches(m_Locations[i].stringValue())) {
              stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(n)));
              break;
            }
          }
          break;
        default:
          throw new IllegalStateException("Unhandled data type: " + m_DataType);
      }
    }
    sheet = stat.calculate().toSpreadSheet();
  }
  catch (Exception e) {
    result = handleException("Error generating the ranges: ", e);
    sheet  = null;
  }

  if (sheet != null) {
    for (i = 0; i < sheet.getColumnCount(); i++)
      m_Queue.add(sheet.getColumnName(i));
  }

  return result;
}
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/**
 * Prefixes the attributes.
 *
 * @param index the index of the dataset
 * @param inst the data to process
 * @return the processed data
 */
protected Instances prefixAttributes(Instances inst, int index) {
  Instances result;
  String prefix;
  ArrayList<Attribute> atts;
  int i;

  prefix = createPrefix(inst, index);

  // header
  atts = new ArrayList<>();
  for (i = 0; i < inst.numAttributes(); i++)
    atts.add(inst.attribute(i).copy(prefix + inst.attribute(i).name()));

  // data
  result = new Instances(inst.relationName(), atts, inst.numInstances());
  result.setClassIndex(inst.classIndex());
  for (i = 0; i < inst.numInstances(); i++)
    result.add((Instance) inst.instance(i).copy());

  return result;
}
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/**
 * Updates the IDs in the hashset with the ones stored in the ID attribute
 * of the provided dataset.
 *
 * @param instIndex the dataset index
 * @param inst the dataset to obtain the IDs from
 * @param ids the hashset to store the IDs in
 */
protected void updateIDs(int instIndex, Instances inst, HashSet ids) {
  Attribute att;
  int i;
  boolean numeric;
  HashSet current;
  Object id;

  att = inst.attribute(m_UniqueID);
  if (att == null)
    throw new IllegalStateException("Attribute '" + m_UniqueID + "' not found in relation '"
        + inst.relationName() + "' (#" + (instIndex + 1) + ")!");

  // determine/check type
  if (m_AttType == -1) {
    if ((att.type() == Attribute.NUMERIC) || (att.type() == Attribute.STRING))
      m_AttType = att.type();
    else
      throw new IllegalStateException("Attribute '" + m_UniqueID
          + "' must be either NUMERIC or STRING (#" + (instIndex + 1) + ")!");
  }
  else {
    if (m_AttType != att.type())
      throw new IllegalStateException("Attribute '" + m_UniqueID
          + "' must have same attribute type in all the datasets (#" + (instIndex + 1) + ")!");
  }

  // get IDs
  numeric = m_AttType == Attribute.NUMERIC;
  current = new HashSet();
  for (i = 0; i < inst.numInstances(); i++) {
    if (numeric)
      id = inst.instance(i).value(att);
    else
      id = inst.instance(i).stringValue(att);
    if (m_Strict && current.contains(id))
      throw new IllegalStateException(
          "ID '" + id + "' is not unique in dataset #" + (instIndex + 1) + "!");
    current.add(id);
  }
  ids.addAll(current);
}
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/**
 * Merges the datasets based on the collected IDs.
 *
 * @param orig the original datasets
 * @param inst the processed datasets to merge into one
 * @param ids the IDs for identifying the rows
 * @return the merged dataset
 */
protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) {
  Instances result;
  ArrayList<Attribute> atts;
  int i;
  int n;
  int m;
  int index;
  String relation;
  List sortedIDs;
  Attribute att;
  int[] indexStart;
  double value;
  double[] values;
  HashMap<Integer, Integer> hashmap;
  HashSet<Instance> hs;

  // create header
  if (isLoggingEnabled())
    getLogger().info("Creating merged header...");
  atts       = new ArrayList<>();
  relation   = "";
  indexStart = new int[inst.length];
  for (i = 0; i < inst.length; i++) {
    indexStart[i] = atts.size();
    for (n = 0; n < inst[i].numAttributes(); n++)
      atts.add((Attribute) inst[i].attribute(n).copy());
    // assemble relation name
    if (i > 0)
      relation += "_";
    relation += inst[i].relationName();
  }
  result = new Instances(relation, atts, ids.size());

  // fill with missing values
  if (isLoggingEnabled())
    getLogger().info("Filling with missing values...");
  for (i = 0; i < ids.size(); i++) {
    if (isStopped())
      return null;
    // progress
    if (isLoggingEnabled() && ((i + 1) % 1000 == 0))
      getLogger().info("" + (i + 1));
    result.add(new DenseInstance(result.numAttributes()));
  }

  // sort IDs
  if (isLoggingEnabled())
    getLogger().info("Sorting indices...");
  sortedIDs = new ArrayList(ids);
  Collections.sort(sortedIDs);

  // generate rows
  hashmap = new HashMap<>();
  for (i = 0; i < inst.length; i++) {
    if (isStopped())
      return null;
    if (isLoggingEnabled())
      getLogger().info("Adding file #" + (i + 1));
    att = orig[i].attribute(m_UniqueID);
    for (n = 0; n < inst[i].numInstances(); n++) {
      // progress
      if (isLoggingEnabled() && ((n + 1) % 1000 == 0))
        getLogger().info("" + (n + 1));

      // determine index of row
      if (m_AttType == Attribute.NUMERIC)
        index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att));
      else
        index = Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att));
      if (index < 0)
        throw new IllegalStateException(
            "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!");

      if (!hashmap.containsKey(index))
        hashmap.put(index, 0);
      hashmap.put(index, hashmap.get(index) + 1);

      // use internal representation for faster access
      values = result.instance(index).toDoubleArray();

      // add attribute values
      for (m = 0; m < inst[i].numAttributes(); m++) {
        // missing value?
        if (inst[i].instance(n).isMissing(m))
          continue;
        switch (inst[i].attribute(m).type()) {
          case Attribute.NUMERIC:
          case Attribute.DATE:
          case Attribute.NOMINAL:
            values[indexStart[i] + m] = inst[i].instance(n).value(m);
            break;
          case Attribute.STRING:
            value = result.attribute(indexStart[i] + m)
                .addStringValue(inst[i].instance(n).stringValue(m));
            values[indexStart[i] + m] = value;
            break;
          case Attribute.RELATIONAL:
            value = result.attribute(indexStart[i] + m)
                .addRelation(inst[i].instance(n).relationalValue(m));
            values[indexStart[i] + m] = value;
            break;
          default:
            throw new IllegalStateException("Unhandled attribute type: " + inst[i].attribute(m).type());
        }
      }

      // update row
      result.set(index, new DenseInstance(1.0, values));
    }
  }

  if (getRemove()) {
    hs = new HashSet<>();
    for (Integer x : hashmap.keySet()) {
      if (hashmap.get(x) != inst.length)
        hs.add(result.get(x));
    }
    result.removeAll(hs);
  }

  return result;
}
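The row lookup in merge() works because the merged dataset is pre-filled in sorted-ID order: sort the collected unique IDs once, then binary-search each row's ID to find its slot. A minimal sketch of just that lookup, assuming numeric IDs and made-up values:

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;

public class RowIndexLookup {
  public static void main(String[] args) {
    // unique IDs collected from all datasets (order unknown)
    HashSet<Double> ids = new HashSet<>();
    Collections.addAll(ids, 7.0, 3.0, 11.0);

    // sort once; the sorted position becomes the row index in the merged set
    List<Double> sorted = new ArrayList<>(ids);
    Collections.sort(sorted);

    // each incoming row finds its slot in O(log n)
    int index = Collections.binarySearch(sorted, 7.0);
    System.out.println("row for ID 7.0: " + index);  // prints 1
  }
}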
From source file:adams.flow.transformer.WekaInstancesStatistic.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  SpreadSheet sheet;
  Instances data;
  int i;
  int n;
  Index index;
  AbstractArrayStatistic stat;

  result = null;

  try {
    sheet = null;
    data  = (Instances) m_InputToken.getPayload();
    stat  = m_Statistic.shallowCopy(true);
    for (i = 0; i < m_Locations.length; i++) {
      switch (m_DataType) {
        case ROW_BY_INDEX:
          index = new Index(m_Locations[i].stringValue());
          index.setMax(data.numInstances());
          stat.add(StatUtils.toNumberArray(data.instance(index.getIntIndex()).toDoubleArray()));
          break;
        case COLUMN_BY_INDEX:
          index = new WekaAttributeIndex(m_Locations[i].stringValue());
          ((WekaAttributeIndex) index).setData(data);
          stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(index.getIntIndex())));
          break;
        case COLUMN_BY_REGEXP:
          for (n = 0; n < data.numAttributes(); n++) {
            if (data.attribute(n).name().matches(m_Locations[i].stringValue())) {
              stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(n)));
              break;
            }
          }
          break;
        default:
          throw new IllegalStateException("Unhandled data type: " + m_DataType);
      }
    }
    sheet = stat.calculate().toSpreadSheet();
  }
  catch (Exception e) {
    result = handleException("Error generating the statistic: ", e);
    sheet  = null;
  }

  if (sheet != null)
    m_OutputToken = new Token(sheet);

  return result;
}