List of usage examples for weka.core Instances Instances
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:adams.data.conversion.TimeseriesToWekaInstances.java
License:Open Source License
/** * Performs the actual conversion./*w w w . j a v a 2s. co m*/ * * @return the converted data * @throws Exception if something goes wrong with the conversion */ @Override protected Object doConvert() throws Exception { Instances result; ArrayList<Attribute> atts; Instance inst; Timeseries series; TimeseriesPoint point; double[] value; series = (Timeseries) m_Input; atts = new ArrayList<Attribute>(); atts.add(new Attribute("Timestamp", m_Format.getValue())); atts.add(new Attribute("Value")); result = new Instances(series.getID(), atts, series.size()); for (Object obj : series.toList()) { point = (TimeseriesPoint) obj; value = new double[2]; value[0] = point.getTimestamp().getTime(); value[1] = point.getValue(); inst = new DenseInstance(1.0, value); result.add(inst); } return result; }
From source file:adams.data.featureconverter.Weka.java
License:Open Source License
/** * Performs the actual generation of a row from the raw data. * //w ww . j a v a 2s .co m * @param data the data of the row, elements can be null (= missing) * @return the dataset structure */ @Override protected Instances doGenerateHeader(HeaderDefinition header) { Instances result; ArrayList<Attribute> atts; ArrayList<String> values; int i; atts = new ArrayList<Attribute>(); for (i = 0; i < header.size(); i++) { switch (header.getType(i)) { case BOOLEAN: values = new ArrayList<String>(); values.add("yes"); values.add("no"); atts.add(new Attribute(header.getName(i), values)); break; case NUMERIC: atts.add(new Attribute(header.getName(i))); break; case STRING: case UNKNOWN: atts.add(new Attribute(header.getName(i), (List<String>) null)); break; } } result = new Instances(header.getDataset(), atts, 0); return result; }
From source file:adams.data.instancesanalysis.pls.AbstractMultiClassPLS.java
License:Open Source License
/** * Determines the output format based on the input format and returns this. * * @param input the input format to base the output format on * @return the output format/*from w ww . j ava2 s .co m*/ * @throws Exception in case the determination goes wrong */ @Override public Instances determineOutputFormat(Instances input) throws Exception { ArrayList<Attribute> atts; String prefix; int i; Instances result; List<String> classes; // collect classes m_ClassAttributeIndices = new TIntArrayList(); classes = new ArrayList<>(); for (i = 0; i < input.numAttributes(); i++) { if (m_ClassAttributes.isMatch(input.attribute(i).name())) { classes.add(input.attribute(i).name()); m_ClassAttributeIndices.add(i); } } if (!classes.contains(input.classAttribute().name())) { classes.add(input.classAttribute().name()); m_ClassAttributeIndices.add(input.classAttribute().index()); } // generate header atts = new ArrayList<>(); prefix = getClass().getSimpleName(); for (i = 0; i < getNumComponents(); i++) atts.add(new Attribute(prefix + "_" + (i + 1))); for (String cls : classes) atts.add(new Attribute(cls)); result = new Instances(prefix, atts, 0); result.setClassIndex(result.numAttributes() - 1); m_OutputFormat = result; return result; }
From source file:adams.data.instancesanalysis.pls.AbstractSingleClassPLS.java
License:Open Source License
/** * Determines the output format based on the input format and returns this. * * @param input the input format to base the output format on * @return the output format/*from ww w .ja v a2 s . c o m*/ * @throws Exception in case the determination goes wrong */ @Override public Instances determineOutputFormat(Instances input) throws Exception { ArrayList<Attribute> atts; String prefix; int i; Instances result; // generate header atts = new ArrayList<>(); prefix = getClass().getSimpleName(); for (i = 0; i < getNumComponents(); i++) atts.add(new Attribute(prefix + "_" + (i + 1))); atts.add(new Attribute(input.classAttribute().name())); result = new Instances(prefix, atts, 0); result.setClassIndex(result.numAttributes() - 1); m_OutputFormat = result; return result; }
From source file:adams.flow.source.WekaNewInstances.java
License:Open Source License
/** * Executes the flow item.//from w w w . jav a 2s . c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; ArrayList<Attribute> atts; Instances data; int i; int index; String[] types; String[] names; String name; Attribute att; result = null; m_OutputToken = null; try { types = m_AttributeTypes.listValue(); names = m_AttributeNames.listValue(); m_ClassIndex.setMax(types.length); index = m_ClassIndex.getIntIndex(); atts = new ArrayList<Attribute>(); for (i = 0; i < types.length; i++) { // determine name of attribute if (i >= names.length) { if (i == index) { if (m_ClassName.length() == 0) name = DEFAULT_CLASS; else name = m_ClassName; } else { name = ATTRIBUTE_PREFIX + (i + 1); } } else { name = names[i]; } if (types[i].equals(AttributeTypeList.ATT_NUMERIC)) att = new Attribute(name); else if (types[i].equals(AttributeTypeList.ATT_NOMINAL)) att = new Attribute(name, new ArrayList<String>()); else if (types[i].equals(AttributeTypeList.ATT_STRING)) att = new Attribute(name, (ArrayList<String>) null); else if (types[i].equals(AttributeTypeList.ATT_DATE)) att = new Attribute(name, DEFAULT_DATE_FORMAT); else throw new IllegalStateException("Unhandled attribute type: " + types[i]); atts.add(att); } if (m_RelationName.length() == 0) name = getFullName(); else name = m_RelationName; data = new Instances(name, atts, 0); data.setClassIndex(index); m_OutputToken = new Token(data); updateProvenance(m_OutputToken); } catch (Exception e) { result = handleException("Failed to create new dataset: ", e); } return result; }
From source file:adams.flow.transformer.WekaBootstrapping.java
License:Open Source License
/** * Executes the flow item./*w w w . j a v a2 s . co m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; SpreadSheet sheet; Row row; Evaluation evalAll; Evaluation eval; WekaEvaluationContainer cont; TIntList indices; Random random; int i; int iteration; int size; List<Prediction> preds; Instances header; Instances data; ArrayList<Attribute> atts; Instance inst; boolean numeric; int classIndex; Double[] errors; Double[] errorsRev; Percentile<Double> perc; Percentile<Double> percRev; TIntList subset; result = null; if (m_InputToken.getPayload() instanceof Evaluation) { evalAll = (Evaluation) m_InputToken.getPayload(); } else { cont = (WekaEvaluationContainer) m_InputToken.getPayload(); evalAll = (Evaluation) cont.getValue(WekaEvaluationContainer.VALUE_EVALUATION); } if ((evalAll.predictions() == null) || (evalAll.predictions().size() == 0)) result = "No predictions available!"; if (result == null) { // init spreadsheet sheet = new DefaultSpreadSheet(); row = sheet.getHeaderRow(); row.addCell("S").setContentAsString("Subsample"); for (EvaluationStatistic s : m_StatisticValues) row.addCell(s.toString()).setContentAsString(s.toString()); for (i = 0; i < m_Percentiles.length; i++) { switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]); break; case PREDICTED_MINUS_ACTUAL: row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]); break; case ABSOLUTE: row.addCell("perc-Abs-" + i).setContentAsString("Percentile-Abs-" + m_Percentiles[i]); break; case BOTH: row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]); row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } // set up bootstrapping preds = evalAll.predictions(); random = new Random(m_Seed); indices = new TIntArrayList(); size = (int) Math.round(preds.size() * m_Percentage); header = evalAll.getHeader(); numeric = header.classAttribute().isNumeric(); m_ClassIndex.setData(header.classAttribute()); if (numeric) classIndex = -1; else classIndex = m_ClassIndex.getIntIndex(); for (i = 0; i < preds.size(); i++) indices.add(i); // create fake evalutions subset = new TIntArrayList(); for (iteration = 0; iteration < m_NumSubSamples; iteration++) { if (isStopped()) { sheet = null; break; } // determine subset.clear(); if (m_WithReplacement) { for (i = 0; i < size; i++) subset.add(indices.get(random.nextInt(preds.size()))); } else { indices.shuffle(random); for (i = 0; i < size; i++) subset.add(indices.get(i)); } // create dataset from predictions errors = new Double[size]; errorsRev = new Double[size]; atts = new ArrayList<>(); atts.add(header.classAttribute().copy("Actual")); data = new Instances(header.relationName() + "-" + (iteration + 1), atts, size); data.setClassIndex(0); for (i = 0; i < subset.size(); i++) { inst = new DenseInstance(preds.get(subset.get(i)).weight(), new double[] { preds.get(subset.get(i)).actual() }); data.add(inst); switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted(); break; case PREDICTED_MINUS_ACTUAL: errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual(); break; case ABSOLUTE: errors[i] = Math .abs(preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted()); break; case BOTH: errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted(); errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual(); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } // perform "fake" evaluation try { eval = new Evaluation(data); for (i = 0; i < subset.size(); i++) { if (numeric) eval.evaluateModelOnceAndRecordPrediction( new double[] { preds.get(subset.get(i)).predicted() }, data.instance(i)); else eval.evaluateModelOnceAndRecordPrediction( ((NominalPrediction) preds.get(subset.get(i))).distribution().clone(), data.instance(i)); } } catch (Exception e) { result = handleException( "Failed to create 'fake' Evaluation object (iteration: " + (iteration + 1) + ")!", e); break; } // add row row = sheet.addRow(); row.addCell("S").setContent(iteration + 1); for (EvaluationStatistic s : m_StatisticValues) { try { row.addCell(s.toString()).setContent(EvaluationHelper.getValue(eval, s, classIndex)); } catch (Exception e) { getLogger().log(Level.SEVERE, "Failed to calculate statistic in iteration #" + (iteration + 1) + ": " + s, e); row.addCell(s.toString()).setMissing(); } } for (i = 0; i < m_Percentiles.length; i++) { perc = new Percentile<>(); perc.addAll(errors); percRev = new Percentile<>(); percRev.addAll(errorsRev); switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); break; case PREDICTED_MINUS_ACTUAL: row.addCell("perc-PmA-" + i) .setContent(percRev.getPercentile(m_Percentiles[i].doubleValue())); break; case ABSOLUTE: row.addCell("perc-Abs-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); break; case BOTH: row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); row.addCell("perc-PmA-" + i) .setContent(percRev.getPercentile(m_Percentiles[i].doubleValue())); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } } if ((result == null) && (sheet != null)) m_OutputToken = new Token(sheet); } return result; }
From source file:adams.flow.transformer.WekaInstanceDumper.java
License:Open Source License
/** * Executes the flow item./* w w w .j av a 2s.co m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instance inst; Instances newHeader; double[] values; boolean append; ArrayList<Attribute> atts; int i; result = null; if (m_InputToken.getPayload() instanceof Instance) { inst = (Instance) m_InputToken.getPayload(); // get header and optionally compare it to previous (to start a new // output file) newHeader = inst.dataset(); } else { values = (double[]) m_InputToken.getPayload(); // create artificial dataset header atts = new ArrayList<>(); for (i = 0; i < values.length; i++) atts.add(new Attribute("att_" + (i + 1))); newHeader = new Instances(getName(), atts, 0); inst = new DenseInstance(1.0, values); inst.setDataset(newHeader); } append = true; if (m_Header == null) { m_Header = new Instances(newHeader, 0); if (!m_KeepExisting) append = false; } else { if (m_CheckHeader) { if (!m_Header.equalHeaders(newHeader)) { m_Counter++; m_Header = new Instances(newHeader, 0); append = false; } } } if (!append) FileUtils.delete(createFilename(inst.dataset()).getAbsolutePath()); // buffer data and write to disk if necessary m_Buffer.add(inst); if (m_Buffer.size() >= m_BufferSize) result = writeToDisk(append); // broadcast name if (result == null) m_OutputToken = new Token(createFilename(inst.dataset()).getAbsolutePath()); return result; }
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/** * Prefixes the attributes.//ww w . j a v a 2 s . c om * * @param index the index of the dataset * @param inst the data to process * @return the processed data */ protected Instances prefixAttributes(Instances inst, int index) { Instances result; String prefix; ArrayList<Attribute> atts; int i; prefix = createPrefix(inst, index); // header atts = new ArrayList<>(); for (i = 0; i < inst.numAttributes(); i++) atts.add(inst.attribute(i).copy(prefix + inst.attribute(i).name())); // data result = new Instances(inst.relationName(), atts, inst.numInstances()); result.setClassIndex(inst.classIndex()); for (i = 0; i < inst.numInstances(); i++) result.add((Instance) inst.instance(i).copy()); return result; }
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/** * Merges the datasets based on the collected IDs. * * @param orig the original datasets/* w w w .j ava 2s .c om*/ * @param inst the processed datasets to merge into one * @param ids the IDs for identifying the rows * @return the merged dataset */ protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) { Instances result; ArrayList<Attribute> atts; int i; int n; int m; int index; String relation; List sortedIDs; Attribute att; int[] indexStart; double value; double[] values; HashMap<Integer, Integer> hashmap; HashSet<Instance> hs; // create header if (isLoggingEnabled()) getLogger().info("Creating merged header..."); atts = new ArrayList<>(); relation = ""; indexStart = new int[inst.length]; for (i = 0; i < inst.length; i++) { indexStart[i] = atts.size(); for (n = 0; n < inst[i].numAttributes(); n++) atts.add((Attribute) inst[i].attribute(n).copy()); // assemble relation name if (i > 0) relation += "_"; relation += inst[i].relationName(); } result = new Instances(relation, atts, ids.size()); // fill with missing values if (isLoggingEnabled()) getLogger().info("Filling with missing values..."); for (i = 0; i < ids.size(); i++) { if (isStopped()) return null; // progress if (isLoggingEnabled() && ((i + 1) % 1000 == 0)) getLogger().info("" + (i + 1)); result.add(new DenseInstance(result.numAttributes())); } // sort IDs if (isLoggingEnabled()) getLogger().info("Sorting indices..."); sortedIDs = new ArrayList(ids); Collections.sort(sortedIDs); // generate rows hashmap = new HashMap<>(); for (i = 0; i < inst.length; i++) { if (isStopped()) return null; if (isLoggingEnabled()) getLogger().info("Adding file #" + (i + 1)); att = orig[i].attribute(m_UniqueID); for (n = 0; n < inst[i].numInstances(); n++) { // progress if (isLoggingEnabled() && ((n + 1) % 1000 == 0)) getLogger().info("" + (n + 1)); // determine index of row if (m_AttType == Attribute.NUMERIC) index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att)); else index = Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att)); if (index < 0) throw new IllegalStateException( "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!"); if (!hashmap.containsKey(index)) hashmap.put(index, 0); hashmap.put(index, hashmap.get(index) + 1); // use internal representation for faster access values = result.instance(index).toDoubleArray(); // add attribute values for (m = 0; m < inst[i].numAttributes(); m++) { // missing value? if (inst[i].instance(n).isMissing(m)) continue; switch (inst[i].attribute(m).type()) { case Attribute.NUMERIC: case Attribute.DATE: case Attribute.NOMINAL: values[indexStart[i] + m] = inst[i].instance(n).value(m); break; case Attribute.STRING: value = result.attribute(indexStart[i] + m) .addStringValue(inst[i].instance(n).stringValue(m)); values[indexStart[i] + m] = value; break; case Attribute.RELATIONAL: value = result.attribute(indexStart[i] + m) .addRelation(inst[i].instance(n).relationalValue(m)); values[indexStart[i] + m] = value; break; default: throw new IllegalStateException("Unhandled attribute type: " + inst[i].attribute(m).type()); } } // update row result.set(index, new DenseInstance(1.0, values)); } } if (getRemove()) { hs = new HashSet<>(); for (Integer x : hashmap.keySet()) { if (hashmap.get(x) != inst.length) hs.add(result.get(x)); } result.removeAll(hs); } return result; }
From source file:adams.flow.transformer.WekaPredictionsToInstances.java
License:Open Source License
/** * Executes the flow item.// w ww . j a v a 2 s .co m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Evaluation eval; int i; int n; int indexErr; int indexProb; int indexDist; int indexWeight; boolean nominal; Instances header; ArrayList<Attribute> atts; ArrayList<String> values; ArrayList<Prediction> predictions; Prediction pred; double[] vals; Instances data; Instances testData; int[] indices; result = null; if (m_InputToken.getPayload() instanceof WekaEvaluationContainer) { eval = (Evaluation) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_EVALUATION); indices = (int[]) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_ORIGINALINDICES); testData = (Instances) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_TESTDATA); } else { eval = (Evaluation) m_InputToken.getPayload(); indices = null; testData = null; } header = eval.getHeader(); nominal = header.classAttribute().isNominal(); predictions = eval.predictions(); if (predictions != null) { // create header atts = new ArrayList<>(); // actual if (nominal && m_AddLabelIndex) { values = new ArrayList<>(); for (i = 0; i < header.classAttribute().numValues(); i++) values.add((i + 1) + ":" + header.classAttribute().value(i)); atts.add(new Attribute(m_MeasuresPrefix + "Actual", values)); } else { atts.add(header.classAttribute().copy(m_MeasuresPrefix + "Actual")); } // predicted if (nominal && m_AddLabelIndex) { values = new ArrayList<>(); for (i = 0; i < header.classAttribute().numValues(); i++) values.add((i + 1) + ":" + header.classAttribute().value(i)); atts.add(new Attribute(m_MeasuresPrefix + "Predicted", values)); } else { atts.add(header.classAttribute().copy(m_MeasuresPrefix + "Predicted")); } // error indexErr = -1; if (m_ShowError) { indexErr = atts.size(); if (nominal) { values = new ArrayList<>(); values.add("n"); values.add("y"); atts.add(new Attribute(m_MeasuresPrefix + "Error", values)); } else { atts.add(new Attribute(m_MeasuresPrefix + "Error")); } } // probability indexProb = -1; if (m_ShowProbability && nominal) { indexProb = atts.size(); atts.add(new Attribute(m_MeasuresPrefix + "Probability")); } // distribution indexDist = -1; if (m_ShowDistribution && nominal) { indexDist = atts.size(); for (n = 0; n < header.classAttribute().numValues(); n++) atts.add(new Attribute( m_MeasuresPrefix + "Distribution (" + header.classAttribute().value(n) + ")")); } // weight indexWeight = -1; if (m_ShowWeight) { indexWeight = atts.size(); atts.add(new Attribute(m_MeasuresPrefix + "Weight")); } data = new Instances("Predictions", atts, predictions.size()); data.setClassIndex(1); // predicted // add data if ((indices != null) && m_UseOriginalIndices) predictions = CrossValidationHelper.alignPredictions(predictions, indices); for (i = 0; i < predictions.size(); i++) { pred = predictions.get(i); vals = new double[data.numAttributes()]; // actual vals[0] = pred.actual(); // predicted vals[1] = pred.predicted(); // error if (m_ShowError) { if (nominal) { vals[indexErr] = ((pred.actual() != pred.predicted()) ? 1.0 : 0.0); } else { if (m_UseAbsoluteError) vals[indexErr] = Math.abs(pred.actual() - pred.predicted()); else vals[indexErr] = pred.actual() - pred.predicted(); } } // probability if (m_ShowProbability && nominal) { vals[indexProb] = StatUtils.max(((NominalPrediction) pred).distribution()); } // distribution if (m_ShowDistribution && nominal) { for (n = 0; n < header.classAttribute().numValues(); n++) vals[indexDist + n] = ((NominalPrediction) pred).distribution()[n]; } // weight if (m_ShowWeight) { vals[indexWeight] = pred.weight(); } // add row data.add(new DenseInstance(1.0, vals)); } // add test data? if ((testData != null) && !m_TestAttributes.isEmpty()) { testData = filterTestData(testData); if (testData != null) data = Instances.mergeInstances(data, testData); } // generate output token m_OutputToken = new Token(data); } else { getLogger().severe("No predictions available from Evaluation object!"); } return result; }