List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
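Before the project-specific examples below, here is a minimal, self-contained sketch of the call itself (class, relation and attribute names are made up for illustration). Instances implements java.util.List<Instance>, so add(Instance) appends a row and returns true; note that it shallow-copies the instance before adding it.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class InstancesAddDemo {
  public static void main(String[] args) {
    // build a header with two numeric attributes
    ArrayList<Attribute> atts = new ArrayList<>();
    atts.add(new Attribute("x"));
    atts.add(new Attribute("y"));
    Instances data = new Instances("demo", atts, 0);

    // create a row (weight 1.0) and append it
    Instance row = new DenseInstance(1.0, new double[]{1.0, 2.0});
    boolean added = data.add(row);  // shallow-copies the instance into the dataset

    System.out.println(added + ", rows: " + data.numInstances());  // true, rows: 1
  }
}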
From source file: adams.flow.transformer.WekaFilter.java
License: Open Source License

/**
 * Executes the flow item.
 *
 * @return    null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  weka.core.Instances data;
  weka.core.Instances filteredData;
  weka.core.Instance inst;
  adams.data.instance.Instance instA;
  weka.core.Instance filteredInst;
  String relation;

  result = null;

  data = null;
  inst = null;
  if (m_InputToken.hasPayload(weka.core.Instance.class))
    inst = m_InputToken.getPayload(weka.core.Instance.class);
  else if (m_InputToken.hasPayload(adams.data.instance.Instance.class))
    inst = m_InputToken.getPayload(adams.data.instance.Instance.class).toInstance();
  else if (m_InputToken.hasPayload(weka.core.Instances.class))
    data = m_InputToken.getPayload(weka.core.Instances.class);
  else
    result = m_InputToken.unhandledData();

  if (result == null) {
    try {
      // initialize filter?
      if (!m_Initialized || !m_InitializeOnce) {
        if (data == null) {
          data = new weka.core.Instances(inst.dataset(), 0);
          data.add(inst);
        }
        initActualFilter(data);
      }

      synchronized (m_ActualFilter) {
        if (!m_FlowContextUpdated) {
          m_FlowContextUpdated = true;
          if (m_ActualFilter instanceof FlowContextHandler)
            ((FlowContextHandler) m_ActualFilter).setFlowContext(this);
        }

        // filter data
        filteredData = null;
        filteredInst = null;
        if (data != null) {
          relation = data.relationName();
          filteredData = weka.filters.Filter.useFilter(data, m_ActualFilter);
          if (m_KeepRelationName) {
            filteredData.setRelationName(relation);
            if (isLoggingEnabled())
              getLogger().info("Setting relation name: " + relation);
          }
          m_Initialized = true;
        }
        else {
          relation = inst.dataset().relationName();
          m_ActualFilter.input(inst);
          m_ActualFilter.batchFinished();
          filteredInst = m_ActualFilter.output();
          if (m_KeepRelationName) {
            filteredInst.dataset().setRelationName(relation);
            if (isLoggingEnabled())
              getLogger().info("Setting relation name: " + relation);
          }
        }
      }

      // build output token
      if (inst != null) {
        if (filteredInst != null) {
          if (m_InputToken.getPayload() instanceof weka.core.Instance) {
            m_OutputToken = new Token(filteredInst);
          }
          else {
            instA = new adams.data.instance.Instance();
            instA.set(filteredInst);
            m_OutputToken = createToken(m_InputToken.getPayload(), instA);
          }
        }
        else if ((filteredData != null) && (filteredData.numInstances() > 0)) {
          m_OutputToken = createToken(m_InputToken.getPayload(), filteredData.instance(0));
        }
      }
      else {
        m_OutputToken = createToken(m_InputToken.getPayload(), filteredData);
      }
    }
    catch (Exception e) {
      result = handleException("Failed to filter data: ", e);
    }
  }

  if (m_OutputToken != null)
    updateProvenance(m_OutputToken);

  return result;
}
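The single-instance path above relies on a common Weka idiom: wrap a lone weka.core.Instance in an empty copy of its header so the filter has a dataset to be initialized on. A minimal sketch of that idiom, with exception handling omitted (filter is assumed to be any configured weka.filters.Filter; initActualFilter above presumably performs the setInputFormat step):

// a one-row dataset built from the instance's own header
Instances wrapped = new Instances(inst.dataset(), 0);
wrapped.add(inst);

// initialize the filter on it, then push the single instance through
filter.setInputFormat(wrapped);
filter.input(inst);
filter.batchFinished();
Instance filtered = filter.output();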
From source file: adams.flow.transformer.WekaInstancesAppend.java
License: Open Source License

/**
 * Executes the flow item.
 *
 * @return    null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  String[] filesStr;
  File[] files;
  int i;
  int n;
  Instances[] inst;
  Instances full;
  String msg;
  StringBuilder relation;
  double[] values;

  result = null;

  // get filenames
  files = null;
  inst = null;
  if (m_InputToken.getPayload() instanceof String[]) {
    filesStr = (String[]) m_InputToken.getPayload();
    files = new File[filesStr.length];
    for (i = 0; i < filesStr.length; i++)
      files[i] = new PlaceholderFile(filesStr[i]);
  }
  else if (m_InputToken.getPayload() instanceof File[]) {
    files = (File[]) m_InputToken.getPayload();
  }
  else if (m_InputToken.getPayload() instanceof Instances[]) {
    inst = (Instances[]) m_InputToken.getPayload();
  }
  else {
    throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass());
  }

  // load data?
  if (files != null) {
    inst = new Instances[files.length];
    for (i = 0; i < files.length; i++) {
      try {
        inst[i] = DataSource.read(files[i].getAbsolutePath());
      }
      catch (Exception e) {
        result = handleException("Failed to load dataset: " + files[i], e);
        break;
      }
    }
  }

  // test compatibility
  if (result == null) {
    for (i = 0; i < inst.length - 1; i++) {
      for (n = i + 1; n < inst.length; n++) {
        if ((msg = inst[i].equalHeadersMsg(inst[n])) != null) {
          result = "Dataset #" + (i + 1) + " and #" + (n + 1) + " are not compatible:\n" + msg;
          break;
        }
      }
      if (result != null)
        break;
    }
  }

  // append
  if (result == null) {
    full = new Instances(inst[0]);
    relation = new StringBuilder(inst[0].relationName());
    for (i = 1; i < inst.length; i++) {
      relation.append("+" + inst[i].relationName());
      for (Instance row : inst[i]) {
        values = row.toDoubleArray();
        for (n = 0; n < values.length; n++) {
          if (row.attribute(n).isString())
            values[n] = full.attribute(n).addStringValue(row.stringValue(n));
          else if (row.attribute(n).isRelationValued())
            values[n] = full.attribute(n).addRelation(row.relationalValue(n));
        }
        if (row instanceof SparseInstance)
          row = new SparseInstance(row.weight(), values);
        else
          row = new DenseInstance(row.weight(), values);
        full.add(row);
      }
    }
    full.setRelationName(relation.toString());
    m_OutputToken = new Token(full);
  }

  return result;
}
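The string/relational handling above matters because those attribute types store values per header: the double in a row is an index into its own header's value pool, so each value has to be re-added to the destination attribute before the row is appended. A minimal sketch of the same append (base and extra are assumed to be header-compatible datasets):

Instances full = new Instances(base);  // deep copy of header and rows
for (Instance row : extra) {
  double[] values = row.toDoubleArray();
  for (int n = 0; n < values.length; n++) {
    if (row.attribute(n).isString())
      values[n] = full.attribute(n).addStringValue(row.stringValue(n));
    else if (row.attribute(n).isRelationValued())
      values[n] = full.attribute(n).addRelation(row.relationalValue(n));
  }
  full.add(new DenseInstance(row.weight(), values));
}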
From source file: adams.flow.transformer.WekaInstancesMerge.java
License: Open Source License

/**
 * Prefixes the attributes.
 *
 * @param index   the index of the dataset
 * @param inst    the data to process
 * @return        the processed data
 */
protected Instances prefixAttributes(Instances inst, int index) {
  Instances result;
  String prefix;
  ArrayList<Attribute> atts;
  int i;

  prefix = createPrefix(inst, index);

  // header
  atts = new ArrayList<>();
  for (i = 0; i < inst.numAttributes(); i++)
    atts.add(inst.attribute(i).copy(prefix + inst.attribute(i).name()));

  // data
  result = new Instances(inst.relationName(), atts, inst.numInstances());
  result.setClassIndex(inst.classIndex());
  for (i = 0; i < inst.numInstances(); i++)
    result.add((Instance) inst.instance(i).copy());

  return result;
}
From source file: adams.flow.transformer.WekaInstancesMerge.java
License: Open Source License

/**
 * Merges the datasets based on the collected IDs.
 *
 * @param orig    the original datasets
 * @param inst    the processed datasets to merge into one
 * @param ids     the IDs for identifying the rows
 * @return        the merged dataset
 */
protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) {
  Instances result;
  ArrayList<Attribute> atts;
  int i;
  int n;
  int m;
  int index;
  String relation;
  List sortedIDs;
  Attribute att;
  int[] indexStart;
  double value;
  double[] values;
  HashMap<Integer, Integer> hashmap;
  HashSet<Instance> hs;

  // create header
  if (isLoggingEnabled())
    getLogger().info("Creating merged header...");
  atts = new ArrayList<>();
  relation = "";
  indexStart = new int[inst.length];
  for (i = 0; i < inst.length; i++) {
    indexStart[i] = atts.size();
    for (n = 0; n < inst[i].numAttributes(); n++)
      atts.add((Attribute) inst[i].attribute(n).copy());
    // assemble relation name
    if (i > 0)
      relation += "_";
    relation += inst[i].relationName();
  }
  result = new Instances(relation, atts, ids.size());

  // fill with missing values
  if (isLoggingEnabled())
    getLogger().info("Filling with missing values...");
  for (i = 0; i < ids.size(); i++) {
    if (isStopped())
      return null;
    // progress
    if (isLoggingEnabled() && ((i + 1) % 1000 == 0))
      getLogger().info("" + (i + 1));
    result.add(new DenseInstance(result.numAttributes()));
  }

  // sort IDs
  if (isLoggingEnabled())
    getLogger().info("Sorting indices...");
  sortedIDs = new ArrayList(ids);
  Collections.sort(sortedIDs);

  // generate rows
  hashmap = new HashMap<>();
  for (i = 0; i < inst.length; i++) {
    if (isStopped())
      return null;
    if (isLoggingEnabled())
      getLogger().info("Adding file #" + (i + 1));
    att = orig[i].attribute(m_UniqueID);
    for (n = 0; n < inst[i].numInstances(); n++) {
      // progress
      if (isLoggingEnabled() && ((n + 1) % 1000 == 0))
        getLogger().info("" + (n + 1));

      // determine index of row
      if (m_AttType == Attribute.NUMERIC)
        index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att));
      else
        index = Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att));
      if (index < 0)
        throw new IllegalStateException(
            "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!");

      if (!hashmap.containsKey(index))
        hashmap.put(index, 0);
      hashmap.put(index, hashmap.get(index) + 1);

      // use internal representation for faster access
      values = result.instance(index).toDoubleArray();

      // add attribute values
      for (m = 0; m < inst[i].numAttributes(); m++) {
        // missing value?
        if (inst[i].instance(n).isMissing(m))
          continue;

        switch (inst[i].attribute(m).type()) {
          case Attribute.NUMERIC:
          case Attribute.DATE:
          case Attribute.NOMINAL:
            values[indexStart[i] + m] = inst[i].instance(n).value(m);
            break;

          case Attribute.STRING:
            value = result.attribute(indexStart[i] + m).addStringValue(inst[i].instance(n).stringValue(m));
            values[indexStart[i] + m] = value;
            break;

          case Attribute.RELATIONAL:
            value = result.attribute(indexStart[i] + m).addRelation(inst[i].instance(n).relationalValue(m));
            values[indexStart[i] + m] = value;
            break;

          default:
            throw new IllegalStateException("Unhandled attribute type: " + inst[i].attribute(m).type());
        }
      }

      // update row
      result.set(index, new DenseInstance(1.0, values));
    }
  }

  if (getRemove()) {
    hs = new HashSet<>();
    for (Integer x : hashmap.keySet()) {
      if (hashmap.get(x) != inst.length)
        hs.add(result.get(x));
    }
    result.removeAll(hs);
  }

  return result;
}
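Two details of the method above are worth isolating: new DenseInstance(int) creates a row whose values are all missing, and Instances.set(int, Instance) overwrites a row in place, so a dataset can be pre-sized with placeholder rows and then patched by index. A short sketch under those assumptions (atts and numRows are made up):

// pre-fill with all-missing rows...
Instances result = new Instances("merged", atts, numRows);
for (int i = 0; i < numRows; i++)
  result.add(new DenseInstance(result.numAttributes()));  // every value missing

// ...then overwrite a row once its values are known
double[] values = result.instance(0).toDoubleArray();
values[0] = 42.0;
result.set(0, new DenseInstance(1.0, values));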
From source file: adams.flow.transformer.WekaPredictionsToInstances.java
License: Open Source License

/**
 * Executes the flow item.
 *
 * @return    null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  Evaluation eval;
  int i;
  int n;
  int indexErr;
  int indexProb;
  int indexDist;
  int indexWeight;
  boolean nominal;
  Instances header;
  ArrayList<Attribute> atts;
  ArrayList<String> values;
  ArrayList<Prediction> predictions;
  Prediction pred;
  double[] vals;
  Instances data;
  Instances testData;
  int[] indices;

  result = null;

  if (m_InputToken.getPayload() instanceof WekaEvaluationContainer) {
    eval = (Evaluation) ((WekaEvaluationContainer) m_InputToken.getPayload())
        .getValue(WekaEvaluationContainer.VALUE_EVALUATION);
    indices = (int[]) ((WekaEvaluationContainer) m_InputToken.getPayload())
        .getValue(WekaEvaluationContainer.VALUE_ORIGINALINDICES);
    testData = (Instances) ((WekaEvaluationContainer) m_InputToken.getPayload())
        .getValue(WekaEvaluationContainer.VALUE_TESTDATA);
  }
  else {
    eval = (Evaluation) m_InputToken.getPayload();
    indices = null;
    testData = null;
  }
  header = eval.getHeader();
  nominal = header.classAttribute().isNominal();
  predictions = eval.predictions();

  if (predictions != null) {
    // create header
    atts = new ArrayList<>();

    // actual
    if (nominal && m_AddLabelIndex) {
      values = new ArrayList<>();
      for (i = 0; i < header.classAttribute().numValues(); i++)
        values.add((i + 1) + ":" + header.classAttribute().value(i));
      atts.add(new Attribute(m_MeasuresPrefix + "Actual", values));
    }
    else {
      atts.add(header.classAttribute().copy(m_MeasuresPrefix + "Actual"));
    }

    // predicted
    if (nominal && m_AddLabelIndex) {
      values = new ArrayList<>();
      for (i = 0; i < header.classAttribute().numValues(); i++)
        values.add((i + 1) + ":" + header.classAttribute().value(i));
      atts.add(new Attribute(m_MeasuresPrefix + "Predicted", values));
    }
    else {
      atts.add(header.classAttribute().copy(m_MeasuresPrefix + "Predicted"));
    }

    // error
    indexErr = -1;
    if (m_ShowError) {
      indexErr = atts.size();
      if (nominal) {
        values = new ArrayList<>();
        values.add("n");
        values.add("y");
        atts.add(new Attribute(m_MeasuresPrefix + "Error", values));
      }
      else {
        atts.add(new Attribute(m_MeasuresPrefix + "Error"));
      }
    }

    // probability
    indexProb = -1;
    if (m_ShowProbability && nominal) {
      indexProb = atts.size();
      atts.add(new Attribute(m_MeasuresPrefix + "Probability"));
    }

    // distribution
    indexDist = -1;
    if (m_ShowDistribution && nominal) {
      indexDist = atts.size();
      for (n = 0; n < header.classAttribute().numValues(); n++)
        atts.add(new Attribute(m_MeasuresPrefix + "Distribution (" + header.classAttribute().value(n) + ")"));
    }

    // weight
    indexWeight = -1;
    if (m_ShowWeight) {
      indexWeight = atts.size();
      atts.add(new Attribute(m_MeasuresPrefix + "Weight"));
    }

    data = new Instances("Predictions", atts, predictions.size());
    data.setClassIndex(1); // predicted

    // add data
    if ((indices != null) && m_UseOriginalIndices)
      predictions = CrossValidationHelper.alignPredictions(predictions, indices);
    for (i = 0; i < predictions.size(); i++) {
      pred = predictions.get(i);
      vals = new double[data.numAttributes()];
      // actual
      vals[0] = pred.actual();
      // predicted
      vals[1] = pred.predicted();
      // error
      if (m_ShowError) {
        if (nominal) {
          vals[indexErr] = ((pred.actual() != pred.predicted()) ? 1.0 : 0.0);
        }
        else {
          if (m_UseAbsoluteError)
            vals[indexErr] = Math.abs(pred.actual() - pred.predicted());
          else
            vals[indexErr] = pred.actual() - pred.predicted();
        }
      }
      // probability
      if (m_ShowProbability && nominal) {
        vals[indexProb] = StatUtils.max(((NominalPrediction) pred).distribution());
      }
      // distribution
      if (m_ShowDistribution && nominal) {
        for (n = 0; n < header.classAttribute().numValues(); n++)
          vals[indexDist + n] = ((NominalPrediction) pred).distribution()[n];
      }
      // weight
      if (m_ShowWeight) {
        vals[indexWeight] = pred.weight();
      }
      // add row
      data.add(new DenseInstance(1.0, vals));
    }

    // add test data?
    if ((testData != null) && !m_TestAttributes.isEmpty()) {
      testData = filterTestData(testData);
      if (testData != null)
        data = Instances.mergeInstances(data, testData);
    }

    // generate output token
    m_OutputToken = new Token(data);
  }
  else {
    getLogger().severe("No predictions available from Evaluation object!");
  }

  return result;
}
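Stripped of the optional error/probability/distribution/weight columns, the conversion above reduces to two attributes and one added row per prediction. A sketch under these assumptions: eval is a weka.classifiers.Evaluation that has already been run, Prediction is weka.classifiers.evaluation.Prediction, and for a nominal class actual() and predicted() hold label indices rather than raw values.

ArrayList<Attribute> atts = new ArrayList<>();
atts.add(new Attribute("Actual"));
atts.add(new Attribute("Predicted"));
Instances preds = new Instances("Predictions", atts, eval.predictions().size());
for (Prediction p : eval.predictions())
  preds.add(new DenseInstance(1.0, new double[]{p.actual(), p.predicted()}));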
From source file: adams.flow.transformer.WekaStoreInstance.java
License: Open Source License

/**
 * Executes the flow item.
 *
 * @return    null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  Instance inst;
  Instances data;
  Storage storage;

  result = null;

  inst = (Instance) m_InputToken.getPayload();
  storage = getStorageHandler().getStorage();

  // dataset present?
  if (!storage.has(m_Dataset)) {
    data = new Instances(inst.dataset(), 0);
    storage.put(m_Dataset, data);
    if (isLoggingEnabled())
      getLogger().info("Adding dataset to storage: " + m_Dataset);
  }
  else {
    data = (Instances) storage.get(m_Dataset);
    if (isLoggingEnabled())
      getLogger().info("Dataset present in storage: " + m_Dataset);
  }

  data.add(inst);
  storage.put(m_Dataset, data);
  if (isLoggingEnabled())
    getLogger().info("Added instance to storage: " + m_Dataset);

  // broadcast data
  m_OutputToken = new Token(data);

  return result;
}
From source file: adams.flow.transformer.WekaTrainClassifier.java
License: Open Source License

/**
 * Executes the flow item.
 *
 * @return    null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  Instances data;
  Instance inst;
  weka.classifiers.Classifier cls;

  result = null;

  try {
    cls = null;
    if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instances)) {
      cls = getClassifierInstance();
      data = (Instances) m_InputToken.getPayload();
      cls.buildClassifier(data);
      m_OutputToken = new Token(new WekaModelContainer(cls, new Instances(data, 0), data));
    }
    else if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instance)) {
      if (m_IncrementalClassifier == null) {
        cls = getClassifierInstance();
        if (!(cls instanceof UpdateableClassifier))
          result = m_Classifier + "/" + cls.getClass().getName() + " is not an incremental classifier!";
      }
      if (result == null) {
        inst = (Instance) m_InputToken.getPayload();
        if (m_IncrementalClassifier == null) {
          m_IncrementalClassifier = cls;
          if (m_SkipBuild) {
            ((UpdateableClassifier) m_IncrementalClassifier).updateClassifier(inst);
          }
          else {
            data = new Instances(inst.dataset(), 1);
            data.add((Instance) inst.copy());
            m_IncrementalClassifier.buildClassifier(data);
          }
        }
        else {
          ((UpdateableClassifier) m_IncrementalClassifier).updateClassifier(inst);
        }
        m_OutputToken = new Token(
            new WekaModelContainer(m_IncrementalClassifier, new Instances(inst.dataset(), 0)));
      }
    }
  }
  catch (Exception e) {
    m_OutputToken = null;
    result = handleException("Failed to process data:", e);
  }

  if (m_OutputToken != null)
    updateProvenance(m_OutputToken);

  return result;
}
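The incremental branch above boils down to: build once on a one-row dataset seeded from the first instance, then feed subsequent instances through updateClassifier(). A sketch using weka.classifiers.bayes.NaiveBayesUpdateable, with exception handling omitted (first and more are assumed to exist):

NaiveBayesUpdateable cls = new NaiveBayesUpdateable();

// seed a one-row dataset from the first instance's header
Instances seed = new Instances(first.dataset(), 1);
seed.add((Instance) first.copy());
cls.buildClassifier(seed);  // must be called once before any updates

// subsequent instances are folded in incrementally
for (Instance inst : more)
  cls.updateClassifier(inst);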
From source file: adams.flow.transformer.WekaTrainClusterer.java
License: Open Source License

/**
 * Executes the flow item.
 *
 * @return    null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  Instances data;
  Instance inst;
  weka.clusterers.Clusterer cls;
  WekaModelContainer cont;

  result = null;

  try {
    cls = null;
    if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instances)) {
      cls = getClustererInstance();
      data = (Instances) m_InputToken.getPayload();
      cls.buildClusterer(data);
      cont = new WekaModelContainer(cls, new Instances(data, 0), data);
      cont = m_PostProcessor.postProcess(cont);
      m_OutputToken = new Token(cont);
    }
    else if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instance)) {
      if (m_IncrementalClusterer == null) {
        cls = getClustererInstance();
        if (!(cls instanceof UpdateableClusterer))
          result = m_Clusterer + "/" + cls.getClass().getName() + " is not an incremental clusterer!";
      }
      if (result == null) {
        inst = (Instance) m_InputToken.getPayload();
        if (m_IncrementalClusterer == null) {
          m_IncrementalClusterer = cls;
          data = new Instances(inst.dataset(), 1);
          data.add((Instance) inst.copy());
          m_IncrementalClusterer.buildClusterer(data);
        }
        else {
          ((UpdateableClusterer) m_IncrementalClusterer).updateClusterer(inst);
          ((UpdateableClusterer) m_IncrementalClusterer).updateFinished();
        }
        m_OutputToken = new Token(
            new WekaModelContainer(m_IncrementalClusterer, new Instances(inst.dataset(), 0)));
      }
    }
  }
  catch (Exception e) {
    m_OutputToken = null;
    result = handleException("Failed to process input: " + m_InputToken.getPayload(), e);
  }

  if (m_OutputToken != null)
    updateProvenance(m_OutputToken);

  return result;
}
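The clusterer variant follows the same shape as the classifier case. A sketch using weka.clusterers.Cobweb, one of the UpdateableClusterer implementations that ships with Weka, again with exception handling omitted (first and more are assumed):

Cobweb cls = new Cobweb();

Instances seed = new Instances(first.dataset(), 1);
seed.add((Instance) first.copy());
cls.buildClusterer(seed);       // initial build on a one-row dataset

for (Instance inst : more) {
  cls.updateClusterer(inst);    // incorporate one instance
  cls.updateFinished();         // signal that the update batch is done
}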
From source file: adams.gui.menu.AppendDatasets.java
License: Open Source License

/**
 * Performs the append.
 *
 * @param frame   the frame to close
 * @param input   the files to merge
 * @param output  the output file
 */
protected void doAppend(ChildFrame frame, File[] input, File output) {
  Instances[] data;
  Instances full;
  int i;
  int n;
  AbstractFileLoader loader;
  DataSink sink;
  int count;
  TIntArrayList transferAtt;
  int index;

  if (input.length < 2) {
    GUIHelper.showErrorMessage(getOwner(), "At least two files are required!");
    return;
  }

  // load and check compatibility
  loader = ConverterUtils.getLoaderForFile(input[0]);
  data = new Instances[input.length];
  count = 0;
  transferAtt = new TIntArrayList();
  for (i = 0; i < input.length; i++) {
    try {
      loader.setFile(input[i]);
      data[i] = DataSource.read(loader);
      if (i > 0) {
        if (!data[0].equalHeaders(data[i])) {
          GUIHelper.showErrorMessage(getOwner(), "Datasets '" + input[0] + "' and '" + input[i]
              + "' are not compatible!\n" + data[0].equalHeadersMsg(data[i]));
          return;
        }
      }
      else {
        // remember which attributes need their values transferred per row
        for (n = 0; n < data[0].numAttributes(); n++) {
          if (data[0].attribute(n).isString() || data[0].attribute(n).isRelationValued())
            transferAtt.add(n);
        }
      }
      count += data[i].numInstances();
    }
    catch (Exception e) {
      GUIHelper.showErrorMessage(getOwner(),
          "Failed to read '" + input[i] + "'!\n" + Utils.throwableToString(e));
      return;
    }
  }

  // combine
  full = new Instances(data[0], count);
  for (i = 0; i < data.length; i++) {
    for (Instance inst : data[i]) {
      if (transferAtt.size() > 0) {
        for (n = 0; n < transferAtt.size(); n++) {
          index = transferAtt.get(n);
          if (inst.attribute(index).isString())
            full.attribute(index).addStringValue(inst.stringValue(index));
          else if (inst.attribute(index).isRelationValued())
            full.attribute(index).addRelation(inst.relationalValue(index));
          else
            throw new IllegalStateException(
                "Unhandled attribute type: " + Attribute.typeToString(inst.attribute(index)));
        }
      }
      full.add(inst);
    }
  }

  // save
  try {
    sink = new DataSink(output.getAbsolutePath());
    sink.write(full);
  }
  catch (Exception e) {
    GUIHelper.showErrorMessage(getOwner(),
        "Failed to save data to '" + output + "'!\n" + Utils.throwableToString(e));
    return;
  }

  GUIHelper.showInformationMessage(null, "Successfully appended!\n" + output);
  frame.dispose();
}
From source file: adams.gui.visualization.debug.objectexport.WekaInstancesExporter.java
License: Open Source License

/**
 * Performs the actual export.
 *
 * @param obj   the object to export
 * @param file  the file to export to
 * @return      null if successful, otherwise error message
 */
@Override
protected String doExport(Object obj, File file) {
  Instances data;
  Instance inst;

  try {
    if (obj instanceof Instances) {
      DataSink.write(file.getAbsolutePath(), (Instances) obj);
      return null;
    }
    else {
      inst = (Instance) obj;
      if (inst.dataset() != null) {
        // wrap the lone instance in a copy of its header so it can be written
        data = new Instances(inst.dataset());
        data.add((Instance) inst.copy());
        DataSink.write(file.getAbsolutePath(), data);
        return null;
      }
      else {
        return "Instance has no dataset associated, cannot export as ARFF!";
      }
    }
  }
  catch (Exception e) {
    return "Failed to write Instances to '" + file + "'!\n" + Utils.throwableToString(e);
  }
}