List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:ID3Chi.java
License:Open Source License
/** * Method for building an ID3Chi tree.// w w w . j av a 2 s . c o m * * @param data * the training data * @exception Exception * if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. /* if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } /**/ if (data.numInstances() == 0) { SetNullDistribution(data); } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); double entropyOfAllData = computeEntropy(data); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att, entropyOfAllData); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); double chiSquare = computeChiSquare(data, m_Attribute); int degreesOfFreedom = m_Attribute.numValues() - 1; ChiSquaredDistribution chi = new ChiSquaredDistribution(degreesOfFreedom); double threshold = chi.inverseCumulativeProbability(m_confidenceLevel); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { MakeALeaf(data); } else { // Discard unknown values for selected attribute //data.deleteWithMissing(m_Attribute); Instances[] subset = splitData(data, m_Attribute); if (CheckIfCanApplyChiSquare(subset) && (chiSquare <= threshold)) { MakeALeaf(data); return; } m_Successors = new ID3Chi[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new ID3Chi(this.m_confidenceLevel); m_Successors[j].m_Ratio = (double) subset[j].numInstances() / (double) data.numInstances(); m_Successors[j].makeTree(subset[j]); } } }
From source file:aaa.util.test.CreateArff.java
License:Open Source License
/** * Generates the Instances object and outputs it in ARFF format to stdout. * * @param args ignored//from ww w.ja v a 2s . c om * @throws Exception if generation of instances fails */ public static void main(String[] args) throws Exception { ArrayList<Attribute> atts; ArrayList<Attribute> attsRel; ArrayList<String> attVals; ArrayList<String> attValsRel; Instances data; Instances dataRel; double[] vals; double[] valsRel; int i; // 1. set up attributes atts = new ArrayList<Attribute>(); // - numeric atts.add(new Attribute("att1")); // - nominal attVals = new ArrayList<String>(); for (i = 0; i < 5; i++) attVals.add("val" + (i + 1)); atts.add(new Attribute("att2", attVals)); // - string atts.add(new Attribute("att3", (ArrayList<String>) null)); // - date atts.add(new Attribute("att4", "yyyy-MM-dd")); // - relational attsRel = new ArrayList<Attribute>(); // -- numeric attsRel.add(new Attribute("att5.1")); // -- nominal attValsRel = new ArrayList<String>(); for (i = 0; i < 5; i++) attValsRel.add("val5." + (i + 1)); attsRel.add(new Attribute("att5.2", attValsRel)); dataRel = new Instances("att5", attsRel, 0); atts.add(new Attribute("att5", dataRel, 0)); // 2. create Instances object data = new Instances("MyRelation", atts, 0); // 3. 
fill with data // first instance vals = new double[data.numAttributes()]; // - numeric vals[0] = Math.PI; // - nominal vals[1] = attVals.indexOf("val3"); // - string vals[2] = data.attribute(2).addStringValue("This is a string!"); // - date vals[3] = data.attribute(3).parseDate("2001-11-09"); // - relational dataRel = new Instances(data.attribute(4).relation(), 0); // -- first instance valsRel = new double[2]; valsRel[0] = Math.PI + 1; valsRel[1] = attValsRel.indexOf("val5.3"); dataRel.add(new DenseInstance(1.0, valsRel)); // -- second instance valsRel = new double[2]; valsRel[0] = Math.PI + 2; valsRel[1] = attValsRel.indexOf("val5.2"); dataRel.add(new DenseInstance(1.0, valsRel)); vals[4] = data.attribute(4).addRelation(dataRel); // add data.add(new DenseInstance(1.0, vals)); // second instance vals = new double[data.numAttributes()]; // important: needs NEW array! // - numeric vals[0] = Math.E; // - nominal vals[1] = attVals.indexOf("val1"); // - string vals[2] = data.attribute(2).addStringValue("And another one!"); // - date vals[3] = data.attribute(3).parseDate("2000-12-01"); // - relational dataRel = new Instances(data.attribute(4).relation(), 0); // -- first instance valsRel = new double[2]; valsRel[0] = Math.E + 1; valsRel[1] = attValsRel.indexOf("val5.4"); dataRel.add(new DenseInstance(1.0, valsRel)); // -- second instance valsRel = new double[2]; valsRel[0] = Math.E + 2; valsRel[1] = attValsRel.indexOf("val5.1"); dataRel.add(new DenseInstance(1.0, valsRel)); vals[4] = data.attribute(4).addRelation(dataRel); // add data.add(new DenseInstance(1.0, vals)); // 4. output data System.out.println(data); }
From source file:adams.data.conversion.SpreadSheetToWekaInstances.java
License:Open Source License
/** * Performs the actual conversion./*from ww w.j a v a 2 s . co m*/ * * @return the converted data * @throws Exception if something goes wrong with the conversion */ @Override protected Object doConvert() throws Exception { Instances result; SpreadSheet sheet; DenseInstance inst; ArrayList<Attribute> atts; HashSet<String> unique; ArrayList<String> labels; Row row; Cell cell; int i; int n; double[] values; Collection<ContentType> types; ContentType type; boolean added; int[] classIndices; sheet = (SpreadSheet) m_Input; // create header atts = new ArrayList<>(); for (i = 0; i < sheet.getColumnCount(); i++) { added = false; types = sheet.getContentTypes(i); if (types.contains(ContentType.DOUBLE)) types.remove(ContentType.LONG); if (types.contains(ContentType.LONG)) { types.add(ContentType.DOUBLE); types.remove(ContentType.LONG); } if (types.size() == 1) { type = (ContentType) types.toArray()[0]; if (type == ContentType.DOUBLE) { atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent())); added = true; } else if (type == ContentType.DATE) { atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), Constants.TIMESTAMP_FORMAT)); added = true; } else if (type == ContentType.TIME) { atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), Constants.TIME_FORMAT)); added = true; } } if (!added) { unique = new HashSet<>(); for (n = 0; n < sheet.getRowCount(); n++) { row = sheet.getRow(n); cell = row.getCell(i); if ((cell != null) && !cell.isMissing()) unique.add(cell.getContent()); } if ((unique.size() > m_MaxLabels) || (m_MaxLabels < 1)) { atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), (FastVector) null)); } else { labels = new ArrayList<>(unique); Collections.sort(labels); atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), labels)); } } } result = new Instances(Environment.getInstance().getProject(), atts, sheet.getRowCount()); if (sheet.hasName()) result.setRelationName(sheet.getName()); // add 
data for (n = 0; n < sheet.getRowCount(); n++) { row = sheet.getRow(n); values = new double[result.numAttributes()]; for (i = 0; i < result.numAttributes(); i++) { cell = row.getCell(i); values[i] = weka.core.Utils.missingValue(); if ((cell != null) && !cell.isMissing()) { if (result.attribute(i).type() == Attribute.DATE) { if (cell.isTime()) values[i] = cell.toTime().getTime(); else values[i] = cell.toDate().getTime(); } else if (result.attribute(i).isNumeric()) { values[i] = Utils.toDouble(cell.getContent()); } else if (result.attribute(i).isString()) { values[i] = result.attribute(i).addStringValue(cell.getContent()); } else { values[i] = result.attribute(i).indexOfValue(cell.getContent()); } } } inst = new DenseInstance(1.0, values); result.add(inst); } if (sheet instanceof Dataset) { classIndices = ((Dataset) sheet).getClassAttributeIndices(); if (classIndices.length > 0) result.setClassIndex(classIndices[0]); } return result; }
From source file:adams.data.conversion.WekaInstancesToSpreadSheet.java
License:Open Source License
/** * Performs the actual conversion./* ww w . j a v a 2 s. c o m*/ * * @return the converted data * @throws Exception if something goes wrong with the conversion */ @Override protected Object doConvert() throws Exception { SpreadSheet result; Instances data; Row row; int i; int n; String str; data = (Instances) m_Input; // special case for InstancesViews if (m_SpreadSheetType instanceof InstancesView) { result = new InstancesView((Instances) m_Input); return result; } // create header result = m_SpreadSheetType.newInstance(); result.setDataRowClass(m_DataRowType.getClass()); row = result.getHeaderRow(); for (n = 0; n < data.numAttributes(); n++) row.addCell("" + n).setContent(data.attribute(n).name()); if (result instanceof Dataset) { if (data.classIndex() != -1) ((Dataset) result).setClassAttribute(data.classIndex(), true); } // fill spreadsheet for (i = 0; i < data.numInstances(); i++) { row = result.addRow("" + i); for (n = 0; n < data.numAttributes(); n++) { if (data.instance(i).isMissing(n)) continue; if (data.attribute(n).type() == Attribute.DATE) { row.addCell("" + n).setContent(new DateTimeMsec(new Date((long) data.instance(i).value(n)))); } else if (data.attribute(n).type() == Attribute.NUMERIC) { row.addCell("" + n).setContent(data.instance(i).value(n)); } else { str = data.instance(i).stringValue(n); if (str.equals(SpreadSheet.MISSING_VALUE)) row.addCell("" + n).setContentAsString("'" + str + "'"); else row.addCell("" + n).setContentAsString(str); } } } return result; }
From source file:adams.data.conversion.WekaInstancesToTimeseries.java
License:Open Source License
/** * Performs the actual conversion./*from w ww .j ava2 s .c o m*/ * * @return the converted data * @throws Exception if something goes wrong with the conversion */ @Override protected Object doConvert() throws Exception { Timeseries result; Instances input; Instance inst; int indexDate; int indexValue; TimeseriesPoint point; int i; Date timestamp; double value; input = (Instances) m_Input; // determine attribute indices m_DateAttribute.setData(input); indexDate = m_DateAttribute.getIntIndex(); if (indexDate == -1) throw new IllegalStateException("Failed to located date attribute: " + m_DateAttribute.getIndex()); m_ValueAttribute.setData(input); indexValue = m_ValueAttribute.getIntIndex(); if (indexValue == -1) throw new IllegalStateException("Failed to located value attribute: " + m_ValueAttribute.getIndex()); result = new Timeseries(input.relationName() + "-" + input.attribute(indexValue).name()); for (i = 0; i < input.numInstances(); i++) { inst = input.instance(i); if (!inst.isMissing(indexDate) && !inst.isMissing(indexValue)) { timestamp = new Date((long) inst.value(indexDate)); value = inst.value(indexValue); point = new TimeseriesPoint(timestamp, value); result.add(point); } } return result; }
From source file:adams.data.instances.InstanceComparator.java
License:Open Source License
/** * Compares its two arguments for order. Returns a negative integer, * zero, or a positive integer as the first argument is less than, equal * to, or greater than the second.//from w w w . j av a2 s. c o m * * @param o1 the first object to be compared. * @param o2 the second object to be compared. * @return a negative integer, zero, or a positive integer as the * first argument is less than, equal to, or greater than the * second. */ @Override public int compare(Instance o1, Instance o2) { int result; Instances header; int i; int weight; double d1; double d2; result = 0; header = o1.dataset(); i = 0; while ((result == 0) && (i < m_Indices.length)) { if (o1.isMissing(m_Indices[i]) && o2.isMissing(m_Indices[i])) result = 0; else if (o1.isMissing(m_Indices[i])) result = -1; else if (o2.isMissing(m_Indices[i])) result = +1; else if (header.attribute(m_Indices[i]).isNumeric()) { d1 = o1.value(m_Indices[i]); d2 = o2.value(m_Indices[i]); if (d1 < d2) result = -1; else if (d1 == d2) result = 0; else result = +1; } else { result = o1.stringValue(m_Indices[i]).compareTo(o2.stringValue(m_Indices[i])); } if (!m_Ascending[i]) result = -result; // add weight to index weight = (int) Math.pow(10, (m_Indices.length - i)); result *= weight; i++; } return result; }
From source file:adams.data.instancesanalysis.PCA.java
License:Open Source License
/** * Create a spreadsheet to output from the coefficients 2D array * * @param data the underlying dataset/* w ww.j ava2 s. c o m*/ * @param coeff The coefficients from the principal components analysis * @return A spreadsheet containing the components */ protected SpreadSheet extractLoadings(Instances data, ArrayList<ArrayList<Double>> coeff) { SpreadSheet result; Row row; int i; int n; result = new DefaultSpreadSheet(); row = result.getHeaderRow(); for (i = 0; i < coeff.size(); i++) row.addCell("L" + (i + 1)).setContent("Loading-" + (i + 1)); row.addCell("I").setContent("Index"); row.addCell("A").setContent("Attribute"); // add the index/attribute name column for (n = 0; n < m_NumAttributes; n++) { row = result.addRow(); row.addCell("I").setContent(n + 1); row.addCell("A").setContent(data.attribute(n).name()); } //each arraylist is a single column for (i = 0; i < coeff.size(); i++) { for (n = 0; n < m_NumAttributes; n++) { row = result.getRow(n); //attribute was kept earlier if (m_Kept.contains(n)) { int index = m_Kept.indexOf(n); if (index < coeff.get(i).size()) { double value = coeff.get(i).get(index); row.addCell("L" + (i + 1)).setContent(value); } else { row.addCell("L" + (i + 1)).setContent(0); } } //attribute wasn't kept, coefficient is 0 else { row.addCell("L" + (i + 1)).setContent(0); } } } return result; }
From source file:adams.data.instancesanalysis.PCA.java
License:Open Source License
/** * Performs the actual analysis.// w ww. j a v a2 s. c o m * * @param data the data to analyze * @return null if successful, otherwise error message * @throws Exception if analysis fails */ @Override protected String doAnalyze(Instances data) throws Exception { String result; Remove remove; PublicPrincipalComponents pca; int i; Capabilities caps; PartitionedMultiFilter2 part; Range rangeUnsupported; Range rangeSupported; TIntList listNominal; Range rangeNominal; ArrayList<ArrayList<Double>> coeff; Instances filtered; SpreadSheet transformed; WekaInstancesToSpreadSheet conv; String colName; result = null; m_Loadings = null; m_Scores = null; if (!m_AttributeRange.isAllRange()) { if (isLoggingEnabled()) getLogger().info("Filtering attribute range: " + m_AttributeRange.getRange()); remove = new Remove(); remove.setAttributeIndicesArray(m_AttributeRange.getIntIndices()); remove.setInvertSelection(true); remove.setInputFormat(data); data = Filter.useFilter(data, remove); } if (isLoggingEnabled()) getLogger().info("Performing PCA..."); listNominal = new TIntArrayList(); if (m_SkipNominal) { for (i = 0; i < data.numAttributes(); i++) { if (i == data.classIndex()) continue; if (data.attribute(i).isNominal()) listNominal.add(i); } } // check for unsupported attributes caps = new PublicPrincipalComponents().getCapabilities(); m_Supported = new TIntArrayList(); m_Unsupported = new TIntArrayList(); for (i = 0; i < data.numAttributes(); i++) { if (!caps.test(data.attribute(i)) || (i == data.classIndex()) || (listNominal.contains(i))) m_Unsupported.add(i); else m_Supported.add(i); } data.setClassIndex(-1); m_NumAttributes = m_Supported.size(); // the principal components will delete the attributes without any distinct values. // this checks which instances will be kept. 
m_Kept = new ArrayList<>(); for (i = 0; i < m_Supported.size(); i++) { if (data.numDistinctValues(m_Supported.get(i)) > 1) m_Kept.add(m_Supported.get(i)); } // build a model using the PublicPrincipalComponents pca = new PublicPrincipalComponents(); pca.setMaximumAttributes(m_MaxAttributes); pca.setVarianceCovered(m_Variance); pca.setMaximumAttributeNames(m_MaxAttributeNames); part = null; if (m_Unsupported.size() > 0) { rangeUnsupported = new Range(); rangeUnsupported.setMax(data.numAttributes()); rangeUnsupported.setIndices(m_Unsupported.toArray()); rangeSupported = new Range(); rangeSupported.setMax(data.numAttributes()); rangeSupported.setIndices(m_Supported.toArray()); part = new PartitionedMultiFilter2(); part.setFilters(new Filter[] { pca, new AllFilter(), }); part.setRanges(new weka.core.Range[] { new weka.core.Range(rangeSupported.getRange()), new weka.core.Range(rangeUnsupported.getRange()), }); } try { if (part != null) part.setInputFormat(data); else pca.setInputFormat(data); } catch (Exception e) { result = Utils.handleException(this, "Failed to set data format", e); } transformed = null; if (result == null) { try { if (part != null) filtered = weka.filters.Filter.useFilter(data, part); else filtered = weka.filters.Filter.useFilter(data, pca); } catch (Exception e) { result = Utils.handleException(this, "Failed to apply filter", e); filtered = null; } if (filtered != null) { conv = new WekaInstancesToSpreadSheet(); conv.setInput(filtered); result = conv.convert(); if (result == null) { transformed = (SpreadSheet) conv.getOutput(); // shorten column names again if (part != null) { for (i = 0; i < transformed.getColumnCount(); i++) { colName = transformed.getColumnName(i); colName = colName.replaceFirst("filtered-[0-9]*-", ""); transformed.getHeaderRow().getCell(i).setContentAsString(colName); } } } } } if (result == null) { // get the coefficients from the filter m_Scores = transformed; coeff = pca.getCoefficients(); m_Loadings = extractLoadings(data, 
coeff); m_Loadings.setName("Loadings for " + data.relationName()); } return result; }
From source file:adams.data.instancesanalysis.pls.AbstractMultiClassPLS.java
License:Open Source License
/** * Determines the output format based on the input format and returns this. * * @param input the input format to base the output format on * @return the output format//from w w w . j a v a2 s. c o m * @throws Exception in case the determination goes wrong */ @Override public Instances determineOutputFormat(Instances input) throws Exception { ArrayList<Attribute> atts; String prefix; int i; Instances result; List<String> classes; // collect classes m_ClassAttributeIndices = new TIntArrayList(); classes = new ArrayList<>(); for (i = 0; i < input.numAttributes(); i++) { if (m_ClassAttributes.isMatch(input.attribute(i).name())) { classes.add(input.attribute(i).name()); m_ClassAttributeIndices.add(i); } } if (!classes.contains(input.classAttribute().name())) { classes.add(input.classAttribute().name()); m_ClassAttributeIndices.add(input.classAttribute().index()); } // generate header atts = new ArrayList<>(); prefix = getClass().getSimpleName(); for (i = 0; i < getNumComponents(); i++) atts.add(new Attribute(prefix + "_" + (i + 1))); for (String cls : classes) atts.add(new Attribute(cls)); result = new Instances(prefix, atts, 0); result.setClassIndex(result.numAttributes() - 1); m_OutputFormat = result; return result; }
From source file:adams.flow.sink.WekaAttributeSummary.java
License:Open Source License
/** * Displays the token (the panel and dialog have already been created at * this stage)./*from w w w . j av a 2 s . c o m*/ * * @param token the token to display */ @Override protected void display(Token token) { Instances inst; int[] indices; AttributeVisualizationPanel panel; inst = (Instances) token.getPayload(); m_Range.setData(inst); indices = m_Range.getIntIndices(); clearPanel(); if (indices.length == 1) { m_PanelAtt = new AttributeVisualizationPanel(); m_PanelAtt.setInstances(inst); m_PanelAtt.setAttribute(indices[0]); m_Panel.add(m_PanelAtt, BorderLayout.CENTER); } else if (indices.length > 1) { m_TabbedPane = new BaseTabbedPane(); m_Panel.add(m_TabbedPane, BorderLayout.CENTER); for (int index : indices) { panel = new AttributeVisualizationPanel(); panel.setInstances(inst); panel.setAttribute(index); m_TabbedPane.addTab(inst.attribute(index).name(), panel); } } }