List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
From source file:DocClassifier.java
/**
 * Builds a dataset containing one instance per input file.
 *
 * <p>The relation name is {@code "Inst"} followed by the hash code of the {@code files} array.
 * Arrays do not override {@code hashCode()}, so this value is identity-based and not stable
 * across runs. The last element of {@code attrList} is used as the class attribute.
 *
 * @param files the files to convert; each one is passed to {@code createInstance(File)}
 * @return the dataset holding the created instances, with its class attribute set
 */
public Instances createInstances(File[] files) {
  Instances instances = new Instances("Inst" + files.hashCode(), attrList, files.length);

  // The class attribute belongs to the dataset, not to an individual instance: set it once
  // up front instead of once per file. This also makes sure it is set when files is empty.
  instances.setClass((Attribute) attrList.lastElement());

  for (File file : files) {
    Instance inst = createInstance(file);
    inst.setDataset(instances);
    instances.add(inst);
  }
  return instances;
}
From source file:PrincipalComponents.java
License:Open Source License
/**
 * Converts every instance of the supplied data with {@code convertInstance} and appends the
 * results to a new data set.
 *
 * <p>The result starts out as a copy of {@code m_originalSpaceFormat} when
 * {@code m_transBackToOriginal} is set, and as a copy of {@code m_transformedFormat} otherwise.
 *
 * @param data the instances to convert
 * @return the data set the converted instances were added to
 * @throws Exception if {@code m_eigenvalues} is {@code null}, or if converting an instance fails
 */
@Override
public Instances transformedData(Instances data) throws Exception {
  if (m_eigenvalues == null) {
    throw new Exception("Principal components hasn't been built yet");
  }

  Instances template = m_transBackToOriginal ? m_originalSpaceFormat : m_transformedFormat;
  Instances output = new Instances(template);

  int index = 0;
  while (index < data.numInstances()) {
    output.add(convertInstance(data.instance(index)));
    index++;
  }
  return output;
}
From source file:TextDirectoryLoader.java
License:Open Source License
/**
 * Returns the full data set: one instance for every file found in the sub-directory that
 * corresponds to each value of the class attribute.
 *
 * <p>Each instance stores the file's text as string attribute 0, optionally the relative file
 * name ({@code subdir/file}) as string attribute 1 when {@code m_OutputFilename} is set, and
 * the index of the class value at the class index. Files that cannot be read are reported on
 * {@code System.err} and skipped, as are class sub-directories that cannot be listed.
 *
 * @return the structure returned by {@code getStructure()} with the loaded instances added
 * @throws IOException if no directory has been specified; may also propagate from
 *     {@code getStructure()}
 */
public Instances getDataSet() throws IOException {
  if (getDirectory() == null) {
    throw new IOException("No directory/source has been specified");
  }

  String directoryPath = getDirectory().getAbsolutePath();
  FastVector classes = new FastVector();
  Enumeration enm = getStructure().classAttribute().enumerateValues();
  while (enm.hasMoreElements()) {
    classes.addElement(enm.nextElement());
  }

  Instances data = getStructure();
  int fileCount = 0;
  for (int k = 0; k < classes.size(); k++) {
    String subdirPath = (String) classes.elementAt(k);
    File subdir = new File(directoryPath + File.separator + subdirPath);
    String[] files = subdir.list();
    if (files == null) {
      // File.list() returns null when the path is not a directory or an I/O error occurs;
      // skip this class instead of failing with a NullPointerException.
      System.err.println("failed to list directory: " + directoryPath + File.separator + subdirPath);
      continue;
    }

    for (int j = 0; j < files.length; j++) {
      try {
        fileCount++;
        if (getDebug()) {
          System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);
        }

        double[] newInst = m_OutputFilename ? new double[3] : new double[2];
        File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);

        // Open the raw stream first so it is released even when the charset lookup or a
        // read fails part-way through.
        StringBuffer txtStr = new StringBuffer();
        FileInputStream in = new FileInputStream(txt);
        try {
          BufferedReader is;
          if (m_charSet == null || m_charSet.length() == 0) {
            is = new BufferedReader(new InputStreamReader(in));
          } else {
            is = new BufferedReader(new InputStreamReader(in, m_charSet));
          }
          int c;
          while ((c = is.read()) != -1) {
            txtStr.append((char) c);
          }
        } finally {
          in.close();
        }

        newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
        if (m_OutputFilename) {
          newInst[1] = (double) data.attribute(1).addStringValue(subdirPath + File.separator + files[j]);
        }
        newInst[data.classIndex()] = (double) k;
        data.add(new Instance(1.0, newInst));
      } catch (Exception e) {
        // Best effort: report the failing file together with its cause and carry on.
        System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath
            + File.separator + files[j] + ": " + e);
      }
    }
  }
  return data;
}
From source file:ArrayLoader.java
License:Open Source License
/**
 * Returns the full data set built from {@code m_data}. If the structure has not been
 * determined yet, {@code getStructure()} is called first.
 *
 * <p>The method works in two passes: every row of {@code m_data} is first converted with
 * {@code getInstance}, then the attribute definitions are derived from the per-attribute
 * hashtables in {@code m_cumulativeStructure} and the converted rows are turned into
 * instances. As side effects, {@code m_structure} is replaced by an empty copy of the
 * resulting data set and {@code m_cumulativeStructure} is reset to {@code null}.
 *
 * @return the data set containing one instance per row of {@code m_data}
 * @exception IOException if {@code m_data} is {@code null}
 */
public Instances getDataSet() throws IOException {
  if (m_data == null) {
    throw new IOException("No source has been specified");
  }
  if (m_structure == null) {
    getStructure();
  }

  // one (initially empty) hashtable per attribute
  // NOTE(review): these are presumably filled by getInstance() with value -> Integer index
  // entries; getInstance() is not visible here - confirm.
  m_cumulativeStructure = new FastVector(m_structure.numAttributes());
  for (int i = 0; i < m_structure.numAttributes(); i++) {
    m_cumulativeStructure.addElement(new Hashtable());
  }

  // first pass: convert every row and keep the result for the second pass
  m_cumulativeInstances = new FastVector();
  FastVector current;
  for (int i = 0; i < m_data.length; i++) {
    current = getInstance(m_data[i]);
    m_cumulativeInstances.addElement(current);
  }

  // derive the attribute definitions
  FastVector atts = new FastVector(m_structure.numAttributes());
  for (int i = 0; i < m_structure.numAttributes(); i++) {
    String attname = m_structure.attribute(i).name();
    Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
    if (tempHash.size() == 0) {
      // no values recorded for this attribute: created with the name-only constructor
      atts.addElement(new Attribute(attname));
    } else {
      if (m_StringAttributes.isInRange(i)) {
        // attribute index is in the string-attribute range: created with a null value list
        atts.addElement(new Attribute(attname, (FastVector) null));
      } else {
        FastVector values = new FastVector(tempHash.size());
        // add dummy objects in order to make the FastVector's size == capacity
        for (int z = 0; z < tempHash.size(); z++) {
          values.addElement("dummy");
        }
        // overwrite each dummy with the real label at the index stored in the hashtable
        Enumeration e = tempHash.keys();
        while (e.hasMoreElements()) {
          Object ob = e.nextElement();
          // if (ob instanceof Double) {
          int index = ((Integer) tempHash.get(ob)).intValue();
          String s = ob.toString();
          // drop the first and last character when the label starts with a quote
          // NOTE(review): assumes a matching closing quote and a length of at least 2 - confirm
          if (s.startsWith("'") || s.startsWith("\""))
            s = s.substring(1, s.length() - 1);
          values.setElementAt(new String(s), index);
          // }
        }
        atts.addElement(new Attribute(attname, values));
      }
    }
  }

  // make the instances
  String relationName;
  relationName = "ArrayData";
  Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

  // second pass: translate each stored row into the double[] representation
  for (int i = 0; i < m_cumulativeInstances.size(); i++) {
    current = ((FastVector) m_cumulativeInstances.elementAt(i));
    double[] vals = new double[dataSet.numAttributes()];
    for (int j = 0; j < current.size(); j++) {
      Object cval = current.elementAt(j);
      if (cval instanceof String) {
        if (((String) cval).compareTo(m_MissingValue) == 0) {
          vals[j] = Instance.missingValue();
        } else {
          if (dataSet.attribute(j).isString()) {
            vals[j] = dataSet.attribute(j).addStringValue((String) cval);
          } else if (dataSet.attribute(j).isNominal()) {
            // find correct index
            Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
            int index = ((Integer) lookup.get(cval)).intValue();
            vals[j] = index;
          } else {
            // NOTE(review): the message reports i + 1 (the row index) although it says
            // "position" - confirm whether the attribute index j was intended.
            throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
          }
        }
      } else if (dataSet.attribute(j).isNominal()) {
        // find correct index
        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
        int index = ((Integer) lookup.get(cval)).intValue();
        vals[j] = index;
      } else if (dataSet.attribute(j).isString()) {
        // non-String value for a string attribute: store its string form
        vals[j] = dataSet.attribute(j).addStringValue("" + cval);
      } else {
        // anything else is cast to Double here
        vals[j] = ((Double) cval).doubleValue();
      }
    }
    dataSet.add(new Instance(1.0, vals));
  }

  // remember the final header (no instances) as the structure
  m_structure = new Instances(dataSet, 0);
  m_cumulativeStructure = null; // conserve memory
  return dataSet;
}
From source file:Pair.java
License:Open Source License
/**
 * Combines {@code m_SourceInstances} with the given target data, assigns instance weights,
 * and repeatedly builds classifiers on the weighted data, printing the value of
 * {@code getTestError()} after each build.
 *
 * <p>Source instances occupy indices {@code [0, numSourceInstances)} of the combined set and
 * target instances follow. Two modes exist, selected by {@code doFraction}: a fixed schedule
 * that recomputes the source weight per iteration, and a mode that trains
 * {@code m_Classifiers[it]} and calls {@code setWeights} in every iteration.
 *
 * @param targetData the target instances appended after the source instances
 * @throws Exception declared by the method; presumably raised by the helpers it calls
 */
private void doCV(Instances targetData) throws Exception {
  System.out.println();
  System.out.flush();

  int numSourceInstances = m_SourceInstances.numInstances();
  int numInstances = targetData.numInstances() + numSourceInstances;
  numTargetInstances = numInstances - numSourceInstances;
  double weightSource, weightTarget;
  double initialSourceFraction;
  double[] weights = new double[numInstances];
  Random randomInstance = new Random(1); // fixed seed

  Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
  // Now add the target data, shallow copying the instances as they are added
  // so it doesn't mess up the weights for anyone else
  Enumeration enumer = targetData.enumerateInstances();
  while (enumer.hasMoreElements()) {
    Instance instance = (Instance) enumer.nextElement();
    data.add(instance);
  }

  if (sourceRatio < 0) { //weight all equally
    weightSource = weightTarget = 1.0/*/numInstances*/;
    initialSourceFraction = numSourceInstances / (double) numInstances;
  } else {
    // split a total weight of 1 between source and target in the ratio sourceRatio : 1
    double totalWeight = 1 + sourceRatio;
    weightSource = sourceRatio / totalWeight/*/numSourceInstances*/;
    weightTarget = 1.0 / totalWeight/*/numTargetInstances*/;
    initialSourceFraction = weightSource;
  }

  // apply the initial weights: source block first, target block after it
  for (int j = 0; j < numInstances; j++) {
    Instance instance = data.instance(j);
    if (j < numSourceInstances)
      instance.setWeight(weightSource);
    else
      instance.setWeight(weightTarget);
  }

  if (doFraction) {
    for (int it = 0; it < sourceIterations/*m_NumIterations*/; it++) {
      // the fraction shrinks linearly with it / m_NumIterations
      // NOTE(review): the loop bound is sourceIterations but the divisor is m_NumIterations -
      // confirm this is intended.
      sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; //[same weights as regular]
      // cap the fraction so that (1 - sourceFraction) below stays away from zero
      if (sourceFraction > .995)
        sourceFraction = .995;
      //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
      // With every target instance at weight 1, this per-instance source weight makes the
      // source instances carry sourceFraction of the total weight.
      double sourceWeight = (sourceFraction * numTargetInstances) / (numSourceInstances * (1 - sourceFraction));
      for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
          instance.setWeight(sourceWeight);
        else
          instance.setWeight(1);
      }
      buildClassifierWithWeights(data);
      System.out.println("Iteration " + it + ":" + getTestError());
    }
  } else {
    // Snapshot the weights, build, then restore them.
    // NOTE(review): presumably buildClassifierWithWeights() modifies the instance weights;
    // it is not visible here - confirm.
    for (int i = 0; i < numInstances; i++)
      weights[i] = data.instance(i).weight();
    buildClassifierWithWeights(data);
    System.out.println("Iteration -1:" + getTestError());
    for (int i = 0; i < numInstances; i++)
      data.instance(i).setWeight(weights[i]);

    for (int it = 0; it < sourceIterations; it++) {
      Instances sample = null;
      if (!resample || m_NumIterationsPerformed == 0) {
        sample = data;
      } else {
        // resample using the current weights normalised to sum to 1
        double sum = data.sumOfWeights();
        double[] sweights = new double[data.numInstances()];
        for (int i = 0; i < sweights.length; i++) {
          sweights[i] = data.instance(i).weight() / sum;
        }
        sample = data.resampleWithWeights(randomInstance, sweights);
      }

      try {
        m_Classifiers[it].buildClassifier(sample);
      } catch (Exception e) {
        // a failed build is only reported; the iteration continues with m_Classifiers[it]
        e.printStackTrace();
        System.out.println("E: " + e);
      }

      sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
      setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);

      // same snapshot / build / restore sequence as before the loop
      for (int i = 0; i < numInstances; i++)
        weights[i] = data.instance(i).weight();
      buildClassifierWithWeights(data);
      System.out.println("Iteration " + it + ":" + getTestError());
      for (int i = 0; i < numInstances; i++)
        data.instance(i).setWeight(weights[i]);
    }
  }
}
From source file:ArffLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined by a * call to getStructure then method should do so before processing the rest of * the data set./*from ww w . j av a2 s .co m*/ * * @return the structure of the data set as an empty set of Instances * @throws IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { Instances insts = null; try { if (m_sourceReader == null) { throw new IOException("No source has been specified"); } if (m_structure == null) { getStructure(); } // Read all instances Instance inst; insts = new Instances(m_structure, 0); while ((inst = m_ArffReader.readInstance(m_structure)) != null) { insts.add(inst); } // Instances readIn = new Instances(m_structure); } finally { if (m_sourceReader != null) { // close the stream m_sourceReader.close(); } } return insts; }
From source file:TextDirectoryToArff.java
License:Open Source License
public Instances createDataset(String directoryPath) throws Exception { FastVector atts = new FastVector(2); atts.addElement(new Attribute("filename", (FastVector) null)); atts.addElement(new Attribute("contents", (FastVector) null)); Instances data = new Instances("text_files_in_" + directoryPath, atts, 0); File dir = new File(directoryPath); String[] files = dir.list();/*from ww w . jav a 2s . c om*/ for (int i = 0; i < files.length; i++) { if (files[i].endsWith(".txt")) { try { double[] newInst = new double[2]; newInst[0] = (double) data.attribute(0).addStringValue(files[i]); File txt = new File(directoryPath + File.separator + files[i]); InputStreamReader is; is = new InputStreamReader(new FileInputStream(txt)); StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString()); data.add(new Instance(1.0, newInst)); } catch (Exception e) { //System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]); } } } return data; }
From source file:SMO.java
License:Open Source License
/** * Method for building the classifier. Implements a one-against-one * wrapper for multi-class problems.//from www .ja v a 2 s . c o m * * @param insts the set of training instances * @throws Exception if the classifier can't be built successfully */ public void buildClassifier(Instances insts) throws Exception { if (!m_checksTurnedOff) { // can classifier handle the data? getCapabilities().testWithFail(insts); // remove instances with missing class insts = new Instances(insts); insts.deleteWithMissingClass(); /* Removes all the instances with weight equal to 0. MUST be done since condition (8) of Keerthi's paper is made with the assertion Ci > 0 (See equation (3a). */ Instances data = new Instances(insts, insts.numInstances()); for (int i = 0; i < insts.numInstances(); i++) { if (insts.instance(i).weight() > 0) data.add(insts.instance(i)); } if (data.numInstances() == 0) { throw new Exception("No training instances left after removing " + "instances with weight 0!"); } insts = data; } if (!m_checksTurnedOff) { m_Missing = new ReplaceMissingValues(); m_Missing.setInputFormat(insts); insts = Filter.useFilter(insts, m_Missing); } else { m_Missing = null; } if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) { boolean onlyNumeric = true; if (!m_checksTurnedOff) { for (int i = 0; i < insts.numAttributes(); i++) { if (i != insts.classIndex()) { if (!insts.attribute(i).isNumeric()) { onlyNumeric = false; break; } } } } if (!onlyNumeric) { m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(insts); insts = Filter.useFilter(insts, m_NominalToBinary); } else { m_NominalToBinary = null; } } else { m_NominalToBinary = null; } if (m_filterType == FILTER_STANDARDIZE) { m_Filter = new Standardize(); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } else if (m_filterType == FILTER_NORMALIZE) { m_Filter = new Normalize(); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } else { m_Filter = 
null; } m_classIndex = insts.classIndex(); m_classAttribute = insts.classAttribute(); m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0); // Generate subsets representing each class Instances[] subsets = new Instances[insts.numClasses()]; for (int i = 0; i < insts.numClasses(); i++) { subsets[i] = new Instances(insts, insts.numInstances()); } for (int j = 0; j < insts.numInstances(); j++) { Instance inst = insts.instance(j); subsets[(int) inst.classValue()].add(inst); } for (int i = 0; i < insts.numClasses(); i++) { subsets[i].compactify(); } // Build the binary classifiers Random rand = new Random(m_randomSeed); m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()]; for (int i = 0; i < insts.numClasses(); i++) { for (int j = i + 1; j < insts.numClasses(); j++) { m_classifiers[i][j] = new BinarySMO(); m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel())); Instances data = new Instances(insts, insts.numInstances()); for (int k = 0; k < subsets[i].numInstances(); k++) { data.add(subsets[i].instance(k)); } for (int k = 0; k < subsets[j].numInstances(); k++) { data.add(subsets[j].instance(k)); } data.compactify(); data.randomize(rand); m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed); } } }
From source file:MPCKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Instances in data have to be
 * either all sparse or all non-sparse.
 *
 * <p>The method prints the configured weights, builds the metric (once), stores the data via
 * {@code setInstances}, obtains the initial centroids from {@code m_Initializer}, optionally
 * converts and smooths them depending on the type of {@code m_metric}, and finally calls
 * {@code runKMeans()}.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the clusterer has not been
 * generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
  System.out.println("ML weight=" + m_MLweight);
  System.out.println("CL weight= " + m_CLweight);
  System.out.println("LOG term weight=" + m_logTermWeight);
  System.out.println("Regularizer weight= " + m_regularizerTermWeight);

  // re-seed the random number generator from the configured seed
  m_RandomNumberGenerator = new Random(m_RandomSeed);

  // remember whether the metric is an OfflineLearnableMetric
  if (m_metric instanceof OfflineLearnableMetric) {
    m_isOfflineMetric = true;
  } else {
    m_isOfflineMetric = false;
  }

  // Don't rebuild the metric if it was already trained
  if (!m_metricBuilt) {
    m_metric.buildMetric(data);
    m_metricBuilt = true;
    m_metricLearner.setMetric(m_metric);
    m_metricLearner.setClusterer(this);

    // one metric / metric learner slot per cluster: either independent clones or the
    // single shared instance, depending on m_useMultipleMetrics
    m_metrics = new LearnableMetric[m_NumClusters];
    m_metricLearners = new MPCKMeansMetricLearner[m_NumClusters];
    for (int i = 0; i < m_metrics.length; i++) {
      if (m_useMultipleMetrics) {
        m_metrics[i] = (LearnableMetric) m_metric.clone();
        m_metricLearners[i] = (MPCKMeansMetricLearner) m_metricLearner.clone();
        m_metricLearners[i].setMetric(m_metrics[i]);
        m_metricLearners[i].setClusterer(this);
      } else {
        m_metrics[i] = m_metric;
        m_metricLearners[i] = m_metricLearner;
      }
    }
  }

  // NOTE(review): setInstances() presumably populates m_Instances, which is read from here
  // on; it is not visible here - confirm.
  setInstances(data);
  m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);
  m_ClusterAssignments = new int[m_Instances.numInstances()];

  // NOTE(review): the condition requires BOTH nominal and string attributes to be present
  // while the message only mentions nominal ones - confirm whether "||" was intended.
  if (m_Instances.checkForNominalAttributes() && m_Instances.checkForStringAttributes()) {
    throw new UnsupportedAttributeTypeException("Cannot handle nominal attributes\n");
  }

  // replaces the empty centroid set created above
  // NOTE(review): the initializer may read that empty set through this clusterer - confirm
  // before removing the earlier assignment.
  m_ClusterCentroids = m_Initializer.initialize();

  // if all instances are smoothed by the metric, the centroids
  // need to be smoothed too (note that this is independent of
  // centroid smoothing performed by K-Means)
  if (m_metric instanceof InstanceConverter) {
    System.out.println("Converting centroids...");
    Instances convertedCentroids = new Instances(m_ClusterCentroids, m_NumClusters);
    for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
      Instance centroid = m_ClusterCentroids.instance(i);
      convertedCentroids.add(((InstanceConverter) m_metric).convertInstance(centroid));
    }

    // swap the converted centroids into the existing m_ClusterCentroids object
    m_ClusterCentroids.delete();
    for (int i = 0; i < convertedCentroids.numInstances(); i++) {
      m_ClusterCentroids.add(convertedCentroids.instance(i));
    }
  }

  System.out.println("Done initializing clustering ...");
  getIndexClusters();

  if (m_verbose && m_Seedable) {
    printIndexClusters();
    for (int i = 0; i < m_NumClusters; i++) {
      System.out.println("Centroid " + i + ": " + m_ClusterCentroids.instance(i));
    }
  }

  // Some extra work for smoothing metrics
  if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) {
    SmoothingMetric smoothingMetric = (SmoothingMetric) m_metric;
    Instances smoothedCentroids = new Instances(m_Instances, m_NumClusters);
    for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
      Instance smoothedCentroid = smoothingMetric.smoothInstance(m_ClusterCentroids.instance(i));
      smoothedCentroids.add(smoothedCentroid);
    }
    // here the field is re-pointed at a new object (unlike the in-place swap above)
    m_ClusterCentroids = smoothedCentroids;
    updateSmoothingMetrics();
  }

  runKMeans();
}
From source file:PCADetector.java
License:Apache License
/**
 * Converts {@code m_oriDataMatrix} into a data set named {@code "MetricInstances"}.
 *
 * <p>The matrix is read attribute-major: {@code m_oriDataMatrix.get(att).get(inst)} is the
 * value of attribute {@code att} for instance {@code inst}. Attributes are named after their
 * index, and the number of instances is taken from the first attribute's entry.
 *
 * @return the data set, or {@code null} when the matrix has no attributes or its first
 *     attribute has no values
 */
public Instances getInstances() {
  int numAtts = m_oriDataMatrix.size();
  // size() is never negative, so the empty case must be rejected here as well; otherwise
  // get(0) below would fail on an empty matrix.
  if (numAtts <= 0) {
    return null;
  }

  int numInstances = m_oriDataMatrix.get(0).size();
  if (numInstances <= 0) {
    return null;
  }

  ArrayList<Attribute> atts = new ArrayList<Attribute>(numAtts);
  for (int att = 0; att < numAtts; att++) {
    atts.add(new Attribute(Integer.toString(att), att));
  }

  Instances dataset = new Instances("MetricInstances", atts, numInstances);
  for (int inst = 0; inst < numInstances; inst++) {
    Instance newInst = new DenseInstance(numAtts);
    for (int att = 0; att < numAtts; att++) {
      newInst.setValue(att, m_oriDataMatrix.get(att).get(inst));
    }
    dataset.add(newInst);
  }
  return dataset;
}