List of usage examples for weka.core Instances numAttributes
public int numAttributes()
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/** * Set the format for the transformed data * @return a set of empty Instances (header only) in the new format * @throws Exception if the output format can't be set *//*from w w w.ja v a 2 s . c om*/ private Instances setOutputFormat() throws Exception { if (m_eigenvalues == null) { return null; } double cumulative = 0.0; FastVector attributes = new FastVector(); for (int i = m_numAttribs - 1; i >= 0; i--) { StringBuffer attName = new StringBuffer(); // build array of coefficients double[] coeff_mags = new double[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]); int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs; // this array contains the sorted indices of the coefficients int[] coeff_inds; if (m_numAttribs > 0) { // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude coeff_inds = Utils.sort(coeff_mags); } else { // if m_maxAttrsInName <= 0, use all coeffs in original order coeff_inds = new int[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_inds[j] = j; } // build final attName string for (int j = 0; j < num_attrs; j++) { double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]]; if (j > 0 && coeff_value >= 0) attName.append("+"); attName.append( Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name()); } if (num_attrs < m_numAttribs) attName.append("..."); attributes.addElement(new Attribute(attName.toString())); cumulative += m_eigenvalues[m_sortedEigens[i]]; if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) { break; } } if (m_hasClass) { attributes.addElement(m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes() - 1); } m_outputNumAtts = 
outputFormat.numAttributes(); return outputFormat; }
From source file:data.Regression.java
public int regression(String fileName) { String arffName = FileTransfer.transfer(fileName); try {/*from ww w .jav a 2 s.com*/ //load data Instances data = new Instances(new BufferedReader(new FileReader(arffName))); data.setClassIndex(data.numAttributes() - 1); //build model LinearRegression model = new LinearRegression(); model.buildClassifier(data); //the last instance with missing class is not used System.out.println(model); //classify the last instance Instance num = data.lastInstance(); int people = (int) model.classifyInstance(num); System.out.println("NumOfEnrolled (" + num + "): " + people); return people; } catch (Exception e) { e.printStackTrace(); System.out.println("Regression fail"); } return 0; }
From source file:data.RegressionDrop.java
public void regression() throws Exception { //public static void main(String[] args) throws Exception{ //load data/* www . jav a2 s .co m*/ Instances data = new Instances(new BufferedReader(new FileReader("NumOfDroppedByYear.arff"))); data.setClassIndex(data.numAttributes() - 1); //build model LinearRegression model = new LinearRegression(); model.buildClassifier(data); //the last instance with missing class is not used System.out.println(model); //classify the last instance Instance num = data.lastInstance(); int people = (int) model.classifyInstance(num); System.out.println("NumOfDropped (" + num + "): " + people); }
From source file:data.statistics.MLStatistics.java
License:Open Source License
/**
 * Calculates Phi and Chi-square correlation matrix.
 *
 * @param dataSet
 *            A multi-label dataset.
 * @throws java.lang.Exception
 *             To be handled in an upper level.
 */
public void calculatePhiChi2(MultiLabelInstances dataSet) throws Exception {
    numLabels = dataSet.getNumLabels();

    // The indices of the label attributes
    int[] labelIndices;
    labelIndices = dataSet.getLabelIndices();
    numLabels = dataSet.getNumLabels();
    phi = new double[numLabels][numLabels];
    chi2 = new double[numLabels][numLabels];

    // Keep ONLY the label attributes (invert-selection on the label indices),
    // so attribute position k in `result` corresponds to label k.
    Remove remove = new Remove();
    remove.setInvertSelection(true);
    remove.setAttributeIndicesArray(labelIndices);
    remove.setInputFormat(dataSet.getDataSet());
    Instances result = Filter.useFilter(dataSet.getDataSet(), remove);
    result.setClassIndex(result.numAttributes() - 1);

    // For each label i, build a 2x2 contingency table against every label l:
    //   a = (i=0, l=0), b = (i=1, l=0), c = (i=0, l=1), d = (i=1, l=1)
    for (int i = 0; i < numLabels; i++) {
        int a[] = new int[numLabels];
        int b[] = new int[numLabels];
        int c[] = new int[numLabels];
        int d[] = new int[numLabels];
        double e[] = new double[numLabels];
        double f[] = new double[numLabels];
        double g[] = new double[numLabels];
        double h[] = new double[numLabels];
        for (int j = 0; j < result.numInstances(); j++) {
            for (int l = 0; l < numLabels; l++) {
                // labels are nominal with string values "0"/"1"
                if (result.instance(j).stringValue(i).equals("0")) {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        a[l]++;
                    } else {
                        c[l]++;
                    }
                } else {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        b[l]++;
                    } else {
                        d[l]++;
                    }
                }
            }
        }
        for (int l = 0; l < numLabels; l++) {
            // marginal totals of the 2x2 table
            e[l] = a[l] + b[l];
            f[l] = c[l] + d[l];
            g[l] = a[l] + c[l];
            h[l] = b[l] + d[l];
            // phi = (ad - bc) / sqrt(e*f*g*h); NaN when any marginal is zero
            double mult = e[l] * f[l] * g[l] * h[l];
            double denominator = Math.sqrt(mult);
            double nominator = a[l] * d[l] - b[l] * c[l];
            phi[i][l] = nominator / denominator;
            // chi-square relates to phi via chi2 = phi^2 * N
            chi2[i][l] = phi[i][l] * phi[i][l] * (a[l] + b[l] + c[l] + d[l]);
        }
    }
}
From source file:DataMiningLogHistoriKIRI.ArffIO.java
public Instances readArff(String name) throws IOException { Instances data; data = new Instances(new BufferedReader(new FileReader("temp.arff"))); data.setClassIndex(data.numAttributes() - 1); return data;// ww w . j a va 2 s. co m }
From source file:de.citec.sc.matoll.classifiers.WEKAclassifier.java
public void train(List<Provenance> provenances, Set<String> pattern_lookup, Set<String> pos_lookup) throws IOException { String path = "matoll" + Language.toString() + ".arff"; writeVectors(provenances, path, pattern_lookup, pos_lookup); Instances inst = new Instances(new BufferedReader(new FileReader(path))); inst.setClassIndex(inst.numAttributes() - 1); try {/*from ww w . ja va 2 s . com*/ cls.buildClassifier(inst); // serialize model ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(path.replace(".arff", ".model"))); oos.writeObject(cls); oos.flush(); oos.close(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:de.citec.sc.matoll.classifiers.WEKAclassifier.java
public HashMap<Integer, Double> predict(Provenance provenance, Set<String> pattern_lookup, Set<String> pos_lookup) throws IOException, Exception { /*/*from w w w . ja va 2s. c o m*/ we want predict that the entry is true */ provenance.setAnnotation(1); List<Provenance> tmp_prov = new ArrayList<Provenance>(); tmp_prov.add(provenance); writeVectors(tmp_prov, "tmp.arff", pattern_lookup, pos_lookup); ArffLoader loader = new ArffLoader(); loader.setFile(new File("tmp.arff")); Instances structure = loader.getStructure(); structure.setClassIndex(structure.numAttributes() - 1); HashMap<Integer, Double> hm = new HashMap<Integer, Double>(); Instance current; while ((current = loader.getNextInstance(structure)) != null) { /* * value_to_predict * can be only 0 or 1, as only two classes are given */ double value = cls.classifyInstance(current); double[] percentage = cls.distributionForInstance(current); List<String> result = new ArrayList<String>(); int prediction = (int) value; double distribution = percentage[(int) value]; hm.put(prediction, distribution); } return hm; }
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/** * * This method creates a serialized WEKA model file from an .arff file containing the annotated gold standard * * * @param gs_arff the annotated gold standard in an .arff file * @param model output file for the model * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross validation) * @throws Exception/* w w w. j a va 2s . c om*/ */ public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval) throws Exception { DataSource source = new DataSource(gs_arff); Instances data = source.getDataSet(); if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } Remove rm = new Remove(); rm.setAttributeIndices("1"); // remove ID attribute BayesNet bn = new BayesNet(); //Standard classifier; BNs proved most robust, but of course other classifiers are possible // meta-classifier FilteredClassifier fc = new FilteredClassifier(); fc.setFilter(rm); fc.setClassifier(bn); fc.buildClassifier(data); // build classifier SerializationHelper.write(model, fc); if (output_eval) { Evaluation eval = new Evaluation(data); eval.crossValidateModel(fc, data, 10, new Random(1)); System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); System.out.println(eval.toClassDetailsString()); } }
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/** * * This method applies a serialized WEKA model file to an unlabeld .arff file for classification * * * @param input_arff the annotated gold standard in an .arff file * @param model output file for the model * @param output output file for evaluation of trained classifier (10-fold cross validation) * @throws Exception// www .ja v a2s. c o m */ public static void applyModelToUnlabeledArff(String input_arff, String model, String output) throws Exception { DataSource source = new DataSource(input_arff); Instances unlabeled = source.getDataSet(); if (unlabeled.classIndex() == -1) { unlabeled.setClassIndex(unlabeled.numAttributes() - 1); } Remove rm = new Remove(); rm.setAttributeIndices("1"); // remove ID attribute ObjectInputStream ois = new ObjectInputStream(new FileInputStream(model)); Classifier cls = (Classifier) ois.readObject(); ois.close(); // create copy Instances labeled = new Instances(unlabeled); // label instances for (int i = 0; i < unlabeled.numInstances(); i++) { double clsLabel = cls.classifyInstance(unlabeled.instance(i)); labeled.instance(i).setClassValue(clsLabel); } // save labeled data BufferedWriter writer = new BufferedWriter(new FileWriter(output)); writer.write(labeled.toString()); writer.newLine(); writer.flush(); writer.close(); }
From source file:de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java
License:Apache License
/**
 * Loads a WEKA Instances object from a (possibly gzip-compressed) ARFF file
 * and sets the class index to the last attribute.
 *
 * @param instancesFile the ARFF file; names ending in ".gz" are decompressed
 * @return the loaded training data with the class index set
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException           if reading or decompression fails
 */
private Instances getInstances(File instancesFile) throws FileNotFoundException, IOException {
    // try-with-resources replaces the manual try/finally close
    try (Reader reader = instancesFile.getAbsolutePath().endsWith(".gz")
            ? new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(instancesFile))))
            : new BufferedReader(new FileReader(instancesFile))) {
        Instances trainData = new Instances(reader);
        trainData.setClassIndex(trainData.numAttributes() - 1);
        return trainData;
    }
}