Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

On this page you can find usage examples for weka.core Instances numAttributes.

Prototype


public int numAttributes() 

Document

Returns the number of attributes.
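
A minimal sketch (assuming a placeholder file data.arff) of the most common idiom on this page: using numAttributes() to set the class index to the last attribute.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.core.Instances;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset from an ARFF file (placeholder path).
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));

        // numAttributes() counts all columns, including the class attribute.
        System.out.println("Number of attributes: " + data.numAttributes());

        // Common idiom: treat the last attribute as the class.
        data.setClassIndex(data.numAttributes() - 1);
    }
}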

Usage

From source file: data.generation.target.utils.PrincipalComponents.java

License: Open Source License

/**
 * Set the format for the transformed data
 * @return a set of empty Instances (header only) in the new format
 * @throws Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
        return null;
    }

    double cumulative = 0.0;
    FastVector attributes = new FastVector();
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        StringBuffer attName = new StringBuffer();
        // build array of coefficients
        double[] coeff_mags = new double[m_numAttribs];
        for (int j = 0; j < m_numAttribs; j++)
            coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
        int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;
        // this array contains the sorted indices of the coefficients
        int[] coeff_inds;
        if (m_maxAttrsInName > 0) {
            // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude
            coeff_inds = Utils.sort(coeff_mags);
        } else {
            // if  m_maxAttrsInName <= 0, use all coeffs in original order
            coeff_inds = new int[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++)
                coeff_inds[j] = j;
        }
        // build final attName string
        for (int j = 0; j < num_attrs; j++) {
            double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
            if (j > 0 && coeff_value >= 0)
                attName.append("+");
            attName.append(
                    Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name());
        }
        if (num_attrs < m_numAttribs)
            attName.append("...");

        attributes.addElement(new Attribute(attName.toString()));
        cumulative += m_eigenvalues[m_sortedEigens[i]];

        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (m_hasClass) {
        attributes.addElement(m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components",
            attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    m_outputNumAtts = outputFormat.numAttributes();
    return outputFormat;
}

From source file: data.Regression.java

public int regression(String fileName) {

    String arffName = FileTransfer.transfer(fileName);

    try {
        //load data
        Instances data = new Instances(new BufferedReader(new FileReader(arffName)));
        data.setClassIndex(data.numAttributes() - 1);
        //build model
        LinearRegression model = new LinearRegression();
        model.buildClassifier(data);
        //the last instance with missing class is not used
        System.out.println(model);
        //classify the last instance
        Instance num = data.lastInstance();
        int people = (int) model.classifyInstance(num);
        System.out.println("NumOfEnrolled (" + num + "): " + people);
        return people;
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("Regression fail");
    }
    return 0;
}

From source file: data.RegressionDrop.java

public void regression() throws Exception {

    //public static void main(String[] args) throws Exception{

    //load data
    Instances data = new Instances(new BufferedReader(new FileReader("NumOfDroppedByYear.arff")));
    data.setClassIndex(data.numAttributes() - 1);
    //build model
    LinearRegression model = new LinearRegression();
    model.buildClassifier(data);
    //the last instance with missing class is not used
    System.out.println(model);
    //classify the last instance
    Instance num = data.lastInstance();
    int people = (int) model.classifyInstance(num);
    System.out.println("NumOfDropped (" + num + "): " + people);
}

From source file: data.statistics.MLStatistics.java

License: Open Source License

/**
 * Calculates Phi and Chi-square correlation matrix.
 *
 * @param dataSet
 *            A multi-label dataset.
 * @throws java.lang.Exception
 *             To be handled in an upper level.
 */
public void calculatePhiChi2(MultiLabelInstances dataSet) throws Exception {
    numLabels = dataSet.getNumLabels();

    // The indices of the label attributes
    int[] labelIndices;

    labelIndices = dataSet.getLabelIndices();
    numLabels = dataSet.getNumLabels();
    phi = new double[numLabels][numLabels];
    chi2 = new double[numLabels][numLabels];

    Remove remove = new Remove();
    remove.setInvertSelection(true);
    remove.setAttributeIndicesArray(labelIndices);
    remove.setInputFormat(dataSet.getDataSet());
    Instances result = Filter.useFilter(dataSet.getDataSet(), remove);
    result.setClassIndex(result.numAttributes() - 1);

    for (int i = 0; i < numLabels; i++) {
        int a[] = new int[numLabels];
        int b[] = new int[numLabels];
        int c[] = new int[numLabels];
        int d[] = new int[numLabels];
        double e[] = new double[numLabels];
        double f[] = new double[numLabels];
        double g[] = new double[numLabels];
        double h[] = new double[numLabels];
        for (int j = 0; j < result.numInstances(); j++) {
            for (int l = 0; l < numLabels; l++) {
                if (result.instance(j).stringValue(i).equals("0")) {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        a[l]++;
                    } else {
                        c[l]++;
                    }
                } else {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        b[l]++;
                    } else {
                        d[l]++;
                    }
                }
            }
        }
        for (int l = 0; l < numLabels; l++) {
            e[l] = a[l] + b[l];
            f[l] = c[l] + d[l];
            g[l] = a[l] + c[l];
            h[l] = b[l] + d[l];
            double mult = e[l] * f[l] * g[l] * h[l];
            double denominator = Math.sqrt(mult);
            double nominator = a[l] * d[l] - b[l] * c[l];
            phi[i][l] = nominator / denominator;
            chi2[i][l] = phi[i][l] * phi[i][l] * (a[l] + b[l] + c[l] + d[l]);
        }
    }
}
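
The inner loops above fill a 2x2 contingency table (a, b, c, d) for every label pair. A hypothetical helper, not part of MLStatistics, that states the per-pair formula explicitly:

// Hypothetical helper, not part of MLStatistics: phi and chi-square for one
// label pair, given the 2x2 contingency counts a, b, c, d as in the loop above.
final class PhiChi2Pair {

    // phi = (a*d - b*c) / sqrt((a+b) * (c+d) * (a+c) * (b+d))
    static double phi(int a, int b, int c, int d) {
        double numerator = (double) a * d - (double) b * c;
        double denominator = Math.sqrt((double) (a + b) * (c + d) * (a + c) * (b + d));
        return numerator / denominator;
    }

    // chi2 = phi^2 * n, where n = a + b + c + d is the number of instances
    static double chi2(int a, int b, int c, int d) {
        double p = phi(a, b, c, d);
        return p * p * (a + b + c + d);
    }
}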

From source file: DataMiningLogHistoriKIRI.ArffIO.java

public Instances readArff(String name) throws IOException {
    Instances data;
    data = new Instances(new BufferedReader(new FileReader("temp.arff")));
    data.setClassIndex(data.numAttributes() - 1);
    return data;
}

From source file: de.citec.sc.matoll.classifiers.WEKAclassifier.java

public void train(List<Provenance> provenances, Set<String> pattern_lookup, Set<String> pos_lookup)
        throws IOException {
    String path = "matoll" + Language.toString() + ".arff";
    writeVectors(provenances, path, pattern_lookup, pos_lookup);
    Instances inst = new Instances(new BufferedReader(new FileReader(path)));
    inst.setClassIndex(inst.numAttributes() - 1);
    try {
        cls.buildClassifier(inst);
        // serialize model
        ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(path.replace(".arff", ".model")));
        oos.writeObject(cls);
        oos.flush();
        oos.close();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file: de.citec.sc.matoll.classifiers.WEKAclassifier.java

public HashMap<Integer, Double> predict(Provenance provenance, Set<String> pattern_lookup,
        Set<String> pos_lookup) throws IOException, Exception {

    /*
    we want to predict that the entry is true
    */
    provenance.setAnnotation(1);
    List<Provenance> tmp_prov = new ArrayList<Provenance>();
    tmp_prov.add(provenance);
    writeVectors(tmp_prov, "tmp.arff", pattern_lookup, pos_lookup);

    ArffLoader loader = new ArffLoader();
    loader.setFile(new File("tmp.arff"));
    Instances structure = loader.getStructure();
    structure.setClassIndex(structure.numAttributes() - 1);
    HashMap<Integer, Double> hm = new HashMap<Integer, Double>();

    Instance current;
    while ((current = loader.getNextInstance(structure)) != null) {
        /*
        * value_to_predict
        * can be only 0 or 1, as only two classes are given 
        */

        double value = cls.classifyInstance(current);
        double[] percentage = cls.distributionForInstance(current);

        List<String> result = new ArrayList<String>();
        int prediction = (int) value;
        double distribution = percentage[(int) value];
        hm.put(prediction, distribution);
    }
    return hm;
}

From source file: de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java

License: Apache License

/**
 *
 * This method creates a serialized WEKA model file from an .arff file containing the annotated gold standard
 *
 *
 * @param gs_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross validation)
 * @throws Exception
 */

public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval)
        throws Exception {
    DataSource source = new DataSource(gs_arff);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID  attribute

    BayesNet bn = new BayesNet(); //Standard classifier; BNs proved most robust, but of course other classifiers are possible
    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(bn);
    fc.buildClassifier(data); // build classifier
    SerializationHelper.write(model, fc);
    if (output_eval) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
    }

}
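
A hypothetical driver (not part of the original class; file names are placeholders) showing how this method might be called:

public class TrainAlignmentModel {
    public static void main(String[] args) throws Exception {
        // Trains the filtered BayesNet on the annotated gold standard, writes the
        // serialized model, and prints the 10-fold cross-validation results.
        WekaMachineLearning.createModelFromGoldstandard("goldstandard.arff", "alignment.model", true);
    }
}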

From source file: de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java

License: Apache License

/**
 *
 * This method applies a serialized WEKA model file to an unlabeled .arff file for classification
 *
 *
 * @param input_arff the unlabeled data in an .arff file
 * @param model the serialized model file to apply
 * @param output output file for the labeled data
 * @throws Exception
 */

public static void applyModelToUnlabeledArff(String input_arff, String model, String output) throws Exception {
    DataSource source = new DataSource(input_arff);
    Instances unlabeled = source.getDataSet();
    if (unlabeled.classIndex() == -1) {
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID  attribute

    ObjectInputStream ois = new ObjectInputStream(new FileInputStream(model));
    Classifier cls = (Classifier) ois.readObject();
    ois.close();
    // create copy
    Instances labeled = new Instances(unlabeled);

    // label instances
    for (int i = 0; i < unlabeled.numInstances(); i++) {
        double clsLabel = cls.classifyInstance(unlabeled.instance(i));
        labeled.instance(i).setClassValue(clsLabel);
    }
    // save labeled data
    BufferedWriter writer = new BufferedWriter(new FileWriter(output));
    writer.write(labeled.toString());
    writer.newLine();
    writer.flush();
    writer.close();

}
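
A hypothetical driver (again with placeholder file names) showing how a previously serialized model might be applied to unlabeled data:

public class ApplyAlignmentModel {
    public static void main(String[] args) throws Exception {
        // Classifies every instance in the unlabeled .arff file with the serialized
        // model and writes the labeled copy to the output path.
        WekaMachineLearning.applyModelToUnlabeledArff("unlabeled.arff", "alignment.model", "labeled.arff");
    }
}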

From source file: de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java

License:Apache License

private Instances getInstances(File instancesFile) throws FileNotFoundException, IOException {
    Instances trainData = null;
    Reader reader;
    if (instancesFile.getAbsolutePath().endsWith(".gz")) {
        reader = new BufferedReader(
                new InputStreamReader(new GZIPInputStream(new FileInputStream(instancesFile))));
    } else {
        reader = new BufferedReader(new FileReader(instancesFile));
    }

    try {
        trainData = new Instances(reader);
        trainData.setClassIndex(trainData.numAttributes() - 1);
    } finally {
        reader.close();
    }

    return trainData;
}