Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

In this page you can find the example usage for weka.core Instances attribute.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Returns a list of all Attributes, *including* the class attribute if
 * it is set. Note that using Instance.enumerateAttributes() will *skip*
 * the class attribute./*from w  w  w .  java 2s.c  o  m*/
 * @param instances
 * @return
 */
public static ArrayList<Attribute> extractAttributes(final Instances instances) {
    final ArrayList<Attribute> attributes = new ArrayList<Attribute>(instances.numAttributes());
    for (int i = 0; i < instances.numAttributes(); ++i) {
        attributes.add(instances.attribute(i));
    }
    return attributes;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Returns a list of all Attributes, *excluding* the class attribute if
 * it is set./*from   w  ww  . ja  v  a 2s.  c  om*/
 * @param instances
 * @return
 */
public static ArrayList<Attribute> extractUnlabeledAttributes(final Instances instances) {
    final ArrayList<Attribute> attributes = new ArrayList<Attribute>(instances.numAttributes());
    for (int i = 0; i < instances.numAttributes(); ++i) {
        if (i == instances.classIndex()) {
            continue;
        }
        attributes.add(instances.attribute(i));
    }
    return attributes;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Builds a derived dataset whose attributes are products of subsets of the
 * original attributes, for every non-empty subset of size at most n. Each new
 * attribute's value is the product of its member attributes' values, and its
 * name joins the member names with "_x_". If a (nominal) class attribute is
 * set, it is copied through as the last attribute and re-set as the class of
 * the result.
 *
 * NOTE(review): weka's Instances.classAttribute() normally throws when no
 * class index is set rather than returning null — confirm the class_attr
 * null checks below are reachable.
 *
 * @param D source dataset
 * @param n maximum subset size to turn into a product attribute
 * @return a new dataset of subset-product features (plus class, if set)
 */
public static Instances powerSet(final Instances D, final int n) {
    final Attribute class_attr = D.classAttribute();

    // Candidate attribute indices; one slot is subtracted for the class
    // attribute when present.
    // NOTE(review): Fn.range(1, Nattr) appears to start at index 1 —
    // confirm whether attribute 0 is intentionally excluded.
    final ImmutableSet.Builder<Integer> b = new ImmutableSet.Builder<Integer>();
    final int Nattr = class_attr != null ? D.numAttributes() - 1 : D.numAttributes();
    for (final int i : Fn.range(1, Nattr)) {
        b.add(i);
    }
    final Set<Set<Integer>> index = Sets.powerSet(b.build());

    // First pass over the power set: create one attribute per qualifying
    // subset (non-empty, size <= n), named by joining member names with "_x_".
    final ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (final Set<Integer> subset : index) {
        if (subset.isEmpty() || subset.size() > n) {
            continue;
        }

        final StringBuilder attr_name = new StringBuilder();
        int count = 0;
        for (final Integer i : subset) {
            if (count++ > 0) {
                attr_name.append("_x_");
            }
            attr_name.append(D.attribute(i).name());
        }

        attributes.add(new Attribute(attr_name.toString()));
    }
    if (class_attr != null) {
        assert (class_attr.isNominal());
        attributes.add(WekaUtil.createNominalAttribute(class_attr.name(), class_attr.numValues()));
    }

    final String Pname = "P" + n + "_" + D.relationName();
    final Instances P = new Instances(Pname, attributes, 0);
    if (class_attr != null) {
        // Class attribute was appended last above.
        P.setClassIndex(attributes.size() - 1);
    }

    // Second pass: for each instance, compute the product of values for every
    // qualifying subset. Iteration over 'index' uses the same order and the
    // same skip condition as the first pass, so values line up with the
    // attributes created above.
    for (final Instance inst : D) {
        final double[] xp = new double[attributes.size()];
        int idx = 0;
        for (final Set<Integer> subset : index) {
            if (subset.isEmpty() || subset.size() > n) {
                continue;
            }

            double p = 1.0;
            for (final Integer i : subset) {
                p *= inst.value(i);
            }
            xp[idx++] = p;
        }
        if (class_attr != null) {
            xp[idx++] = inst.classValue();
        }

        WekaUtil.addInstance(P, new DenseInstance(inst.weight(), xp));
    }

    return P;
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java

License:Open Source License

/**
 * Builds a dataset whose attributes are pairwise products of the non-class
 * attributes of the input, named "ai_x_aj".
 *
 * Pair selection:
 * - symmetric == true: j ranges over all attributes, so both (i, j) and
 *   (j, i) products appear; symmetric == false: only j &gt;= i.
 * - reflexive == false: i == j pairs (squared terms) are skipped.
 *
 * The input's class attribute is excluded from the products; the result has
 * no class index set.
 *
 * @param single input dataset
 * @param reflexive whether to include i == j products
 * @param symmetric whether to include both orderings of each pair
 * @return a new dataset of pairwise-product features
 */
public static Instances allPairwiseProducts(final Instances single, final boolean reflexive,
        final boolean symmetric) {
    final int c = single.classIndex();
    System.out.println("Class attribute = " + c);

    // First pass: create the product attributes. The loop structure here must
    // match the value-filling pass below exactly so columns line up.
    final ArrayList<Attribute> pair_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < single.numAttributes(); ++i) {
        if (i == c) {
            continue;
        }
        final Attribute ai = single.attribute(i);
        final int j0 = (symmetric ? 0 : i);
        for (int j = j0; j < single.numAttributes(); ++j) {
            if (j == c) {
                continue;
            }
            if (!reflexive && i == j) {
                continue;
            }

            final Attribute aj = single.attribute(j);

            final String name = ai.name() + "_x_" + aj.name();
            pair_attributes.add(new Attribute(name));
        }
    }

    // Relation name encodes the options, e.g. "data_xrs_data".
    String pair_name = single.relationName();
    pair_name += "_x";
    if (reflexive) {
        pair_name += "r";
    }
    if (symmetric) {
        pair_name += "s";
    }
    pair_name += "_";
    pair_name += single.relationName();
    final Instances result = new Instances(pair_name, pair_attributes, 0);

    // Second pass: fill each instance with the products, iterating pairs in
    // the same order as the attribute-creation pass above.
    for (final Instance inst : single) {
        final double[] xp = new double[pair_attributes.size()];
        int idx = 0;
        for (int i = 0; i < single.numAttributes(); ++i) {
            if (i == c) {
                continue;
            }
            final double xi = inst.value(i);
            final int j0 = (symmetric ? 0 : i);
            for (int j = j0; j < single.numAttributes(); ++j) {
                if (j == c) {
                    continue;
                }
                if (!reflexive && i == j) {
                    continue;
                }
                final double xj = inst.value(j);
                xp[idx++] = xi * xj;
            }
        }
        WekaUtil.addInstance(result, new DenseInstance(inst.weight(), xp));
    }

    return result;
}

From source file:edu.stanford.rsl.conrad.segmentation.GridFeatureExtractor.java

License:Open Source License

/**
 * Writes the current instances as an ARFF-style text file. The output path
 * is taken from the CLASSIFIER_DATA_LOCATION registry entry, suffixed with
 * "_" + s. Does nothing when that registry entry is not set.
 *
 * Fixes: the writer is now opened in try-with-resources so it is closed
 * even if a write fails (the original leaked it on exception), the registry
 * is queried once instead of twice, and StringBuffer is replaced with the
 * unsynchronized StringBuilder.
 *
 * @param s suffix appended to the configured output path
 * @throws IOException if the file cannot be created or written
 */
public void saveInstances(String s) throws IOException {
    String location = Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION);
    if (location == null) {
        return;
    }
    System.out.println("Saving: " + s);

    // try-with-resources guarantees the writer is closed even on exception.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(location + "_" + s))) {
        Instances inst = getInstances();

        // Header: relation name followed by one attribute declaration per line.
        StringBuilder text = new StringBuilder();
        text.append("@relation").append(" ").append(Utils.quote("testing")).append("\n\n");
        for (int i = 0; i < inst.numAttributes(); i++) {
            text.append(inst.attribute(i)).append("\n");
        }
        text.append("\n").append("@data").append("\n");
        bw.write(text.toString());

        // Data rows, newline-separated with no trailing newline after the last.
        for (int i = 0; i < inst.numInstances(); i++) {
            StringBuilder row = new StringBuilder();
            row.append(inst.instance(i));
            if (i < inst.numInstances() - 1) {
                row.append('\n');
            }
            bw.write(row.toString());
        }
        bw.flush();
    }
    System.out.println("Done.");
}

From source file:eksploracja.Eksploracja.java

/**
 * Demo of basic Weka data exploration: loads the weather.numeric ARFF
 * dataset, prints the first and last instance, the instance and attribute
 * counts, each attribute's declaration, name and type (nominal vs. numeric),
 * and finally the first row as a double array.
 * NOTE(review): the dataset path is hard-coded to a Windows Weka-3-8 install.
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    // TODO code application logic here

    System.out.println("Hello world - tu eksploracja");

    // Load the data from the ARFF file
    String filename = "C:\\Program Files\\Weka-3-8\\data\\weather.numeric.arff";
    DataSource source = new DataSource(filename);
    Instances mojeDane = source.getDataSet();

    // Display the data
    System.out.println("Dane: ");
    // System.out.println(mojeDane); // whole dataset

    Instance wiersz0 = mojeDane.firstInstance();

    System.out.println("Pocztek " + mojeDane.firstInstance()); // first row
    System.out.println("Koniec " + mojeDane.lastInstance()); // last row

    System.out.println("\nLiczba danych: " + mojeDane.numInstances());
    System.out.println("\nAtrybuty w liczbie: " + mojeDane.numAttributes());

    // For each attribute: print its declaration, its name, and its type.
    for (int i = 0; i < mojeDane.numAttributes(); i++) {
        System.out.println(i + ". " + mojeDane.attribute(i));
        Attribute atr = mojeDane.attribute(i);
        System.out.println(i + " " + atr.name());

        if (atr.isNominal()) {
            System.out.println("Typ danych nominalne");
        } else {
            System.out.println("Typ danych numeryczne");
        }
    }

    // Print the first row as raw numeric values
    System.out.println("Dane - jako liczby: ");
    System.out.println(Arrays.toString(wiersz0.toDoubleArray()));

}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the polarity detection training functionalities.
 * Reads the corpus, extracts features with the format-appropriate loader,
 * trains a polarity model on the "polarityCat" class, saves it under the
 * configured vector directory, and evaluates it according to the requested
 * validation mode.
 *
 * Fixes: the parameter-file stream is now opened in try-with-resources so
 * it is closed even if loading fails (the original leaked the
 * FileInputStream).
 *
 * @param inputStream corpus to train from
 * @throws IOException input output exception if problems with corpora
 */
public final void trainATP(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    String classes = parsedArguments.getString("classnum");
    int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);
    System.err.println("trainATP : Corpus read, creating features");
    Features atpTrain = new Features(reader, paramFile, classes);

    // Pick the instance loader matching the corpus format/language.
    Instances traindata;
    if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
        traindata = atpTrain.loadInstancesTAB(true, "atp");
    } else if (corpusFormat.equalsIgnoreCase("tabNotagged") && lang.equalsIgnoreCase("eu")) {
        traindata = atpTrain.loadInstancesConll(true, "atp");
    } else {
        traindata = atpTrain.loadInstances(true, "atp");
    }

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)
    traindata.setClass(traindata.attribute("polarityCat"));
    WekaWrapper classify;
    try {
        Properties params = new Properties();
        // try-with-resources: close the parameter file even if load() fails
        try (FileInputStream paramStream = new FileInputStream(paramFile)) {
            params.load(paramStream);
        }
        String modelPath = params.getProperty("fVectorDir");
        classify = new WekaWrapper(traindata, true);
        classify.saveModel(modelPath + File.separator + "elixa-atp_" + lang + ".model");
        switch (validation) {
        case "cross":
            classify.crossValidate(foldNum);
            break;
        case "trainTest":
            classify.trainTest();
            break;
        case "both":
            classify.crossValidate(foldNum);
            classify.trainTest();
            break;
        default:
            System.out.println("train-atp: wrong validation option. Model saved but not tested");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the polarity tagging functionalities. Target based polarity.
 * Evaluates polarity over the corpus either with a rule-based lexicon
 * classifier or with a previously trained ML model (the default), printing
 * one tab-separated prediction line per opinion and storing the predicted
 * polarity back on each opinion.
 *
 * Fixes: the parameter-file stream is now opened in try-with-resources so
 * it is closed even if loading fails (the original leaked the
 * FileInputStream).
 *
 * @param inputStream corpus to evaluate
 * @throws IOException input output exception if problems with corpora
 * @throws JDOMException if the corpus XML cannot be parsed
 */
public final void evalATP(final InputStream inputStream) throws IOException, JDOMException {

    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String model = parsedArguments.getString("model");
    String lang = parsedArguments.getString("language");
    String classnum = parsedArguments.getString("classnum");
    boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier");
    boolean printPreds = parsedArguments.getBoolean("outputPredictions");

    //Read corpus sentences
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);

    //Rule-based Classifier.
    if (ruleBased) {
        Properties params = new Properties();
        // try-with-resources: close the parameter file even if load() fails
        try (FileInputStream paramStream = new FileInputStream(new File(paramFile))) {
            params.load(paramStream);
        }

        String posModelPath = params.getProperty("pos-model");
        String lemmaModelPath = params.getProperty("lemma-model");
        String kafDir = params.getProperty("kafDir");

        /* polarity lexicon. Domain specific polarity lexicon is given priority.
         * If no domain lexicon is found it reverts to general polarity lexicon.
         * If no general polarity lexicon is found program exits with error message.
        */
        String lex = params.getProperty("polarLexiconDomain", "none");
        if (lex.equalsIgnoreCase("none")) {
            lex = params.getProperty("polarLexiconGeneral", "none");
            if (lex.equalsIgnoreCase("none")) {
                System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity"
                        + " lexicon has been specified. Either specify one or choose ML classifier");
                System.exit(1);
            }
        }
        File lexFile = new File(lex);
        Evaluator evalDoc = new Evaluator(lexFile, "lemma");

        for (String oId : reader.getOpinions().keySet()) {
            // sentence posTagging
            String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir, posModelPath,
                    lemmaModelPath);
            //process the postagged sentence with the word count based polarity tagger
            Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName());
            String lblStr = results.get("polarity");
            String actual = "?";
            if (reader.getOpinion(oId).getPolarity() != null) {
                actual = reader.getOpinion(oId).getPolarity();
            }
            String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
            System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId));
            reader.getOpinion(oId).setPolarity(lblStr);
        }
    }
    //ML Classifier (default)
    else {
        Features atpTest = new Features(reader, paramFile, classnum, model);
        // Pick the instance loader matching the corpus format.
        Instances testdata;
        if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
            testdata = atpTest.loadInstancesTAB(true, "atp");
        } else {
            testdata = atpTest.loadInstances(true, "atp");
        }
        //   setting class attribute (entCat|attCat|entAttCat|polarityCat)
        testdata.setClass(testdata.attribute("polarityCat"));

        WekaWrapper classify;
        try {
            classify = new WekaWrapper(model);

            System.err.println("evalAtp : going to test the model");
            classify.setTestdata(testdata);
            classify.testModel(model);

            if (printPreds) {
                for (String oId : reader.getOpinions().keySet()) {
                    // Opinion instance ids are 1-based; the dataset is 0-based.
                    int iId = atpTest.getOpinInst().get(oId);
                    Instance i = testdata.get(iId - 1);
                    double label = classify.getMLclass().classifyInstance(i);
                    String lblStr = i.classAttribute().value((int) label);
                    String actual = "?";
                    if (reader.getOpinion(oId).getPolarity() != null) {
                        actual = reader.getOpinion(oId).getPolarity();
                    }
                    String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
                    String oSent = reader.getOpinionSentence(oId);
                    if (corpusFormat.startsWith("tab")) {
                        // Tabbed formats: keep only the token column of each line.
                        StringBuilder sb = new StringBuilder();
                        for (String kk : oSent.split("\n")) {
                            sb.append(kk.split("\\t")[0]);
                            sb.append(" ");
                        }
                        oSent = sb.toString();
                    }

                    System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t"
                            + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::"));
                    reader.getOpinion(oId).setPolarity(lblStr);
                }
            }
            //reader.print2Semeval2015format(model+"tagATP.xml");
            //reader.print2conll(model+"tagAtp.conll");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the polarity tagging functionalities. Target based polarity. 
 *
 * @throws IOException/*from   w ww .  j  a va2  s .  c o  m*/
 * input output exception if problems with corpora
 * @throws JDOMException 
 */
public final void tagATP(final InputStream inputStream) throws IOException, JDOMException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String model = parsedArguments.getString("model");
    String lang = parsedArguments.getString("language");
    String classnum = parsedArguments.getString("classnum");
    boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier");

    //Read corpus sentences
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);

    //Rule-based Classifier.
    if (ruleBased) {
        Properties params = new Properties();
        params.load(new FileInputStream(new File(paramFile)));

        String posModelPath = params.getProperty("pos-model");
        String lemmaModelPath = params.getProperty("lemma-model");
        String kafDir = params.getProperty("kafDir");

        /* polarity lexicon. Domain specific polarity lexicon is given priority.
         * If no domain lexicon is found it reverts to general polarity lexicon.
         * If no general polarity lexicon is found program exits with error message.
        */
        String lex = params.getProperty("polarLexiconDomain", "none");
        if (lex.equalsIgnoreCase("none")) {
            lex = params.getProperty("polarLexiconGeneral", "none");
            if (lex.equalsIgnoreCase("none")) {
                System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity"
                        + " lexicon has been specified. Either specify one or choose ML classifier");
                System.exit(1);
            }
        }
        File lexFile = new File(lex);
        Evaluator evalDoc = new Evaluator(lexFile, "lemma");

        for (String oId : reader.getOpinions().keySet()) {
            // sentence posTagging
            String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir, posModelPath,
                    lemmaModelPath);
            //process the postagged sentence with the word count based polarity tagger
            Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName());
            String lblStr = results.get("polarity");
            String actual = "?";
            if (reader.getOpinion(oId).getPolarity() != null) {
                actual = reader.getOpinion(oId).getPolarity();
            }
            String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
            System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId));
            reader.getOpinion(oId).setPolarity(lblStr);
        }
    } else {
        Features atpTrain = new Features(reader, paramFile, classnum, model);
        Instances traindata;
        if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
            traindata = atpTrain.loadInstancesTAB(true, "atp");
        } else if (lang.equalsIgnoreCase("eu")
                && (corpusFormat.equalsIgnoreCase("tabNotagged") || corpusFormat.equalsIgnoreCase("ireom"))) {
            traindata = atpTrain.loadInstancesConll(true, "atp");
        } else {
            traindata = atpTrain.loadInstances(true, "atp");
        }

        //   setting class attribute (entCat|attCat|entAttCat|polarityCat)
        traindata.setClass(traindata.attribute("polarityCat"));

        WekaWrapper classify;
        try {
            classify = new WekaWrapper(model);

            System.err.println();
            //sort according to the instanceId
            //traindata.sort(atpTrain.getAttIndexes().get("instanceId"));
            //Instances testdata = new Instances(traindata);
            //testdata.deleteAttributeAt(0);
            //classify.setTestdata(testdata);
            classify.setTestdata(traindata);
            classify.loadModel(model);

            for (String oId : reader.getOpinions().keySet()) {
                int iId = atpTrain.getOpinInst().get(oId);
                Instance i = traindata.get(iId - 1);
                double label = classify.getMLclass().classifyInstance(i);
                String lblStr = i.classAttribute().value((int) label);
                String actual = "?";
                if (reader.getOpinion(oId).getPolarity() != null) {
                    actual = reader.getOpinion(oId).getPolarity();
                }
                String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
                String oSent = reader.getOpinionSentence(oId);
                if (corpusFormat.startsWith("tab")) {
                    StringBuilder sb = new StringBuilder();
                    for (String kk : oSent.split("\n")) {
                        sb.append(kk.split("\\t")[0]);
                        sb.append(" ");
                    }
                    oSent = sb.toString();
                }

                System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t"
                        + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::"));
                reader.getOpinion(oId).setPolarity(lblStr);
            }

            //reader.print2Semeval2015format(model+"tagATP.xml");
            //reader.print2conll(model+"tagAtp.conll");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the train-atc functionalities.
 * Train ATC using a single classifier (one vs. all) for E#A aspect categories.
 * Trains and cross-validates two classifiers over the same feature set — one
 * for the entity category ("entCat") and one for the attribute category
 * ("attCat") — saving each as a language-suffixed model file.
 *
 * @param inputStream corpus to train from
 * @throws IOException if reading the corpora fails
 */
public final void trainATC(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    //String validation = parsedArguments.getString("validation");
    int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    String lang = parsedArguments.getString("language");
    //boolean printPreds = parsedArguments.getBoolean("printPreds");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    //double threshold = 0.2;
    //String modelsPath = "/home/inaki/Proiektuak/BOM/SEMEVAL2015/ovsaModels";

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)

    //HashMap<String, Integer> opInst = atcTrain.getOpinInst();
    WekaWrapper classifyEnts;
    WekaWrapper classifyAtts;
    //WekaWrapper onevsall;
    try {
        //train first classifier (entities). Works on a copy of the data so
        //the attribute classifier below sees the original attribute set.
        Instances traindataEnt = new Instances(traindata);
        // IMPORTANT: filter indexes are added 1 because weka remove function counts attributes from 1, 
        traindataEnt.setClassIndex(traindataEnt.attribute("entCat").index());
        classifyEnts = new WekaWrapper(traindataEnt, true);
        // Remove the other label columns so they cannot leak into training.
        String filtRange = String.valueOf(traindata.attribute("attCat").index() + 1) + ","
                + String.valueOf(traindata.attribute("entAttCat").index() + 1);
        classifyEnts.filterAttribute(filtRange);

        System.out.println("trainATC: entity classifier results -> ");
        classifyEnts.crossValidate(foldNum);
        classifyEnts.saveModel("elixa-atc_ent-" + lang + ".model");

        //Classifier entityCl = classify.getMLclass();

        //train second classifier (attributes), again on a fresh copy, with
        //the remaining label column ("entAttCat") filtered out.
        Instances traindataAtt = new Instances(traindata);
        traindataAtt.setClassIndex(traindataAtt.attribute("attCat").index());
        classifyAtts = new WekaWrapper(traindataAtt, true);
        filtRange = String.valueOf(traindataAtt.attribute("entAttCat").index() + 1);
        classifyAtts.filterAttribute(filtRange);

        System.out.println("trainATC: attribute classifier results -> ");
        classifyAtts.crossValidate(foldNum);
        classifyAtts.saveModel("elixa-atc_att-" + lang + ".model");
        /*
        Instances traindataEntadded = classifyEnts.addClassification(classifyEnts.getMLclass(), traindataEnt);
        //train second classifier (entCat attributes will have the values of the entities always)
        traindataEntadded.setClassIndex(traindataEntadded.attribute("attCat").index());
        WekaWrapper classify2 = new WekaWrapper(traindataEntadded, true);
        System.out.println("trainATC: enhanced attribute classifier results -> ");
        classify2.saveModel("elixa-atc_att_enhanced.model");
        classify2.crossValidate(foldNum);      
        */
        //classify.printMultilabelPredictions(classify.multiLabelPrediction());      */   

        //reader.print2Semeval2015format(paramFile+"entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }

    //traindata.setClass(traindata.attribute("entAttCat"));
    System.err.println("DONE CLI train-atc");
}