List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/** * Returns a list of all Attributes, *including* the class attribute if * it is set. Note that using Instance.enumerateAttributes() will *skip* * the class attribute./*from w w w . java 2s.c o m*/ * @param instances * @return */ public static ArrayList<Attribute> extractAttributes(final Instances instances) { final ArrayList<Attribute> attributes = new ArrayList<Attribute>(instances.numAttributes()); for (int i = 0; i < instances.numAttributes(); ++i) { attributes.add(instances.attribute(i)); } return attributes; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/** * Returns a list of all Attributes, *excluding* the class attribute if * it is set./*from w ww . ja v a 2s. c om*/ * @param instances * @return */ public static ArrayList<Attribute> extractUnlabeledAttributes(final Instances instances) { final ArrayList<Attribute> attributes = new ArrayList<Attribute>(instances.numAttributes()); for (int i = 0; i < instances.numAttributes(); ++i) { if (i == instances.classIndex()) { continue; } attributes.add(instances.attribute(i)); } return attributes; }
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/**
 * Builds a derived dataset whose attributes are products of subsets (of size
 * 1..n) of the attributes of D. Each new attribute is named by joining the
 * member attribute names with "_x_", and each new instance value is the
 * product of the member values of the corresponding source instance. If a
 * nominal class attribute is present it is copied through as the last
 * attribute and set as the class of the result.
 *
 * NOTE(review): the index range starts at 1, so attribute 0 is never part of
 * any product — presumably attribute 0 is an id column; confirm against the
 * callers of this method.
 *
 * @param D source dataset
 * @param n maximum subset size to include
 * @return new Instances named "P<n>_<relation>" containing the products
 */
public static Instances powerSet(final Instances D, final int n) {
    // NOTE(review): Weka's classAttribute() throws when no class index is set
    // rather than returning null — verify the null checks below ever fire.
    final Attribute class_attr = D.classAttribute();
    final ImmutableSet.Builder<Integer> b = new ImmutableSet.Builder<Integer>();
    // Exclude the class attribute from the product-candidate count.
    final int Nattr = class_attr != null ? D.numAttributes() - 1 : D.numAttributes();
    // Fn.range(1, Nattr): project helper; presumably yields indices starting at 1.
    for (final int i : Fn.range(1, Nattr)) {
        b.add(i);
    }
    // All subsets of the candidate indices; iteration order of this set is
    // relied upon to match between the attribute pass and the value pass below.
    final Set<Set<Integer>> index = Sets.powerSet(b.build());
    final ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (final Set<Integer> subset : index) {
        // Keep only non-empty subsets of size <= n.
        if (subset.isEmpty() || subset.size() > n) {
            continue;
        }
        // Name the product attribute "a_x_b_x_c" from its members.
        final StringBuilder attr_name = new StringBuilder();
        int count = 0;
        for (final Integer i : subset) {
            if (count++ > 0) {
                attr_name.append("_x_");
            }
            attr_name.append(D.attribute(i).name());
        }
        attributes.add(new Attribute(attr_name.toString()));
    }
    if (class_attr != null) {
        // Only nominal classes are supported here.
        assert (class_attr.isNominal());
        attributes.add(WekaUtil.createNominalAttribute(class_attr.name(), class_attr.numValues()));
    }
    final String Pname = "P" + n + "_" + D.relationName();
    final Instances P = new Instances(Pname, attributes, 0);
    if (class_attr != null) {
        // Class was appended last above.
        P.setClassIndex(attributes.size() - 1);
    }
    // Second pass: compute the product values in the same subset order.
    for (final Instance inst : D) {
        final double[] xp = new double[attributes.size()];
        int idx = 0;
        for (final Set<Integer> subset : index) {
            if (subset.isEmpty() || subset.size() > n) {
                continue;
            }
            double p = 1.0;
            for (final Integer i : subset) {
                p *= inst.value(i);
            }
            xp[idx++] = p;
        }
        if (class_attr != null) {
            xp[idx++] = inst.classValue();
        }
        WekaUtil.addInstance(P, new DenseInstance(inst.weight(), xp));
    }
    return P;
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
/**
 * Builds a derived dataset whose attributes are pairwise products of the
 * non-class attributes of 'single'. The class attribute (if set) is skipped
 * entirely; the result has no class attribute.
 *
 * @param single    source dataset
 * @param reflexive when false, skips i == j products (no squared terms)
 * @param symmetric when true, j ranges over all attributes (both a_i*a_j and
 *                  a_j*a_i appear); when false, j starts at i (upper triangle only)
 * @return new Instances containing one product column per retained (i, j) pair
 */
public static Instances allPairwiseProducts(final Instances single, final boolean reflexive,
        final boolean symmetric) {
    final int c = single.classIndex();
    System.out.println("Class attribute = " + c);
    // First pass: create one attribute per retained (i, j) pair. The loop
    // structure here must match the value-computation pass below exactly.
    final ArrayList<Attribute> pair_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < single.numAttributes(); ++i) {
        if (i == c) {
            continue;
        }
        final Attribute ai = single.attribute(i);
        final int j0 = (symmetric ? 0 : i);
        for (int j = j0; j < single.numAttributes(); ++j) {
            if (j == c) {
                continue;
            }
            if (!reflexive && i == j) {
                continue;
            }
            final Attribute aj = single.attribute(j);
            final String name = ai.name() + "_x_" + aj.name();
            pair_attributes.add(new Attribute(name));
        }
    }
    // Relation name encodes the flags, e.g. "rel_xrs_rel".
    String pair_name = single.relationName();
    pair_name += "_x";
    if (reflexive) {
        pair_name += "r";
    }
    if (symmetric) {
        pair_name += "s";
    }
    pair_name += "_";
    pair_name += single.relationName();
    final Instances result = new Instances(pair_name, pair_attributes, 0);
    // Second pass: fill in the products, in the same (i, j) order as above.
    for (final Instance inst : single) {
        final double[] xp = new double[pair_attributes.size()];
        int idx = 0;
        for (int i = 0; i < single.numAttributes(); ++i) {
            if (i == c) {
                continue;
            }
            final double xi = inst.value(i);
            final int j0 = (symmetric ? 0 : i);
            for (int j = j0; j < single.numAttributes(); ++j) {
                if (j == c) {
                    continue;
                }
                if (!reflexive && i == j) {
                    continue;
                }
                final double xj = inst.value(j);
                xp[idx++] = xi * xj;
            }
        }
        WekaUtil.addInstance(result, new DenseInstance(inst.weight(), xp));
    }
    return result;
}
From source file:edu.stanford.rsl.conrad.segmentation.GridFeatureExtractor.java
License:Open Source License
public void saveInstances(String s) throws IOException { if (Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) != null) { BufferedWriter bw = new BufferedWriter(new FileWriter( Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) + "_" + s));//from ww w .j a v a 2s .c o m System.out.println("Saving: " + s); //bw.write(getInstances().toString()); Instances inst = getInstances(); StringBuffer text = new StringBuffer(); text.append("@relation").append(" ").append(Utils.quote("testing")).append("\n\n"); for (int i = 0; i < inst.numAttributes(); i++) { text.append(inst.attribute(i)).append("\n"); } text.append("\n").append("@data").append("\n"); bw.write(text.toString()); for (int i = 0; i < inst.numInstances(); i++) { text = new StringBuffer(); text.append(inst.instance(i)); if (i < inst.numInstances() - 1) { text.append('\n'); } bw.write(text.toString()); } bw.flush(); bw.close(); System.out.println("Done."); } }
From source file:eksploracja.Eksploracja.java
/** * @param args the command line arguments *//*w ww . jav a2 s .c om*/ public static void main(String[] args) throws Exception { // TODO code application logic here //sout +tabualcja System.out.println("Hello world - tu eksploracja"); //Pobieranie danych String filename = "C:\\Program Files\\Weka-3-8\\data\\weather.numeric.arff"; DataSource source = new DataSource(filename); Instances mojeDane = source.getDataSet(); //Wywietlanie danych System.out.println("Dane: "); // System.out.println(mojeDane); //cao danych Instance wiersz0 = mojeDane.firstInstance(); System.out.println("Pocztek " + mojeDane.firstInstance()); //pierwszy wiersz System.out.println("Koniec " + mojeDane.lastInstance()); //ostatni wiersz System.out.println("\nLiczba danych: " + mojeDane.numInstances()); System.out.println("\nAtrybuty w liczbie: " + mojeDane.numAttributes()); for (int i = 0; i < mojeDane.numAttributes(); i++) { System.out.println(i + ". " + mojeDane.attribute(i)); Attribute atr = mojeDane.attribute(i); System.out.println(i + " " + atr.name()); if (atr.isNominal()) { System.out.println("Typ danych nominalne"); } else { System.out.println("Typ danych numeryczne"); } } //Zapisywanie danych w posataci liczbowej System.out.println("Dane - jako liczby: "); System.out.println(Arrays.toString(wiersz0.toDoubleArray())); }
From source file:elh.eus.absa.CLI.java
License:Open Source License
/**
 * Main access to the polarity detection training functionalities. Loads the
 * corpus, extracts features, trains a polarity classifier, saves the model,
 * and optionally evaluates it by cross-validation and/or train/test split.
 *
 * @throws IOException input output exception if problems with corpora
 */
public final void trainATP(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    String classes = parsedArguments.getString("classnum");
    int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);
    System.err.println("trainATP : Corpus read, creating features");
    Features atpTrain = new Features(reader, paramFile, classes);
    Instances traindata;
    // Pick the instance loader matching the corpus format; "tabNotagged" in
    // Basque ("eu") is handled by the CoNLL loader.
    if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
        traindata = atpTrain.loadInstancesTAB(true, "atp");
    } else if (corpusFormat.equalsIgnoreCase("tabNotagged") && lang.equalsIgnoreCase("eu")) {
        traindata = atpTrain.loadInstancesConll(true, "atp");
    } else {
        traindata = atpTrain.loadInstances(true, "atp");
    }
    //setting class attribute (entCat|attCat|entAttCat|polarityCat)
    traindata.setClass(traindata.attribute("polarityCat"));
    WekaWrapper classify;
    try {
        Properties params = new Properties();
        params.load(new FileInputStream(paramFile));
        String modelPath = params.getProperty("fVectorDir");
        classify = new WekaWrapper(traindata, true);
        // Model is saved before any evaluation runs.
        classify.saveModel(modelPath + File.separator + "elixa-atp_" + lang + ".model");
        switch (validation) {
        case "cross":
            classify.crossValidate(foldNum);
            break;
        case "trainTest":
            classify.trainTest();
            break;
        case "both":
            classify.crossValidate(foldNum);
            classify.trainTest();
            break;
        default:
            // NOTE(review): this message was split across lines in the scraped
            // source; reconstructed as one line — confirm against the original repo.
            System.out.println("train-atp: wrong validation option. Model saved but not tested");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:elh.eus.absa.CLI.java
License:Open Source License
/**
 * Main access to the polarity tagging functionalities. Target based polarity.
 * Evaluates either a rule-based (lexicon word-count) classifier or a trained
 * ML model over the corpus read from the input stream, printing per-opinion
 * predictions when requested.
 *
 * @throws IOException input output exception if problems with corpora
 */
public final void evalATP(final InputStream inputStream) throws IOException, JDOMException {
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String model = parsedArguments.getString("model");
    String lang = parsedArguments.getString("language");
    String classnum = parsedArguments.getString("classnum");
    boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier");
    boolean printPreds = parsedArguments.getBoolean("outputPredictions");
    //Read corpus sentences
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);
    //Rule-based Classifier.
    if (ruleBased) {
        Properties params = new Properties();
        params.load(new FileInputStream(new File(paramFile)));
        String posModelPath = params.getProperty("pos-model");
        String lemmaModelPath = params.getProperty("lemma-model");
        String kafDir = params.getProperty("kafDir");
        /* polarity lexicon. Domain specific polarity lexicon is given priority.
         * If no domain lexicon is found it reverts to general polarity lexicon.
         * If no general polarity lexicon is found program exits with error message.
         */
        String lex = params.getProperty("polarLexiconDomain", "none");
        if (lex.equalsIgnoreCase("none")) {
            lex = params.getProperty("polarLexiconGeneral", "none");
            if (lex.equalsIgnoreCase("none")) {
                // NOTE(review): message reconstructed from a line split in the
                // scraped source — confirm exact wording against the original repo.
                System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity"
                        + " lexicon has been specified. Either specify one or choose ML classifier");
                System.exit(1);
            }
        }
        File lexFile = new File(lex);
        Evaluator evalDoc = new Evaluator(lexFile, "lemma");
        for (String oId : reader.getOpinions().keySet()) {
            // sentence posTagging
            String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir, posModelPath,
                    lemmaModelPath);
            //process the postagged sentence with the word count based polarity tagger
            Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName());
            String lblStr = results.get("polarity");
            // Gold label ("?" when the corpus carries none).
            String actual = "?";
            if (reader.getOpinion(oId).getPolarity() != null) {
                actual = reader.getOpinion(oId).getPolarity();
            }
            // Strip the "_g" (gold) suffix from the sentence id for display.
            String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
            System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId));
            reader.getOpinion(oId).setPolarity(lblStr);
        }
    }
    //ML Classifier (default)
    else {
        Features atpTest = new Features(reader, paramFile, classnum, model);
        Instances testdata;
        if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
            testdata = atpTest.loadInstancesTAB(true, "atp");
        } else {
            testdata = atpTest.loadInstances(true, "atp");
        }
        // setting class attribute (entCat|attCat|entAttCat|polarityCat)
        testdata.setClass(testdata.attribute("polarityCat"));
        WekaWrapper classify;
        try {
            classify = new WekaWrapper(model);
            System.err.println("evalAtp : going to test the model");
            //sort according to the instanceId
            //traindata.sort(atpTrain.getAttIndexes().get("instanceId"));
            //Instances testdata = new Instances(traindata);
            //testdata.deleteAttributeAt(0);
            //classify.setTestdata(testdata);
            classify.setTestdata(testdata);
            classify.testModel(model);
            if (printPreds) {
                for (String oId : reader.getOpinions().keySet()) {
                    // Instance ids are 1-based in the opinion->instance map.
                    int iId = atpTest.getOpinInst().get(oId);
                    Instance i = testdata.get(iId - 1);
                    double label = classify.getMLclass().classifyInstance(i);
                    String lblStr = i.classAttribute().value((int) label);
                    String actual = "?";
                    if (reader.getOpinion(oId).getPolarity() != null) {
                        actual = reader.getOpinion(oId).getPolarity();
                    }
                    String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
                    String oSent = reader.getOpinionSentence(oId);
                    // For tab formats, keep only the first (token) column of each line.
                    if (corpusFormat.startsWith("tab")) {
                        StringBuilder sb = new StringBuilder();
                        for (String kk : oSent.split("\n")) {
                            sb.append(kk.split("\\t")[0]);
                            sb.append(" ");
                        }
                        oSent = sb.toString();
                    }
                    System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t"
                            + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::"));
                    reader.getOpinion(oId).setPolarity(lblStr);
                }
            }
            //reader.print2Semeval2015format(model+"tagATP.xml");
            //reader.print2conll(model+"tagAtp.conll");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
From source file:elh.eus.absa.CLI.java
License:Open Source License
/**
 * Main access to the polarity tagging functionalities. Target based polarity.
 * Tags every opinion in the corpus with a predicted polarity, using either a
 * rule-based (lexicon word-count) classifier or a previously trained ML model.
 *
 * @throws IOException input output exception if problems with corpora
 * @throws JDOMException if XML corpus parsing fails
 */
public final void tagATP(final InputStream inputStream) throws IOException, JDOMException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String model = parsedArguments.getString("model");
    String lang = parsedArguments.getString("language");
    String classnum = parsedArguments.getString("classnum");
    boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier");
    //Read corpus sentences
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);
    //Rule-based Classifier.
    if (ruleBased) {
        Properties params = new Properties();
        params.load(new FileInputStream(new File(paramFile)));
        String posModelPath = params.getProperty("pos-model");
        String lemmaModelPath = params.getProperty("lemma-model");
        String kafDir = params.getProperty("kafDir");
        /* polarity lexicon. Domain specific polarity lexicon is given priority.
         * If no domain lexicon is found it reverts to general polarity lexicon.
         * If no general polarity lexicon is found program exits with error message.
         */
        String lex = params.getProperty("polarLexiconDomain", "none");
        if (lex.equalsIgnoreCase("none")) {
            lex = params.getProperty("polarLexiconGeneral", "none");
            if (lex.equalsIgnoreCase("none")) {
                // NOTE(review): message reconstructed from a line split in the
                // scraped source — confirm exact wording against the original repo.
                System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity"
                        + " lexicon has been specified. Either specify one or choose ML classifier");
                System.exit(1);
            }
        }
        File lexFile = new File(lex);
        Evaluator evalDoc = new Evaluator(lexFile, "lemma");
        for (String oId : reader.getOpinions().keySet()) {
            // sentence posTagging
            String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir, posModelPath,
                    lemmaModelPath);
            //process the postagged sentence with the word count based polarity tagger
            Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName());
            String lblStr = results.get("polarity");
            String actual = "?";
            if (reader.getOpinion(oId).getPolarity() != null) {
                actual = reader.getOpinion(oId).getPolarity();
            }
            // Strip the "_g" (gold) suffix from the sentence id for display.
            String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
            System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId));
            reader.getOpinion(oId).setPolarity(lblStr);
        }
    } else {
        Features atpTrain = new Features(reader, paramFile, classnum, model);
        Instances traindata;
        // Pick the instance loader matching the corpus format; Basque ("eu")
        // tabNotagged/ireom corpora go through the CoNLL loader.
        if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
            traindata = atpTrain.loadInstancesTAB(true, "atp");
        } else if (lang.equalsIgnoreCase("eu") && (corpusFormat.equalsIgnoreCase("tabNotagged")
                || corpusFormat.equalsIgnoreCase("ireom"))) {
            traindata = atpTrain.loadInstancesConll(true, "atp");
        } else {
            traindata = atpTrain.loadInstances(true, "atp");
        }
        // setting class attribute (entCat|attCat|entAttCat|polarityCat)
        traindata.setClass(traindata.attribute("polarityCat"));
        WekaWrapper classify;
        try {
            classify = new WekaWrapper(model);
            System.err.println();
            //sort according to the instanceId
            //traindata.sort(atpTrain.getAttIndexes().get("instanceId"));
            //Instances testdata = new Instances(traindata);
            //testdata.deleteAttributeAt(0);
            //classify.setTestdata(testdata);
            classify.setTestdata(traindata);
            classify.loadModel(model);
            for (String oId : reader.getOpinions().keySet()) {
                // Instance ids are 1-based in the opinion->instance map.
                int iId = atpTrain.getOpinInst().get(oId);
                Instance i = traindata.get(iId - 1);
                double label = classify.getMLclass().classifyInstance(i);
                String lblStr = i.classAttribute().value((int) label);
                String actual = "?";
                if (reader.getOpinion(oId).getPolarity() != null) {
                    actual = reader.getOpinion(oId).getPolarity();
                }
                String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
                String oSent = reader.getOpinionSentence(oId);
                // For tab formats, keep only the first (token) column of each line.
                if (corpusFormat.startsWith("tab")) {
                    StringBuilder sb = new StringBuilder();
                    for (String kk : oSent.split("\n")) {
                        sb.append(kk.split("\\t")[0]);
                        sb.append(" ");
                    }
                    oSent = sb.toString();
                }
                System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t"
                        + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::"));
                reader.getOpinion(oId).setPolarity(lblStr);
            }
            //reader.print2Semeval2015format(model+"tagATP.xml");
            //reader.print2conll(model+"tagAtp.conll");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
From source file:elh.eus.absa.CLI.java
License:Open Source License
/**
 * Main access to the train-atc functionalities.
 * Train ATC using a single classifier (one vs. all) for E#A aspect categories.
 * Trains and cross-validates two classifiers — one for entities ("entCat")
 * and one for attributes ("attCat") — each with the other category columns
 * filtered out, and saves both models.
 *
 * @throws IOException if problems with corpora
 */
public final void trainATC(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    //String validation = parsedArguments.getString("validation");
    int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    String lang = parsedArguments.getString("language");
    //boolean printPreds = parsedArguments.getBoolean("printPreds");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    //double threshold = 0.2;
    //String modelsPath = "/home/inaki/Proiektuak/BOM/SEMEVAL2015/ovsaModels";
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    // "3" is the classnum argument passed to feature extraction.
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");
    //setting class attribute (entCat|attCat|entAttCat|polarityCat)
    //HashMap<String, Integer> opInst = atcTrain.getOpinInst();
    WekaWrapper classifyEnts;
    WekaWrapper classifyAtts;
    //WekaWrapper onevsall;
    try {
        //train first classifier (entities)
        Instances traindataEnt = new Instances(traindata);
        // IMPORTANT: filter indexes are added 1 because weka remove function counts attributes from 1,
        traindataEnt.setClassIndex(traindataEnt.attribute("entCat").index());
        classifyEnts = new WekaWrapper(traindataEnt, true);
        // Remove the attribute-category columns from the entity classifier's view.
        String filtRange = String.valueOf(traindata.attribute("attCat").index() + 1) + ","
                + String.valueOf(traindata.attribute("entAttCat").index() + 1);
        classifyEnts.filterAttribute(filtRange);
        System.out.println("trainATC: entity classifier results -> ");
        classifyEnts.crossValidate(foldNum);
        classifyEnts.saveModel("elixa-atc_ent-" + lang + ".model");
        //Classifier entityCl = classify.getMLclass();
        //train second classifier (attributes)
        Instances traindataAtt = new Instances(traindata);
        traindataAtt.setClassIndex(traindataAtt.attribute("attCat").index());
        classifyAtts = new WekaWrapper(traindataAtt, true);
        // Only the combined entAttCat column is filtered out here; entCat stays.
        filtRange = String.valueOf(traindataAtt.attribute("entAttCat").index() + 1);
        classifyAtts.filterAttribute(filtRange);
        System.out.println("trainATC: attribute classifier results -> ");
        classifyAtts.crossValidate(foldNum);
        classifyAtts.saveModel("elixa-atc_att-" + lang + ".model");
        /* Disabled experiment: feed the entity predictions into the attribute
         * classifier ("enhanced" variant).
        Instances traindataEntadded = classifyEnts.addClassification(classifyEnts.getMLclass(), traindataEnt);
        //train second classifier (entCat attributes will have the values of the entities always)
        traindataEntadded.setClassIndex(traindataEntadded.attribute("attCat").index());
        WekaWrapper classify2 = new WekaWrapper(traindataEntadded, true);
        System.out.println("trainATC: enhanced attribute classifier results -> ");
        classify2.saveModel("elixa-atc_att_enhanced.model");
        classify2.crossValidate(foldNum);
        */
        //classify.printMultilabelPredictions(classify.multiLabelPrediction());
        //reader.print2Semeval2015format(paramFile+"entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }
    //traindata.setClass(traindata.attribute("entAttCat"));
    System.err.println("DONE CLI train-atc");
}