List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:kea.KEAFilter.java
License:Open Source License
/** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input * instance structure (any instances contained in the object are * ignored - only the structure is required). * @return true if the outputFormat may be collected immediately *///w ww. j av a 2 s . co m public boolean setInputFormat(Instances instanceInfo) throws Exception { if (instanceInfo.classIndex() >= 0) { throw new Exception("Don't know what do to if class index set!"); } if (!instanceInfo.attribute(m_KeyphrasesAtt).isString() || !instanceInfo.attribute(m_DocumentAtt).isString()) { throw new Exception("Keyphrase attribute and document attribute " + "need to be string attributes."); } m_PunctFilter = new KEAPhraseFilter(); int[] arr = new int[1]; arr[0] = m_DocumentAtt; m_PunctFilter.setAttributeIndicesArray(arr); m_PunctFilter.setInputFormat(instanceInfo); m_PunctFilter.setDisallowInternalPeriods(getDisallowInternalPeriods()); m_NumbersFilter = new NumbersFilter(); m_NumbersFilter.setInputFormat(m_PunctFilter.getOutputFormat()); super.setInputFormat(m_NumbersFilter.getOutputFormat()); return false; }
From source file:kea.KEAKeyphraseExtractor.java
License:Open Source License
/** * Builds the model from the files/*from ww w.j a va2s. c o m*/ */ public void extractKeyphrases(Hashtable stems) throws Exception { Vector stats = new Vector(); // Check whether there is actually any data if (stems.size() == 0) { throw new Exception("Couldn't find any data!"); } FastVector atts = new FastVector(2); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); // Extract keyphrases Enumeration elem = stems.keys(); while (elem.hasMoreElements()) { String str = (String) elem.nextElement(); double[] newInst = new double[2]; try { File txt = new File(m_dirName + "/" + str + ".txt"); Reader is; if (!m_encoding.equals("default")) { is = new BomStrippingInputStreamReader(new FileInputStream(txt), m_encoding); } else { is = new BomStrippingInputStreamReader(new FileInputStream(txt)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("Can't read document " + str + ".txt"); } newInst[0] = Instance.missingValue(); } try { File key = new File(m_dirName + "/" + str + ".key"); Reader is; if (!m_encoding.equals("default")) { is = new BomStrippingInputStreamReader(new FileInputStream(key), m_encoding); } else { is = new BomStrippingInputStreamReader(new FileInputStream(key)); } StringBuffer keyStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { keyStr.append((char) c); } newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("No keyphrases for stem " + str + "."); } newInst[1] = Instance.missingValue(); } data.add(new Instance(1.0, newInst)); m_KEAFilter.input(data.instance(0)); data = data.stringFreeStructure(); if (m_debug) { System.err.println("-- 
Document: " + str); } Instance[] topRankedInstances = new Instance[m_numPhrases]; Instance inst; while ((inst = m_KEAFilter.output()) != null) { int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1; if (index < m_numPhrases) { topRankedInstances[index] = inst; } } if (m_debug) { System.err.println("-- Keyphrases and feature values:"); } FileOutputStream out = null; PrintWriter printer = null; File key = new File(m_dirName + "/" + str + ".key"); if (!key.exists()) { out = new FileOutputStream(m_dirName + "/" + str + ".key"); if (!m_encoding.equals("default")) { printer = new PrintWriter(new OutputStreamWriter(out, m_encoding)); } else { printer = new PrintWriter(out); } } double numExtracted = 0, numCorrect = 0; for (int i = 0; i < m_numPhrases; i++) { if (topRankedInstances[i] != null) { if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) { numExtracted += 1.0; } if ((int) topRankedInstances[i] .value(topRankedInstances[i].numAttributes() - 1) == topRankedInstances[i] .attribute(topRankedInstances[i].numAttributes() - 1).indexOfValue("True")) { numCorrect += 1.0; } if (printer != null) { printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex())); if (m_AdditionalInfo) { printer.print("\t"); printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex())); printer.print("\t"); printer.print(Utils.doubleToString( topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4)); } printer.println(); } if (m_debug) { System.err.println(topRankedInstances[i]); } } } if (numExtracted > 0) { if (m_debug) { System.err.println("-- " + numCorrect + " correct"); } stats.addElement(new Double(numCorrect)); } if (printer != null) { printer.flush(); printer.close(); out.close(); } } double[] st = new double[stats.size()]; for (int i = 0; i < stats.size(); i++) { st[i] = ((Double) stats.elementAt(i)).doubleValue(); } double avg = Utils.mean(st); double stdDev = Math.sqrt(Utils.variance(st)); 
System.err.println("Avg. number of correct keyphrases: " + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2)); System.err.println("Based on " + stats.size() + " documents"); m_KEAFilter.batchFinished(); }
From source file:kea.KEAModelBuilder.java
License:Open Source License
/** * Builds the model from the files/*from w w w .j a v a2s .co m*/ */ public void buildModel(Hashtable stems) throws Exception { // Check whether there is actually any data if (stems.size() == 0) { throw new Exception("Couldn't find any data!"); } FastVector atts = new FastVector(2); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); // Build model m_KEAFilter = new KEAFilter(); m_KEAFilter.setDebug(m_debug); m_KEAFilter.setDisallowInternalPeriods(getDisallowIPeriods()); m_KEAFilter.setKFused(getUseKFrequency()); m_KEAFilter.setMaxPhraseLength(getMaxPhraseLength()); m_KEAFilter.setMinPhraseLength(getMinPhraseLength()); m_KEAFilter.setMinNumOccur(getMinNumOccur()); m_KEAFilter.setInputFormat(data); m_KEAFilter.setStemmer(getStemmer()); m_KEAFilter.setStopwords(getStopwords()); m_KEAFilter.setCheckForProperNouns(getCheckForProperNouns()); Enumeration elem = stems.keys(); while (elem.hasMoreElements()) { String str = (String) elem.nextElement(); double[] newInst = new double[2]; try { File txt = new File(m_dirName + "/" + str + ".txt"); BufferedReader is; if (!m_encoding.equals("default")) { is = new BomStrippingInputStreamReader(new FileInputStream(txt), m_encoding); } else { is = new BomStrippingInputStreamReader(new FileInputStream(txt)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("Can't find document for stem " + str + "."); } newInst[0] = Instance.missingValue(); } try { File key = new File(m_dirName + "/" + str + ".key"); BufferedReader is; if (!m_encoding.equals("default")) { is = new BomStrippingInputStreamReader(new FileInputStream(key), m_encoding); } else { is = new BomStrippingInputStreamReader(new FileInputStream(key)); 
} StringBuffer keyStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { keyStr.append((char) c); } newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("Can't find keyphrases for stem " + str + "."); } newInst[1] = Instance.missingValue(); } data.add(new Instance(1.0, newInst)); m_KEAFilter.input(data.instance(0)); data = data.stringFreeStructure(); } m_KEAFilter.batchFinished(); // Get rid of instances in filter while (m_KEAFilter.output() != null) { } ; }
From source file:kea.main.KEAKeyphraseExtractor.java
License:Open Source License
/** * Builds the model from the files//w w w . j a v a 2 s . c o m */ public synchronized void extractKeyphrases(Hashtable stems) throws Exception { Vector stats = new Vector(); // Check whether there is actually any data // = if there any files in the directory if (stems.size() == 0) { throw new Exception("Couldn't find any data!"); } this.m_KEAFilter.setNumPhrases(m_numPhrases); this.m_KEAFilter.setVocabulary(m_vocabulary); this.m_KEAFilter.setVocabularyFormat(m_vocabularyFormat); this.m_KEAFilter.setDocumentLanguage(getDocumentLanguage()); this.m_KEAFilter.setStemmer(m_Stemmer); this.m_KEAFilter.setStopwords(m_Stopwords); if (getVocabulary().equals("none")) { this.m_KEAFilter.m_NODEfeature = false; } else { // Know thesaurus is loaded in the constructor //m_KEAFilter.loadThesaurus(m_Stemmer, m_Stopwords, vocabularyDir, manager); } FastVector atts = new FastVector(3); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); atts.addElement(new Attribute("filename", (String) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); if (this.m_KEAFilter.m_Dictionary == null) { buildGlobalDictionaries(stems); } System.out.println("-- Extracting Keyphrases... 
"); // Extract keyphrases Enumeration elem = stems.keys(); // Enumeration over all files in the directory (now in the hash): while (elem.hasMoreElements()) { String str = (String) elem.nextElement(); double[] newInst = new double[2]; try { File txt = new File(m_dirName + "/" + str + ".txt"); InputStreamReader is; if (!m_encoding.equals("default")) { is = new InputStreamReader(new FileInputStream(txt), m_encoding); } else { is = new InputStreamReader(new FileInputStream(txt)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } is.close(); newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("Can't read document " + str + ".txt"); } newInst[0] = Instance.missingValue(); } try { File key = new File(m_dirName + "/" + str + ".key"); InputStreamReader is; if (!m_encoding.equals("default")) { is = new InputStreamReader(new FileInputStream(key), m_encoding); } else { is = new InputStreamReader(new FileInputStream(key)); } StringBuffer keyStr = new StringBuffer(); int c; // keyStr = keyphrases in the str.key file // Kea assumes, that these keyphrases were assigned by the // author // and evaluates extracted keyphrases againse these while ((c = is.read()) != -1) { keyStr.append((char) c); } is.close(); newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("No existing keyphrases for stem " + str + "."); } newInst[1] = Instance.missingValue(); } data.add(new Instance(1.0, newInst)); this.m_KEAFilter.input(data.instance(0), vocabulary); data = data.stringFreeStructure(); if (m_debug) { System.err.println("-- Document: " + str); } Instance[] topRankedInstances = new Instance[m_numPhrases]; Instance inst; // Iterating over all extracted keyphrases (inst) while ((inst = this.m_KEAFilter.output()) != null) { int index = (int) 
inst.value(this.m_KEAFilter.getRankIndex()) - 1; if (index < m_numPhrases) { topRankedInstances[index] = inst; } } if (m_debug) { System.err.println("-- Keyphrases and feature values:"); } FileOutputStream out = null; PrintWriter printer = null; File key = new File(m_dirName + "/" + str + ".key"); if (!key.exists()) { out = new FileOutputStream(m_dirName + "/" + str + ".key"); if (!m_encoding.equals("default")) { printer = new PrintWriter(new OutputStreamWriter(out, m_encoding)); } else { printer = new PrintWriter(out); } } double numExtracted = 0, numCorrect = 0; for (int i = 0; i < m_numPhrases; i++) { if (topRankedInstances[i] != null) { if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) { numExtracted += 1.0; } if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) { numCorrect += 1.0; } if (printer != null) { printer.print( topRankedInstances[i].stringValue(this.m_KEAFilter.getUnstemmedPhraseIndex())); if (m_AdditionalInfo) { printer.print("\t"); printer.print( topRankedInstances[i].stringValue(this.m_KEAFilter.getStemmedPhraseIndex())); printer.print("\t"); printer.print(Utils.doubleToString( topRankedInstances[i].value(this.m_KEAFilter.getProbabilityIndex()), 4)); } printer.println(); } if (m_debug) { System.err.println(topRankedInstances[i]); } } } if (numExtracted > 0) { if (m_debug) { System.err.println("-- " + numCorrect + " correct"); } stats.addElement(new Double(numCorrect)); } if (printer != null) { printer.flush(); printer.close(); out.close(); } } double[] st = new double[stats.size()]; for (int i = 0; i < stats.size(); i++) { st[i] = ((Double) stats.elementAt(i)).doubleValue(); } double avg = Utils.mean(st); double stdDev = Math.sqrt(Utils.variance(st)); System.out.println("Avg. 
number of matching keyphrases compared to existing ones : " + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2)); System.out.println("Based on " + stats.size() + " documents"); // m_KEAFilter.batchFinished(); }
From source file:learn.Classification.Chinese.TextDirectoryLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined by * a call to getStructure then method should do so before processing the * rest of the data set./*from ww w . j a v a 2s.co m*/ * * @return the structure of the data set as an empty set of Instances * @throws IOException * if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (getDirectory() == null) throw new IOException("No directory/source has been specified"); String directoryPath = getDirectory().getAbsolutePath(); ArrayList<String> classes = new ArrayList<String>(); Enumeration enm = getStructure().classAttribute().enumerateValues(); while (enm.hasMoreElements()) classes.add((String) enm.nextElement()); Instances data = getStructure(); int fileCount = 0; for (int k = 0; k < classes.size(); k++) { String subdirPath = (String) classes.get(k); File subdir = new File(directoryPath + File.separator + subdirPath); String[] files = subdir.list(); for (int j = 0; j < files.length; j++) { try { fileCount++; if (getDebug()) System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]); double[] newInst = null; if (m_OutputFilename) newInst = new double[3]; else newInst = new double[2]; File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]); BufferedInputStream is; is = new BufferedInputStream(new FileInputStream(txt)); StringBuffer txtStr = new StringBuffer(); int c; /* * while ((c = is.read()) != -1) { txtStr.append((char) c); * } */ //FileReader fr = new FileReader(txt); BufferedReader br = new BufferedReader( new InputStreamReader(new FileInputStream(txt), "UTF-8")); String line; while ((line = br.readLine()) != null) { txtStr.append(line + "\n"); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); if (m_OutputFilename) newInst[1] = (double) data.attribute(1) .addStringValue(subdirPath + File.separator + files[j]); newInst[data.classIndex()] = (double) k; 
data.add(new DenseInstance(1.0, newInst)); is.close(); } catch (Exception e) { System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath + File.separator + files[j]); } } } return data; }
From source file:les.negocio.LerWeka.java
@Override public String processar(EntidadeDominio entidade) { Arquivo arq = (Arquivo) entidade;//from w w w .j a v a 2 s . com String path = "/home/gustavo/Documents/weka/"; String full_path = path + arq.getNomeDoArquivo(); List<String> nm_att = new ArrayList<String>(); int qt_att = 0; String s = null; BufferedReader reader = null; try { reader = new BufferedReader(new FileReader(full_path)); } catch (FileNotFoundException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } ArffReader arff = null; try { arff = new ArffReader(reader); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } Instances data = arff.getData(); data.setClassIndex(data.numAttributes() - 1); int num_atributos = data.numAttributes() - 1; for (int i = 3; i < num_atributos; i++) { // o indice comea no trs, pois os atributos anteriores so fixados pelo sistema if (data.attribute(i).isNominal()) { qt_att++; nm_att.add(data.attribute(i).name().toString()); } } arq.setAtributos_weka(nm_att); arq.setQt_perguntas(qt_att); return null; }
From source file:lfsom.data.LFSData.java
License:Apache License
/** * Gets the data from a csv file.//from w ww . j a va 2 s . c o m * * @param fileName */ public LFSData(String fileName) { Class claseCargador = CSVLoader.class; if (fileName.endsWith(ArffLoader.FILE_EXTENSION)) { claseCargador = ArffLoader.class; } else { if (fileName.endsWith(JSONLoader.FILE_EXTENSION)) { claseCargador = JSONLoader.class; } else { if (fileName.endsWith(MatlabLoader.FILE_EXTENSION)) { claseCargador = MatlabLoader.class; } else { if (fileName.endsWith(XRFFLoader.FILE_EXTENSION)) { claseCargador = XRFFLoader.class; } else { if (fileName.endsWith(C45Loader.FILE_EXTENSION)) { claseCargador = C45Loader.class; } } } } } try { AbstractFileLoader cargador = (AbstractFileLoader) claseCargador.getConstructor().newInstance(); boolean cambio_col = false; cargador.setSource(new File(fileName)); Instances data1 = cargador.getDataSet(); double[][] matrix2 = new double[data1.size()][data1.numAttributes()]; for (int i = 0; i < data1.size(); i++) { matrix2[i] = data1.get(i).toDoubleArray(); } // Ahora se comprueba si todas las columnas son ok Integer[] colVale; dim = 0; if (data1.size() > 0) { colVale = new Integer[matrix2[0].length]; double[] stdevX = StatisticSample.stddeviation(matrix2); for (int k = 0; k < matrix2[0].length; k++) { if (Math.abs(stdevX[k]) >= 0.000000001) { colVale[k] = dim; dim++; } else { colVale[k] = -1; cambio_col = true; } } } else { dim = data1.numAttributes(); colVale = new Integer[dim]; for (int k = 0; k < dim; k++) { colVale[k] = k; } } double[][] matrixAssign = new double[matrix2.length][dim]; if (cambio_col) { for (int k = 0; k < matrix2.length; k++) { for (int w = 0; w < matrix2[0].length; w++) { if (colVale[w] != -1) { matrixAssign[k][colVale[w]] = matrix2[k][w]; } } } } else { matrixAssign = matrix2; } // Fin de la comprobacion setLabels(new String[dim]); for (int i = 0; i < data1.numAttributes(); i++) { if (colVale[i] != -1) { getLabels()[colVale[i]] = data1.attribute(i).name(); } } BufferedWriter br = new BufferedWriter(new 
FileWriter("d:/tmp/fich.csv")); StringBuilder sb = new StringBuilder(); for (int i = 0; i < matrixAssign.length; i++) { String cad = String.valueOf(matrixAssign[i][0]); for (int k = 1; k < matrixAssign[i].length; k++) cad += "," + matrixAssign[i][k]; sb.append(cad + "\n"); } br.write(sb.toString()); br.close(); setMatrix(matrixAssign); } catch (Exception e) { e.printStackTrace(); System.exit(1); } }
From source file:licensedetect.Classify.java
/**
 * Classifies the first instance of the given data set with the trained
 * classifier and returns the predicted value of the last attribute.
 *
 * @param instances data set whose first instance is classified; the last
 *            attribute is treated as the class attribute
 * @return the predicted attribute value, or "-1" if classification fails
 */
public String classify(Instances instances) {
    Attribute classAttr = instances.attribute(instances.numAttributes() - 1);
    try {
        int predictedIndex = (int) multi.classifyInstance(instances.instance(0));
        return classAttr.value(predictedIndex);
    } catch (Exception e) {
        System.out.println("Error Classifying instance");
        return Integer.toString(-1);
    }
}
From source file:linqs.gaia.model.oc.ncc.WekaClassifier.java
License:Open Source License
/**
 * Trains the wrapped Weka classifier on the given items.
 * <p>
 * Records the target schema/feature ids, de-duplicates the feature list,
 * converts the items to Weka instances, optionally builds a class-based
 * cost matrix, instantiates the Weka classifier named by the
 * "wekaclassifier" parameter (with optional comma-separated "wekaparams"),
 * and builds it. Instances are deleted once training completes.
 *
 * @param trainitems items to train on
 * @param targetschemaid schema id of the target feature
 * @param targetfeatureid feature id being predicted
 * @param featureids dependency features (must not contain the target)
 */
@Override
public void learn(Iterable<? extends Decorable> trainitems, String targetschemaid, String targetfeatureid,
        List<String> featureids) {
    try {
        this.targetschemaid = targetschemaid;
        this.targetfeatureid = targetfeatureid;
        this.featureids = new LinkedList<String>(featureids);

        // Drop duplicate feature ids while preserving order.
        LinkedHashSet<String> uniquefids = new LinkedHashSet<String>(featureids);
        if (uniquefids.size() != featureids.size()) {
            Log.WARN("Duplicate feature ids found in set of features: " + featureids);
            this.featureids = new ArrayList<String>(uniquefids);
        }

        // The target must not appear among the dependency features.
        if (this.featureids.contains(this.targetfeatureid)) {
            throw new InvalidStateException(
                    "Cannot include target feature as a dependency feature: " + this.targetfeatureid);
        }

        Log.DEBUG("Features Used: " + ListUtils.list2string(featureids, ","));

        // Added for weka. Will only be used for training.
        // Target will not be used as a feature itself.
        this.featureids.add(this.targetfeatureid);

        // Resolve the Weka classifier class and its parameters.
        String wcclass = WekaClassifier.DEFAULT_WEKA_CLASSIFIER;
        if (this.hasParameter("wekaclassifier")) {
            wcclass = this.getStringParameter("wekaclassifier");
        }
        String wekaparams = WekaClassifier.NO_PARAMS;
        if (this.hasParameter("wekaparams")) {
            wekaparams = this.getStringParameter("wekaparams");
        }
        boolean printwekamodel = this.hasParameter("printwekamodel", "yes");

        // Support generation of class based cost matrix
        if (this.hasParameter("costbyclass", "yes")) {
            fclasscount = new KeyedCount<String>();
        }

        // Convert the training items to Weka instances.
        int numinstances = IteratorUtils.numIterable(trainitems);
        Instances traininstances = this.gaia2weka(trainitems.iterator(), numinstances, false);

        // Handle class based cost matrix: append to the Weka parameter string.
        if (fclasscount != null) {
            if (wekaparams.equals(WekaClassifier.NO_PARAMS)) {
                wekaparams = "";
            } else {
                wekaparams += ",";
            }
            wekaparams += "-cost-matrix," + this.getCostMatrix();
        }

        // Set GAIA parameters and initialize classifier.
        // Weka expects the parameters split into an array.
        String params[] = null;
        if (!wekaparams.equals(WekaClassifier.NO_PARAMS)) {
            Log.DEBUG("Using wekaparams: " + wekaparams);
            params = wekaparams.split(",");
        }
        wekaclassifier = Classifier.forName(wcclass, params);

        // Optionally save the training instances before building.
        if (this.hasParameter("wekatrainfile")) {
            String savefile = this.getStringParameter("wekatrainfile");
            this.saveWekaInstances(savefile, traininstances);
        }

        // Train classifier, timing the build.
        Log.DEBUG("Weka building classifier");
        SimpleTimer st = new SimpleTimer();
        st.start();
        wekaclassifier.buildClassifier(traininstances);
        Log.DEBUG("Weka done building classifier: (" + st.timeLapse(true) + ")");

        // Print Weka Model, if requested
        if (printwekamodel) {
            Log.INFO("Learned Weka Model:\n" + this.wekaclassifier);
        }

        // Print attributes (debug only): comma-joined attribute names and
        // the classifier's effective options.
        if (Log.SHOWDEBUG) {
            String features = null;
            for (int f = 0; f < traininstances.numAttributes(); f++) {
                if (features == null) {
                    features = "";
                } else {
                    features += ",";
                }
                features += traininstances.attribute(f).name();
            }
            String options[] = wekaclassifier.getOptions();
            Log.DEBUG("Weka Options: " + ArrayUtils.array2String(options, ","));
        }

        // Clear instances once training is complete
        traininstances.delete();
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        // Wrap checked exceptions; cause is preserved.
        throw new RuntimeException(e);
    }
}
From source file:linqs.gaia.model.oc.ncc.WekaClassifier.java
License:Open Source License
/**
 * Creates a Weka instance from a Decorable item and adds it to the given
 * data set.
 *
 * Walks the configured feature ids in order; each simple feature fills one
 * attribute slot, and each composite feature fills one slot per component.
 * Values are converted via gaiavalues2weka and stored in a SparseInstance
 * of weight 1.
 *
 * @param instances Weka instances the new instance is appended to
 * @param di Decorable item to convert
 * @param ispredict whether this item is created for prediction (testing)
 *            rather than training
 */
private void createInstance(Instances instances, Decorable di, boolean ispredict) {
    double[] instvalues = new double[attinfosize];
    int attindex = 0;
    Schema schema = di.getSchema();
    for (String fid : featureids) {
        FeatureValue fvalue = di.getFeatureValue(fid);
        Attribute a = instances.attribute(attindex);
        Feature f = schema.getFeature(fid);
        if (!(f instanceof CompositeFeature)) {
            // Handle non multi-valued feature: one attribute slot.
            instvalues[attindex] = this.gaiavalues2weka(f, fid, fvalue, a, ispredict);
            attindex++;
        } else {
            // Handle multi-valued feature: one slot per component value.
            CompositeFeature mv = (CompositeFeature) f;
            UnmodifiableList<SimplePair<String, CVFeature>> mvfeatures = mv.getFeatures();
            CompositeValue mvvalue = (CompositeValue) di.getFeatureValue(fid);
            UnmodifiableList<FeatureValue> mvfvalues = mvvalue.getFeatureValues();
            int num = mvfvalues.size();
            for (int j = 0; j < num; j++) {
                // NOTE(review): this checks the loop-carried 'fvalue' (the
                // composite value on the first pass, then the PREVIOUS
                // component's value on later passes) rather than
                // mvfvalues.get(j). Looks like it intends to skip unknown
                // component values but is off by one iteration — confirm
                // against gaiavalues2weka's handling of unknowns.
                if (fvalue.equals(FeatureValue.UNKNOWN_VALUE)) {
                    attindex++;
                    continue;
                }
                a = instances.attribute(attindex);
                f = mvfeatures.get(j).getSecond();
                fvalue = mvfvalues.get(j);
                instvalues[attindex] = this.gaiavalues2weka(f, fid, fvalue, a, ispredict);
                attindex++;
            }
        }
    }
    // Create instance of weight 1 and the specified values
    Instance inst = new SparseInstance(1, instvalues);
    inst.setDataset(instances);
    instances.add(inst);
}