List of usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
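Before the per-project examples below, here is a minimal, self-contained sketch of this constructor against the Weka 3.7+ API (where FastVector is deprecated in favor of ArrayList<Attribute>). The relation name, attribute names, and values are illustrative only, not taken from any of the examples:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesExample {
    public static void main(String[] args) {
        // Declare two numeric attributes and one nominal attribute
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        attInfo.add(new Attribute("length"));
        attInfo.add(new Attribute("weight"));
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");
        attInfo.add(new Attribute("class", labels));

        // Create an empty dataset with an initial capacity of 10 rows
        Instances data = new Instances("example_relation", attInfo, 10);
        data.setClassIndex(data.numAttributes() - 1);

        // Add one row; nominal values are stored as indices into the label list
        double[] vals = new double[] { 5.3, 300, labels.indexOf("yes") };
        data.add(new DenseInstance(1.0, vals));

        System.out.println(data);
    }
}

The capacity argument (10 here) is only an initial allocation hint: the dataset starts empty and grows as instances are added.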
From source file: com.entopix.maui.filters.MauiFilter.java
License: Open Source License
/**
 * Sets output format and converts pending input instances.
 */
@SuppressWarnings("unchecked")
private void convertPendingInstances() {
    if (debugMode) {
        log.info("--- Converting pending instances");
    }

    // Create output format for filter
    FastVector atts = new FastVector();
    for (int i = 1; i < getInputFormat().numAttributes(); i++) {
        if (i == documentAtt) {
            atts.addElement(new Attribute("Candidate_name", (FastVector) null)); // 0
            atts.addElement(new Attribute("Candidate_original", (FastVector) null)); // 1
            atts.addElement(new Attribute("Term_frequency")); // 0
            atts.addElement(new Attribute("IDF")); // 1
            atts.addElement(new Attribute("TFxIDF")); // 2
            atts.addElement(new Attribute("First_occurrence")); // 3
            atts.addElement(new Attribute("Last_occurrence")); // 4
            atts.addElement(new Attribute("Spread")); // 5
            atts.addElement(new Attribute("Domain_keyphraseness")); // 6
            atts.addElement(new Attribute("Length")); // 7
            atts.addElement(new Attribute("Generality")); // 8
            atts.addElement(new Attribute("Node_degree")); // 9
            atts.addElement(new Attribute("Wikipedia_keyphraseness")); // 10
            atts.addElement(new Attribute("Wikipedia_inlinks")); // 11
            atts.addElement(new Attribute("Wikipedia_generality")); // 12
            atts.addElement(new Attribute("Probability")); // 16
            atts.addElement(new Attribute("Rank")); // 17
        } else if (i == keyphrasesAtt) {
            if (nominalClassValue) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            } else {
                atts.addElement(new Attribute("Keyphrase?"));
            }
        } else {
            atts.addElement(getInputFormat().attribute(i));
        }
    }
    Instances outFormat = new Instances("mauidata", atts, 0);
    setOutputFormat(outFormat);

    // Convert pending input instances into output data
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        FastVector vector = convertInstance(current, true);
        Enumeration<Instance> en = vector.elements();
        while (en.hasMoreElements()) {
            Instance inst = (Instance) en.nextElement();
            push(inst);
        }
    }
}
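Note: this example predates the signature documented at the top of this page and uses the FastVector overload of the Instances constructor; since Weka 3.7, FastVector is deprecated and ArrayList<Attribute> is the preferred attribute container.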
From source file: com.entopix.maui.main.MauiModelBuilder.java
License: Open Source License
/**
 * Builds the model from the training data
 * @throws MauiFilterException
 */
public MauiFilter buildModel(List<MauiDocument> documents) throws MauiFilterException {
    log.info("-- Building the model... ");

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("document", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    mauiFilter = new MauiFilter();
    mauiFilter.setMaxPhraseLength(maxPhraseLength);
    mauiFilter.setMinPhraseLength(minPhraseLength);
    mauiFilter.setMinNumOccur(minNumOccur);
    mauiFilter.setStemmer(stemmer);
    mauiFilter.setDocumentLanguage(documentLanguage);
    mauiFilter.setVocabularyName(vocabularyName);
    mauiFilter.setVocabularyFormat(vocabularyFormat);
    mauiFilter.setStopwords(stopwords);
    mauiFilter.setVocabulary(vocabulary);
    if (classifier != null) {
        mauiFilter.setClassifier(classifier);
    }
    mauiFilter.setInputFormat(data);

    // set features configurations
    mauiFilter.setBasicFeatures(useBasicFeatures);
    mauiFilter.setKeyphrasenessFeature(useKeyphrasenessFeature);
    mauiFilter.setFrequencyFeatures(useFrequencyFeatures);
    mauiFilter.setPositionsFeatures(usePositionsFeatures);
    mauiFilter.setLengthFeature(useLengthFeature);
    mauiFilter.setThesaurusFeatures(useThesaurusFeatures);
    mauiFilter.setWikipediaFeatures(useWikipediaFeatures, wikiFeatures);
    mauiFilter.setClassifier(classifier);

    if (!vocabularyName.equals("none")) {
        loadVocabulary();
        mauiFilter.setVocabulary(vocabulary);
    }

    log.info("-- Adding documents as instances... ");
    for (MauiDocument document : documents) {
        double[] newInst = new double[3];
        newInst[0] = data.attribute(0).addStringValue(document.getFileName());

        // Adding the text and the topics for the document to the instance
        if (document.getTextContent().length() > 0) {
            newInst[1] = data.attribute(1).addStringValue(document.getTextContent());
        } else {
            newInst[1] = Instance.missingValue();
        }
        if (document.getTopicsString().length() > 0) {
            newInst[2] = data.attribute(2).addStringValue(document.getTopicsString());
        } else {
            newInst[2] = Instance.missingValue();
        }
        data.add(new Instance(1.0, newInst));

        mauiFilter.input(data.instance(0));
        data = data.stringFreeStructure();
    }
    log.info("-- Building the model... ");

    mauiFilter.batchFinished();
    while ((mauiFilter.output()) != null) {
    }
    return mauiFilter;
}
From source file: com.esda.util.StringToWordVector.java
License: Open Source License
/**
 * determines the dictionary.
 */
private void determineDictionary() {
    // initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    // TreeMap dictionaryArr [] = new TreeMap[values];
    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an orderedMap of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove stopwords (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();

                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();

                    String[] wordsArr = word.split(" ");
                    StringBuilder stemmedStr = new StringBuilder();
                    for (String wordStr : wordsArr) {
                        if (!this.m_useStoplist || !stopwords.is(wordStr)) {
                            stemmedStr.append(m_Stemmer.stem(wordStr));
                            stemmedStr.append(" ");
                        }
                    }
                    /*for (int icounter = 0; icounter < wordsArr.length; icounter++) {
                        stemmedStr += m_Stemmer.stem(wordsArr[icounter]);
                        if (icounter + 1 < wordsArr.length)
                            stemmedStr += " ";
                    }*/
                    word = stemmedStr.toString().trim();

                    if (!(h.containsKey(word)))
                        h.put(word, new Integer(0));

                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        // updating the docCount for the words that have occurred in this
        // instance (document).
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println("Warning: A word should definitely be in the "
                        + "dictionary. Please check the code");
        }

        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int array[] = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index
    // and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates
    // that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
        }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}
From source file: com.gamerecommendation.Weatherconditions.Clasificacion.java
public String clasificar(String[] testCases) throws Exception {
    String ruta = "model.model";
    InputStream classModelStream;
    classModelStream = getClass().getResourceAsStream(ruta);
    Classifier clasify = (Classifier) SerializationHelper.read(classModelStream);

    FastVector condition = new FastVector();
    condition.addElement("Cloudy");
    condition.addElement("Clear");
    condition.addElement("Sunny");
    condition.addElement("Fair");
    condition.addElement("Partly_Cloudy");
    condition.addElement("Mostly_Cloudy");
    condition.addElement("Showers");
    condition.addElement("Haze");
    condition.addElement("Dust");
    condition.addElement("Other");
    Attribute _condition = new Attribute("contition", condition);

    FastVector temperature = new FastVector();
    temperature.addElement("Hot");
    temperature.addElement("Mild");
    temperature.addElement("Cool");
    Attribute _temperature = new Attribute("temperature", temperature);

    FastVector chill = new FastVector();
    chill.addElement("Regrettable");
    chill.addElement("Mint");
    Attribute _chill = new Attribute("chill", chill);

    FastVector direction = new FastVector();
    direction.addElement("Mint");
    direction.addElement("Fair");
    direction.addElement("Regular");
    Attribute _direction = new Attribute("direction", direction);

    FastVector speed = new FastVector();
    speed.addElement("Mint");
    speed.addElement("Fair");
    speed.addElement("Regular");
    Attribute _speed = new Attribute("speed", speed);

    FastVector humidity = new FastVector();
    humidity.addElement("High");
    humidity.addElement("Normal");
    humidity.addElement("Low");
    Attribute _humidity = new Attribute("humidity", humidity);

    FastVector visibility = new FastVector();
    visibility.addElement("Recommended");
    visibility.addElement("Not_Recommended");
    Attribute _visibility = new Attribute("visibility", visibility);

    FastVector preassure = new FastVector();
    preassure.addElement("Fair");
    preassure.addElement("Mint");
    Attribute _preassure = new Attribute("preassure", preassure);

    FastVector Class = new FastVector();
    Class.addElement("Recommended");
    Class.addElement("Not_Recommended");
    Attribute _Class = new Attribute("class", Class);

    FastVector atributos = new FastVector(9);
    atributos.addElement(_condition);
    atributos.addElement(_temperature);
    atributos.addElement(_chill);
    atributos.addElement(_direction);
    atributos.addElement(_speed);
    atributos.addElement(_humidity);
    atributos.addElement(_visibility);
    atributos.addElement(_preassure);
    atributos.addElement(_Class);

    ArrayList<Attribute> atributs = new ArrayList<>();
    atributs.add(_condition);
    atributs.add(_temperature);
    atributs.add(_chill);
    atributs.add(_direction);
    atributs.add(_speed);
    atributs.add(_humidity);
    atributs.add(_visibility);
    atributs.add(_preassure);
    atributs.add(_Class);

    // Here the dataset is created, with all the attributes of the model
    Instances dataTest = new Instances("TestCases", atributos, 1);
    dataTest.setClassIndex(8);

    Instance setPrueba = new Instance(9);

    int index = -1;
    for (int i = 0; i < 8; i++) {
        index = atributs.get(i).indexOfValue(testCases[i]);
        //System.out.println(i + " " + atributs.get(i) + " " + index + " " + testCases[i]);
        setPrueba.setValue(atributs.get(i), index);
    }

    // Add the test case to be evaluated.
    dataTest.add(setPrueba);

    // Perform the prediction.
    // Instance 0 is used because it is the only one present.
    double valorP = clasify.classifyInstance(dataTest.instance(0));

    //get the name of the class value
    String prediccion = dataTest.classAttribute().value((int) valorP);

    return prediccion;
}
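This example mixes the two generations of the API: the dataset itself is built with the deprecated FastVector overload of the constructor, while a parallel ArrayList<Attribute> is kept only to look up nominal value indices via indexOfValue.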
From source file: com.github.polarisation.kea.main.KEAKeyphraseExtractor.java
License: Open Source License
/**
 * Builds the model from the files
 */
public void extractKeyphrases(Hashtable stems) throws Exception {

    Vector stats = new Vector();

    // Check whether there is actually any data
    // = if there any files in the directory
    if (stems.size() == 0) {
        throw new Exception("Couldn't find any data!");
    }
    m_KEAFilter.setNumPhrases(m_numPhrases);
    m_KEAFilter.setVocabulary(m_vocabulary);
    m_KEAFilter.setVocabularyFormat(m_vocabularyFormat);
    m_KEAFilter.setDocumentLanguage(getDocumentLanguage());
    m_KEAFilter.setStemmer(m_Stemmer);
    m_KEAFilter.setStopwords(m_Stopwords);
    if (getVocabulary().equals("none")) {
        m_KEAFilter.m_NODEfeature = false;
    } else {
        m_KEAFilter.loadThesaurus(m_Stemmer, m_Stopwords);
    }

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    atts.addElement(new Attribute("filename", (String) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    if (m_KEAFilter.m_Dictionary == null) {
        buildGlobalDictionaries(stems);
    }

    System.err.println("-- Extracting Keyphrases... ");
    // Extract keyphrases
    Enumeration elem = stems.keys();

    // Enumeration over all files in the directory (now in the hash):
    while (elem.hasMoreElements()) {
        String str = (String) elem.nextElement();

        double[] newInst = new double[2];

        try {
            File txt = new File(m_dirName + "/" + str + ".txt");
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(new FileInputStream(txt), m_encoding);
            } else {
                is = new InputStreamReader(new FileInputStream(txt));
            }
            StringBuffer txtStr = new StringBuffer();
            int c;
            while ((c = is.read()) != -1) {
                txtStr.append((char) c);
            }
            newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
        } catch (Exception e) {
            if (m_debug) {
                System.err.println("Can't read document " + str + ".txt");
            }
            newInst[0] = Instance.missingValue();
        }

        try {
            File key = new File(m_dirName + "/" + str + ".key");
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(new FileInputStream(key), m_encoding);
            } else {
                is = new InputStreamReader(new FileInputStream(key));
            }
            StringBuffer keyStr = new StringBuffer();
            int c;

            // keyStr = keyphrases in the str.key file
            // Kea assumes that these keyphrases were assigned by the author
            // and evaluates extracted keyphrases against these
            while ((c = is.read()) != -1) {
                keyStr.append((char) c);
            }
            newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString());
        } catch (Exception e) {
            if (m_debug) {
                System.err.println("No existing keyphrases for stem " + str + ".");
            }
            newInst[1] = Instance.missingValue();
        }

        data.add(new Instance(1.0, newInst));

        m_KEAFilter.input(data.instance(0));

        data = data.stringFreeStructure();
        if (m_debug) {
            System.err.println("-- Document: " + str);
        }
        Instance[] topRankedInstances = new Instance[m_numPhrases];
        Instance inst;

        // Iterating over all extracted keyphrases (inst)
        while ((inst = m_KEAFilter.output()) != null) {
            int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1;
            if (index < m_numPhrases) {
                topRankedInstances[index] = inst;
            }
        }

        if (m_debug) {
            System.err.println("-- Keyphrases and feature values:");
        }
        FileOutputStream out = null;
        PrintWriter printer = null;

        File key = new File(m_dirName + "/" + str + ".key");
        if (!key.exists()) {
            out = new FileOutputStream(m_dirName + "/" + str + ".key");
            if (!m_encoding.equals("default")) {
                printer = new PrintWriter(new OutputStreamWriter(out, m_encoding));
            } else {
                printer = new PrintWriter(out);
            }
        }
        double numExtracted = 0, numCorrect = 0;

        for (int i = 0; i < m_numPhrases; i++) {
            if (topRankedInstances[i] != null) {
                if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
                    numExtracted += 1.0;
                }
                if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
                    numCorrect += 1.0;
                }
                if (printer != null) {
                    printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex()));
                    if (m_AdditionalInfo) {
                        printer.print("\t");
                        printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex()));
                        printer.print("\t");
                        printer.print(Utils.doubleToString(
                                topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4));
                    }
                    printer.println();
                }
                if (m_debug) {
                    System.err.println(topRankedInstances[i]);
                }
            }
        }

        if (numExtracted > 0) {
            if (m_debug) {
                System.err.println("-- " + numCorrect + " correct");
            }
            stats.addElement(new Double(numCorrect));
        }
        if (printer != null) {
            printer.flush();
            printer.close();
            out.close();
        }
    }

    double[] st = new double[stats.size()];
    for (int i = 0; i < stats.size(); i++) {
        st[i] = ((Double) stats.elementAt(i)).doubleValue();
    }
    double avg = Utils.mean(st);
    double stdDev = Math.sqrt(Utils.variance(st));

    System.err.println("Avg. number of matching keyphrases compared to existing ones : "
            + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2));
    System.err.println("Based on " + stats.size() + " documents");

    // m_KEAFilter.batchFinished();
}
From source file: com.hack23.cia.service.impl.action.user.wordcount.WordCounterImpl.java
License: Apache License
@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData,
        final int maxResult) {

    final String html = documentContentData.getContent();

    final Attribute input = new Attribute("html", (ArrayList<String>) null);

    final ArrayList<Attribute> inputVec = new ArrayList<>();
    inputVec.add(input);

    final Instances htmlInst = new Instances("html", inputVec, 1);

    htmlInst.add(new DenseInstance(1));
    htmlInst.instance(0).setValue(0, html);

    final StopwordsHandler StopwordsHandler = new StopwordsHandler() {

        @Override
        public boolean isStopword(final String word) {
            return word.length() < 5;
        }
    };

    final NGramTokenizer tokenizer = new NGramTokenizer();
    tokenizer.setNGramMinSize(1);
    tokenizer.setNGramMaxSize(1);
    tokenizer.setDelimiters(" \r\n\t.,;:'\"()?!'");

    final StringToWordVector filter = new StringToWordVector();
    filter.setTokenizer(tokenizer);
    filter.setStopwordsHandler(StopwordsHandler);
    filter.setLowerCaseTokens(true);
    filter.setOutputWordCounts(true);
    filter.setWordsToKeep(maxResult);

    final Map<String, Integer> result = new HashMap<>();

    try {
        filter.setInputFormat(htmlInst);
        final Instances dataFiltered = Filter.useFilter(htmlInst, filter);

        final Instance last = dataFiltered.lastInstance();

        final int numAttributes = last.numAttributes();

        for (int i = 0; i < numAttributes; i++) {
            result.put(last.attribute(i).name(), Integer.valueOf(last.toString(i)));
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}",
                documentContentData.getId(), e);
    }
    return result;
}
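This example uses the ArrayList<Attribute> overload documented at the top of this page together with DenseInstance from the post-3.7 API. Note the design choice in the anonymous StopwordsHandler: rather than a stopword list, any token shorter than five characters is discarded before counting.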
From source file: com.ivanrf.smsspam.SpamClassifier.java
License: Apache License
public static String classify(String model, String text, JTextArea log) {
    FilteredClassifier classifier = loadModel(model, log);

    // Create the instance
    ArrayList<String> fvNominalVal = new ArrayList<String>();
    fvNominalVal.add("ham");
    fvNominalVal.add("spam");
    Attribute attribute1 = new Attribute("spam_class", fvNominalVal);
    Attribute attribute2 = new Attribute("text", (List<String>) null);

    ArrayList<Attribute> fvWekaAttributes = new ArrayList<Attribute>();
    fvWekaAttributes.add(attribute1);
    fvWekaAttributes.add(attribute2);

    Instances instances = new Instances("Test relation", fvWekaAttributes, 1);
    instances.setClassIndex(0);

    DenseInstance instance = new DenseInstance(2);
    instance.setValue(attribute2, text);
    instances.add(instance);

    publishEstado("=== Instance created ===", log);
    publishEstado(instances.toString(), log);

    // Classify the instance
    try {
        publishEstado("=== Classifying instance ===", log);

        double pred = classifier.classifyInstance(instances.instance(0));

        publishEstado("=== Instance classified ===", log);

        String classPredicted = instances.classAttribute().value((int) pred);
        publishEstado("Class predicted: " + classPredicted, log);

        return classPredicted;
    } catch (Exception e) {
        publishEstado("Error found when classifying the text", log);
        return null;
    }
}
From source file: com.jgaap.util.Instance.java
License: Open Source License
/**
 * Main method for testing this class.
 *
 * @param options the commandline options - ignored
 */
//@ requires options != null;
public static void main(String[] options) {

    try {

        // Create numeric attributes "length" and "weight"
        Attribute length = new Attribute("length");
        Attribute weight = new Attribute("weight");

        // Create vector to hold nominal values "first", "second", "third"
        FastVector my_nominal_values = new FastVector(3);
        my_nominal_values.addElement("first");
        my_nominal_values.addElement("second");
        my_nominal_values.addElement("third");

        // Create nominal attribute "position"
        Attribute position = new Attribute("position", my_nominal_values);

        // Create vector of the above attributes
        FastVector attributes = new FastVector(3);
        attributes.addElement(length);
        attributes.addElement(weight);
        attributes.addElement(position);

        // Create the empty dataset "race" with above attributes
        Instances race = new Instances("race", attributes, 0);

        // Make position the class attribute
        race.setClassIndex(position.index());

        // Create empty instance with three attribute values
        Instance inst = new Instance(3);

        // Set instance's values for the attributes "length", "weight", and "position"
        inst.setValue(length, 5.3);
        inst.setValue(weight, 300);
        inst.setValue(position, "first");

        // Set instance's dataset to be the dataset "race"
        inst.setDataset(race);

        // Print the instance
        System.out.println("The instance: " + inst);

        // Print the first attribute
        System.out.println("First attribute: " + inst.attribute(0));

        // Print the class attribute
        System.out.println("Class attribute: " + inst.classAttribute());

        // Print the class index
        System.out.println("Class index: " + inst.classIndex());

        // Say if class is missing
        System.out.println("Class is missing: " + inst.classIsMissing());

        // Print the instance's class value in internal format
        System.out.println("Class value (internal format): " + inst.classValue());

        // Print a shallow copy of this instance
        Instance copy = (Instance) inst.copy();
        System.out.println("Shallow copy: " + copy);

        // Set dataset for shallow copy
        copy.setDataset(inst.dataset());
        System.out.println("Shallow copy with dataset set: " + copy);

        // Unset dataset for copy, delete first attribute, and insert it again
        copy.setDataset(null);
        copy.deleteAttributeAt(0);
        copy.insertAttributeAt(0);
        copy.setDataset(inst.dataset());
        System.out.println("Copy with first attribute deleted and inserted: " + copy);

        // Enumerate attributes (leaving out the class attribute)
        System.out.println("Enumerating attributes (leaving out class):");
        Enumeration enu = inst.enumerateAttributes();
        while (enu.hasMoreElements()) {
            Attribute att = (Attribute) enu.nextElement();
            System.out.println(att);
        }

        // Headers are equivalent?
        System.out.println("Header of original and copy equivalent: " + inst.equalHeaders(copy));

        // Test for missing values
        System.out.println("Length of copy missing: " + copy.isMissing(length));
        System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
        System.out.println("Length of copy missing: " + Instance.isMissingValue(copy.value(length)));
        System.out.println("Missing value coded as: " + Instance.missingValue());

        // Prints number of attributes and classes
        System.out.println("Number of attributes: " + copy.numAttributes());
        System.out.println("Number of classes: " + copy.numClasses());

        // Replace missing values
        double[] meansAndModes = { 2, 3, 0 };
        copy.replaceMissingValues(meansAndModes);
        System.out.println("Copy with missing value replaced: " + copy);

        // Setting and getting values and weights
        copy.setClassMissing();
        System.out.println("Copy with missing class: " + copy);
        copy.setClassValue(0);
        System.out.println("Copy with class value set to first value: " + copy);
        copy.setClassValue("third");
        System.out.println("Copy with class value set to \"third\": " + copy);
        copy.setMissing(1);
        System.out.println("Copy with second attribute set to be missing: " + copy);
        copy.setMissing(length);
        System.out.println("Copy with length set to be missing: " + copy);
        copy.setValue(0, 0);
        System.out.println("Copy with first attribute set to 0: " + copy);
        copy.setValue(weight, 1);
        System.out.println("Copy with weight attribute set to 1: " + copy);
        copy.setValue(position, "second");
        System.out.println("Copy with position set to \"second\": " + copy);
        copy.setValue(2, "first");
        System.out.println("Copy with last attribute set to \"first\": " + copy);
        System.out.println("Current weight of instance copy: " + copy.weight());
        copy.setWeight(2);
        System.out.println("Current weight of instance copy (set to 2): " + copy.weight());
        System.out.println("Last value of copy: " + copy.toString(2));
        System.out.println("Value of position for copy: " + copy.toString(position));
        System.out.println("Last value of copy (internal format): " + copy.value(2));
        System.out.println("Value of position for copy (internal format): " + copy.value(position));
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file: com.kdcloud.lib.domain.DataSpecification.java
License: Open Source License
public Instances newInstances(String relationalName) {
    return new Instances(relationalName, getAttrInfo(), 1000);
}
From source file: com.kdcloud.lib.domain.DataSpecification.java
License: Open Source License
public boolean matchingSpecification(Instances instances) {
    return new Instances("", getAttrInfo(), 0).equalHeaders(instances);
}
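A hedged sketch of how a caller might combine the two DataSpecification helpers above; the spec and candidate variables (and the surrounding class) are assumptions based only on these snippets, not confirmed API:

// Hypothetical usage; `spec` (a DataSpecification) and `candidate`
// (an Instances loaded elsewhere) are assumed to exist in scope.
if (spec.matchingSpecification(candidate)) {
    // Headers match, so rows can be copied into a fresh dataset
    Instances dataset = spec.newInstances(candidate.relationName());
    for (int i = 0; i < candidate.numInstances(); i++) {
        dataset.add(candidate.instance(i));
    }
}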