Usage examples for weka.core.Instance.setValue, collected from open-source projects.
public void setValue(Attribute att, String value);
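Before the project examples below, a minimal self-contained sketch of what this overload does. It assumes the Weka 3.6-era API (weka.core.Instance as a concrete class) that most snippets on this page use; the attribute names and values are illustrative only. For a nominal attribute, setValue(Attribute, String) resolves the string to its index in the attribute's value list and stores that index:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

// Build a tiny header: one nominal feature plus a nominal class.
FastVector colors = new FastVector();
colors.addElement("red");
colors.addElement("blue");
Attribute color = new Attribute("color", colors);

FastVector labels = new FastVector();
labels.addElement("yes");
labels.addElement("no");
Attribute label = new Attribute("class", labels);

FastVector atts = new FastVector(2);
atts.addElement(color);
atts.addElement(label);
Instances data = new Instances("demo", atts, 0);
data.setClassIndex(1);

// setValue(Attribute, String) stores the index of "blue" internally;
// it throws if the attribute is numeric or the nominal value is unknown.
Instance inst = new Instance(2);
inst.setValue(color, "blue");
inst.setValue(label, "yes");
inst.setDataset(data); // attach the header before adding or classifying
data.add(inst);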
From source file:cn.ict.zyq.bestConf.COMT2.COMT2.java
License:Open Source License
private void train() throws Exception {
    models = new M5P[ModelNum];
    for (int i = 0; i < ModelNum; i++) {
        models[i] = buildModel(labeledInstances, M[i]);
    }
    for (int i = 0; i < this.comtIterations; i++) {
        ArrayList<Instance>[] InstancePiSet = new ArrayList[ModelNum];
        for (int j = 0; j < ModelNum; j++)
            InstancePiSet[j] = new ArrayList<Instance>();
        for (int m = 0; m < ModelNum; m++) {
            double maxDelta = 0;
            Instance maxDeltaXY = null;
            Enumeration<Instance> enu = this.unlabeledInstances.enumerateInstances();
            while (enu.hasMoreElements()) {
                Instance ulIns = enu.nextElement();
                Instances omega = getSiblings(models[m], ulIns);
                double y = models[m].classifyInstance(ulIns);
                if (indexOfClass == -1)
                    indexOfClass = labeledInstances.classIndex();
                ulIns.setValue(indexOfClass, y);
                Instances instancesPi = new Instances(models[m].getM5RootNode().zyqGetTrainingSet());
                instancesPi.add(ulIns);
                M5P modelPi = buildModel(instancesPi, M[m]);
                double delta = computeOmegaDelta(models[m], modelPi, omega);
                if (maxDelta < delta) {
                    maxDelta = delta;
                    maxDeltaXY = ulIns;
                }
            }
            // now check facts about delta
            if (maxDelta > 0) {
                InstancePiSet[m].add(maxDeltaXY);
                this.unlabeledInstances.delete(this.unlabeledInstances.indexOf(maxDeltaXY));
            }
        }
        // check for both models
        boolean toExit = true;
        for (int m = 0; m < ModelNum; m++) {
            if (InstancePiSet[m].size() > 0) {
                toExit = false;
                break;
            }
        }
        if (toExit)
            break;
        else {
            // update the models
            int toGen = 0;
            for (int m = 0; m < ModelNum; m++) {
                Instances set = models[m].getM5RootNode().zyqGetTrainingSet();
                toGen += InstancePiSet[m].size();
                for (Instance ins : InstancePiSet[m])
                    set.add(ins);
                models[m] = buildModel(set, M[m]);
            }
            // Replenish pool U' to size p
            Instances toAdd = retrieveMore(toGen);
            unlabeledInstances.addAll(toAdd);
        }
    } // iterate for a number of rounds or break out on empty InstancePiSets
    // now we have the model as y = 0.5*sum(models[m].predict(x))
}
From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetClassifier.java
License:Apache License
/**
 * Converts a text message into an instance.
 *
 * @param text the message content to convert
 * @param data the header information
 * @return the generated Instance
 */
private Instance makeInstance(String text, Instances data) {
    Instance instance = new Instance(2);
    Attribute messageAtt = data.attribute("content");
    instance.setValue(messageAtt, messageAtt.addStringValue(text));
    instance.setDataset(data);
    return instance;
}
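A hedged sketch of how a helper like this is typically called at prediction time. The names 'classifier' (a trained weka.classifiers.Classifier) and 'data' (the header Instances used during training, class index set) are hypothetical, not part of the original source; it assumes the model was trained through a filter (e.g. FilteredClassifier with StringToWordVector) so that raw string instances can be classified directly:

Instance inst = makeInstance("great game tonight!", data);
double pred = classifier.classifyInstance(inst);
String label = data.classAttribute().value((int) pred);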
From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetFeatureExtractor.java
License:Apache License
/**
 * Constructs the arff file for weka with the training data.
 */
public static void constructModel() {
    Instances instdata = null;
    try {
        FastVector atts = new FastVector();
        atts.addElement(new Attribute("content", (FastVector) null));
        FastVector fvClassVal = new FastVector(4);
        fvClassVal.addElement("");
        fvClassVal.addElement("neutral");
        fvClassVal.addElement("negative");
        fvClassVal.addElement("positive");
        Attribute ClassAttribute = new Attribute("Class", fvClassVal);
        atts.addElement(ClassAttribute);
        instdata = new Instances("tweetData", atts, 0);
        CsvReader data = new CsvReader("../classified data/traindata.csv");
        int i = 0;
        while (data.readRecord()) {
            String class_id = data.get(0);
            switch (Integer.parseInt(class_id)) {
            case 0:
                class_id = "negative";
                break;
            case 2:
                class_id = "neutral";
                break;
            case 4:
                class_id = "positive";
                break;
            }
            String tweet_content = data.get(5);
            Instance iInst = new Instance(2);
            iInst.setValue((Attribute) atts.elementAt(0), tweet_content);
            iInst.setValue((Attribute) atts.elementAt(1), class_id);
            instdata.add(iInst);
            System.out.println("[" + i + "] " + class_id + ":" + tweet_content);
            i++;
        }
        data.close();
        StringToWordVector filter = new StringToWordVector();
        instdata.setClassIndex(instdata.numAttributes() - 1);
        filter.setInputFormat(instdata);
        Instances newdata = Filter.useFilter(instdata, filter);
        ArffSaver saver = new ArffSaver();
        saver.setInstances(newdata);
        saver.setFile(new File("./data/train2data.arff"));
        saver.writeBatch();
    } catch (Exception ex) {
        Logger.getLogger(TweetFeatureExtractor.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:com.entopix.maui.filters.MauiFilter.java
License:Open Source License
/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) {
    FastVector vector = new FastVector();
    String fileName = instance.stringValue(fileNameAtt);
    if (debugMode) {
        log.info("-- Converting instance for document " + fileName);
    }
    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;
    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }
    // Get the document text
    String documentText = instance.stringValue(documentAtt);
    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        log.info(candidateList.size() + " candidates ");
    }
    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;
    int countPos = 0;
    int countNeg = 0;
    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {
        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }
        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }
        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);
        Instance inst = new Instance(instance.weight(), vals);
        inst.setDataset(classifierData);
        double[] probs = null;
        try {
            // Get probability of a phrase being key phrase
            probs = classifier.distributionForInstance(inst);
        } catch (Exception e) {
            log.error("Exception while getting probability for candidate " + candidate.getName());
            continue;
        }
        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }
        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];
        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {
            if (i == documentAtt) {
                // output of values for a given phrase:
                // 0 Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;
                // 1 Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                } // 2
                newInst[pos++] = index;
                // Add features
                newInst[pos++] = inst.value(tfIndex); // 3
                newInst[pos++] = inst.value(idfIndex); // 4
                newInst[pos++] = inst.value(tfidfIndex); // 5
                newInst[pos++] = inst.value(firstOccurIndex); // 6
                newInst[pos++] = inst.value(lastOccurIndex); // 7
                newInst[pos++] = inst.value(spreadOccurIndex); // 8
                newInst[pos++] = inst.value(domainKeyphIndex); // 9
                newInst[pos++] = inst.value(lengthIndex); // 10
                newInst[pos++] = inst.value(generalityIndex); // 11
                newInst[pos++] = inst.value(nodeDegreeIndex); // 12
                newInst[pos++] = inst.value(invWikipFreqIndex); // 13
                newInst[pos++] = inst.value(totalWikipKeyphrIndex); // 14
                newInst[pos++] = inst.value(wikipGeneralityIndex); // 15
                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob; // 16
                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue(); // 17
            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }
        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);
        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        log.info(countPos + " positive; " + countNeg + " negative instances");
    }
    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;
    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;
    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;
    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // log.info(vals[i] + "\t" + currentInstance);
        // Short cut: if phrase very unlikely make rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }
        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);
    }
    return vector;
}
From source file:com.gamerecommendation.Weatherconditions.Clasificacion.java
public String clasificar(String[] testCases) throws Exception {
    String ruta = "model.model";
    InputStream classModelStream = getClass().getResourceAsStream(ruta);
    Classifier clasify = (Classifier) SerializationHelper.read(classModelStream);

    FastVector condition = new FastVector();
    condition.addElement("Cloudy");
    condition.addElement("Clear");
    condition.addElement("Sunny");
    condition.addElement("Fair");
    condition.addElement("Partly_Cloudy");
    condition.addElement("Mostly_Cloudy");
    condition.addElement("Showers");
    condition.addElement("Haze");
    condition.addElement("Dust");
    condition.addElement("Other");
    Attribute _condition = new Attribute("contition", condition);

    FastVector temperature = new FastVector();
    temperature.addElement("Hot");
    temperature.addElement("Mild");
    temperature.addElement("Cool");
    Attribute _temperature = new Attribute("temperature", temperature);

    FastVector chill = new FastVector();
    chill.addElement("Regrettable");
    chill.addElement("Mint");
    Attribute _chill = new Attribute("chill", chill);

    FastVector direction = new FastVector();
    direction.addElement("Mint");
    direction.addElement("Fair");
    direction.addElement("Regular");
    Attribute _direction = new Attribute("direction", direction);

    FastVector speed = new FastVector();
    speed.addElement("Mint");
    speed.addElement("Fair");
    speed.addElement("Regular");
    Attribute _speed = new Attribute("speed", speed);

    FastVector humidity = new FastVector();
    humidity.addElement("High");
    humidity.addElement("Normal");
    humidity.addElement("Low");
    Attribute _humidity = new Attribute("humidity", humidity);

    FastVector visibility = new FastVector();
    visibility.addElement("Recommended");
    visibility.addElement("Not_Recommended");
    Attribute _visibility = new Attribute("visibility", visibility);

    FastVector preassure = new FastVector();
    preassure.addElement("Fair");
    preassure.addElement("Mint");
    Attribute _preassure = new Attribute("preassure", preassure);

    FastVector Class = new FastVector();
    Class.addElement("Recommended");
    Class.addElement("Not_Recommended");
    Attribute _Class = new Attribute("class", Class);

    FastVector atributos = new FastVector(9);
    atributos.addElement(_condition);
    atributos.addElement(_temperature);
    atributos.addElement(_chill);
    atributos.addElement(_direction);
    atributos.addElement(_speed);
    atributos.addElement(_humidity);
    atributos.addElement(_visibility);
    atributos.addElement(_preassure);
    atributos.addElement(_Class);

    ArrayList<Attribute> atributs = new ArrayList<>();
    atributs.add(_condition);
    atributs.add(_temperature);
    atributs.add(_chill);
    atributs.add(_direction);
    atributs.add(_speed);
    atributs.add(_humidity);
    atributs.add(_visibility);
    atributs.add(_preassure);
    atributs.add(_Class);

    // Here the test set is created; it has all the attributes of the model
    Instances dataTest = new Instances("TestCases", atributos, 1);
    dataTest.setClassIndex(8);
    Instance setPrueba = new Instance(9);
    int index = -1;
    for (int i = 0; i < 8; i++) {
        index = atributs.get(i).indexOfValue(testCases[i]);
        //System.out.println(i + " " + atributs.get(i) + " " + index + " " + testCases[i]);
        setPrueba.setValue(atributs.get(i), index);
    }
    // Add the instance to be evaluated
    dataTest.add(setPrueba);
    // Make the prediction; instance 0 is the only one present
    double valorP = clasify.classifyInstance(dataTest.instance(0));
    // Get the name of the class value
    String prediccion = dataTest.classAttribute().value((int) valorP);
    return prediccion;
}
From source file:com.openkm.kea.filter.KEAFilter.java
License:Open Source License
/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {
    FastVector vector = new FastVector();
    if (m_Debug) {
        log.info("-- Converting instance");
    }
    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;
    HashMap<String, Counter> hashKeysEval = null;
    if (!instance.isMissing(m_KeyphrasesAtt)) {
        String keyphrases = instance.stringValue(m_KeyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        hashKeysEval = getGivenKeyphrases(keyphrases, true);
    }
    // Get the phrases for the document
    HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
    int length = getPhrases(hash, instance.stringValue(m_DocumentAtt));
    // hash = getComposits(hash);

    /* Experimental: To compute how many of the manual keyphrases appear
       in the documents:

    log.info("Doc phrases found " + hash.size());
    log.info("Manual keyphrases: ");
    Iterator iter = hashKeyphrases.keySet().iterator();
    int count = 0;
    while (iter.hasNext()) {
        String id = (String) iter.next();
        if (hash.containsKey(id)) {
            count++;
        }
    }
    double max_recall = (double) count / (double) hashKeyphrases.size();
    m_max_recall += max_recall;
    doc++;
    double avg_m_max_recall = m_max_recall / (double) doc;
    String file = instance.stringValue(2);
    log.info(count + " out of " + hashKeyphrases.size() + " are in the document ");
    log.info("Max recall : " + avg_m_max_recall + " on " + doc + " documents ");
    */

    // Compute number of extra attributes
    int numFeatures = 5;
    if (m_Debug) {
        if (m_KFused) {
            numFeatures = numFeatures + 1;
        }
    }
    if (m_STDEVfeature) {
        numFeatures = numFeatures + 1;
    }
    if (m_NODEfeature) {
        numFeatures = numFeatures + 1;
    }
    if (m_LENGTHfeature) {
        numFeatures = numFeatures + 1;
    }
    // Set indices of key attributes
    //int phraseAttIndex = m_DocumentAtt;
    int tfidfAttIndex = m_DocumentAtt + 2;
    int distAttIndex = m_DocumentAtt + 3;
    int probsAttIndex = m_DocumentAtt + numFeatures - 1;
    //int classAttIndex = numFeatures;

    // Go through the phrases and convert them into instances
    Iterator<String> it = hash.keySet().iterator();
    while (it.hasNext()) {
        String id = it.next();
        FastVector phraseInfo = (FastVector) hash.get(id);
        double[] vals = featVals(id, phraseInfo, training, hashKeysEval, hashKeyphrases, length, hash);
        Instance inst = new Instance(instance.weight(), vals);
        inst.setDataset(m_ClassifierData);
        // Get probability of a phrase being key phrase
        double[] probs = m_Classifier.distributionForInstance(inst);
        // If simple Naive Bayes used, change here to
        // double prob = probs[1];
        double prob = probs[0];
        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures];
        int pos = 0;
        for (int i = 0; i < instance.numAttributes(); i++) {
            if (i == m_DocumentAtt) {
                // output of values for a given phrase:
                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(id);
                newInst[pos++] = index;
                // Add original version
                String orig = (String) phraseInfo.elementAt(2);
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(id);
                }
                newInst[pos++] = index;
                // Add TFxIDF
                newInst[pos++] = inst.value(m_TfidfIndex);
                // Add distance
                newInst[pos++] = inst.value(m_FirstOccurIndex);
                // Add other features
                if (m_Debug) {
                    if (m_KFused) {
                        newInst[pos++] = inst.value(m_KeyFreqIndex);
                    }
                }
                if (m_STDEVfeature) {
                    newInst[pos++] = inst.value(m_STDEVIndex);
                }
                if (m_NODEfeature) {
                    newInst[pos++] = inst.value(m_NodeIndex);
                }
                if (m_LENGTHfeature) {
                    newInst[pos++] = inst.value(m_LengthIndex);
                }
                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;
                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();
            } else if (i == m_KeyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }
        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);
    }

    // Add dummy instances for keyphrases that don't occur
    // in the document
    if (hashKeysEval != null) {
        Iterator<String> phrases = hashKeysEval.keySet().iterator();
        while (phrases.hasNext()) {
            String phrase = phrases.next();
            double[] newInst = new double[instance.numAttributes() + numFeatures];
            int pos = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == m_DocumentAtt) {
                    // log.info("Here: " + phrase);
                    // Add phrase
                    int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;
                    // Add original version
                    index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;
                    // Add TFxIDF
                    newInst[pos++] = Instance.missingValue();
                    // Add distance
                    newInst[pos++] = Instance.missingValue();
                    // Add other features
                    if (m_Debug) {
                        if (m_KFused) {
                            newInst[pos++] = Instance.missingValue();
                        }
                    }
                    if (m_STDEVfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    if (m_NODEfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    if (m_LENGTHfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    // Add probability and rank
                    newInst[pos++] = -Double.MAX_VALUE;
                    // newInst[pos++] = Instance.missingValue();
                } else if (i == m_KeyphrasesAtt) {
                    newInst[pos++] = 1; // Keyphrase
                } else {
                    newInst[pos++] = instance.value(i);
                }
            }
            Instance inst = new Instance(instance.weight(), newInst);
            inst.setDataset(outputFormatPeek());
            vector.addElement(inst);
        }
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;
    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;
    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;
    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }
        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);
    }
    return vector;
}
From source file:com.reactivetechnologies.analytics.mapper.TEXTDataMapper.java
License:Open Source License
@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {
    if (request != null && request.getData() != null && request.getData().length > 0) {
        FastVector fvWekaAttributes = new FastVector(2);
        FastVector nil = null; // null FastVector selects the string-attribute constructor
        Attribute attr0 = new Attribute("text", nil, 0);
        FastVector fv = new FastVector();
        for (String nominal : request.getClassVars()) {
            fv.addElement(nominal);
        }
        Attribute attr1 = new Attribute("class", fv, 1);
        fvWekaAttributes.addElement(attr0);
        fvWekaAttributes.addElement(attr1);
        Instances ins = new Instances("attr-reln", fvWekaAttributes, request.getData().length);
        ins.setClassIndex(1);
        for (Text s : request.getData()) {
            Instance i = new Instance(2);
            i.setValue(attr0, s.getText());
            i.setValue(attr1, s.getTclass());
            ins.add(i);
        }
        return new Dataset(ins);
    }
    return null;
}
From source file:com.tum.classifiertest.FastRfUtils.java
License:Open Source License
/**
 * Produces a random permutation of the values of an attribute in a dataset using Knuth shuffle.
 * <p/>
 * Copies back the current values of the previously scrambled attribute and uses the given permutation
 * to scramble the values of the new attribute, all by copying from the original dataset.
 *
 * @param src      the source dataset
 * @param dst      the scrambled dataset
 * @param attIndex the attribute index
 * @param perm     the random permutation
 * @return fluent
 */
public static Instances scramble(Instances src, Instances dst, final int attIndex, int[] perm) {
    for (int i = 0; i < src.numInstances(); i++) {
        Instance scrambled = dst.instance(i);
        if (attIndex > 0)
            scrambled.setValue(attIndex - 1, src.instance(i).value(attIndex - 1));
        scrambled.setValue(attIndex, src.instance(perm[i]).value(attIndex));
    }
    return dst;
}
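The perm argument is expected to be a random permutation of the instance indices, which the javadoc attributes to a Knuth shuffle. A minimal sketch of producing one and scrambling attribute 0 of a working copy; 'src' stands for any existing Instances object and the seed is arbitrary:

import java.util.Random;
import weka.core.Instances;

int n = src.numInstances();
int[] perm = new int[n];
for (int i = 0; i < n; i++)
    perm[i] = i;
Random rnd = new Random(42);
// Knuth (Fisher-Yates) shuffle: swap each position with a random earlier one.
for (int i = n - 1; i > 0; i--) {
    int j = rnd.nextInt(i + 1); // uniform in [0, i]
    int tmp = perm[i];
    perm[i] = perm[j];
    perm[j] = tmp;
}
Instances dst = new Instances(src); // work on a copy so src stays untouched
FastRfUtils.scramble(src, dst, 0, perm);

Calling scramble for attIndex 0, 1, 2, ... in sequence reuses one working copy: each call first restores the previously scrambled column from src before permuting the next one, which is what the attIndex - 1 copy-back is for.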
From source file:com.yahoo.research.scoring.classifier.NutchOnlineClassifier.java
License:Apache License
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 *
 * @param url the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}.
 */
private static Instance convert(AnthURL url) {
    if (url != null) {
        Instance inst = new SparseInstance(dimension);
        inst.replaceMissingValues(replaceMissingValues);
        inst.setDataset(instances);
        inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
        inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("domain"), url.uri.getHost());
        Set<String> tokens = new HashSet<String>();
        tokens.addAll(tokenizer(url.uri.getPath()));
        tokens.addAll(tokenizer(url.uri.getQuery()));
        tokens.addAll(tokenizer(url.uri.getFragment()));
        for (String tok : tokens) {
            inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
        }
        return inst;
    } else {
        System.out.println("Input AnthURL for conversion into instance was null.");
        return null;
    }
}
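Note the design of the token loop at the end of convert: rather than one attribute per distinct token, each token is hashed into one of a fixed number of bucket attributes (the feature-hashing trick, bounded by hashTrickSize), so the instance dimension stays constant regardless of vocabulary size, and the SparseInstance stores only the buckets that are actually hit.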
From source file:com.zazhu.BlueHub.BlueHub.java
License:Apache License
/**
 * Receives the latest readings from the sensors and creates the features. Only
 * the accelerometer x, y, z values are used (from either the internal or the
 * external sensor).
 *
 * @param sensorQueue
 * @throws Exception
 */
private Instance processingSenseData(Queue<String> sensorQueue, char whatSensor) throws Exception {
    BufferedReader reader;
    Instances format;
    Instance newInstance = null;
    Log.d(TAG, "Queue size = " + mQueueSize);
    if (sensorQueue.size() <= 0)
        throw new Exception("Queue empty");
    // create the arrays that will contain the accelerometer data: s.x, s.y, s.z
    double[] sx = new double[sensorQueue.size()];
    double[] sy = new double[sensorQueue.size()];
    double[] sz = new double[sensorQueue.size()];
    String rawReading;
    StringTokenizer st;
    int index;
    if (D)
        Log.e(TAG, "+++ COMPUTING FEATURES +++");
    // 1. collect raw data. what kind of sensing data? external vs. internal
    switch (whatSensor) {
    case EXTERNAL:
        index = 0;
        while ((rawReading = sensorQueue.poll()) != null) {
            // FORMAT:
            // "Time_SensorName_SensorNumber_Counter_Xacc_Yacc_Zacc_Xgyro_Ygyro_checksum"
            // position of the values needed: s.x = 4, s.y = 5, s.z = 6
            st = new StringTokenizer(rawReading, FIELD_SEP);
            // skip the data that is not needed
            for (int i = 0; i < 4; i++)
                st.nextToken();
            // s.x, s.y, s.z
            sx[index] = Double.valueOf(st.nextToken());
            sy[index] = Double.valueOf(st.nextToken());
            sz[index] = Double.valueOf(st.nextToken());
            index += 1;
        }
        // 2. process raw data
        // 2.1 read the input format for the instance (TODO must be changed to use weka classes)
        reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_extern)));
        try {
            format = new Instances(reader);
            if (format.classIndex() == -1)
                format.setClassIndex(format.numAttributes() - 1);
            // 2.2 create a new instance
            newInstance = new DenseInstance(7);
            newInstance.setDataset(format);
            // set attributes
            newInstance.setValue(format.attribute(0), Feature.getStd(sx));
            newInstance.setValue(format.attribute(1), Feature.getStd(sy));
            newInstance.setValue(format.attribute(2), Feature.getStd(sz));
            newInstance.setValue(format.attribute(3), Feature.getMean(sx));
            newInstance.setValue(format.attribute(4), Feature.getMean(sy));
            newInstance.setValue(format.attribute(5), Feature.getMean(sz));
            // set unknown class
            newInstance.setMissing(format.attribute(6));
        } catch (IOException e) {
            e.printStackTrace();
        }
        break;
    case INTERNAL:
        index = 0;
        while ((rawReading = sensorQueue.poll()) != null) {
            // FORMAT "Xacc_Yacc_Zacc"
            // position of the values needed: s.x = 0, s.y = 1, s.z = 2
            st = new StringTokenizer(rawReading, FIELD_SEP);
            // s.x, s.y, s.z
            sx[index] = Double.valueOf(st.nextToken());
            sy[index] = Double.valueOf(st.nextToken());
            sz[index] = Double.valueOf(st.nextToken());
            index += 1;
        }
        // 2. process raw data
        // 2.1 read the input format for the instance (TODO must be changed to use weka classes)
        reader = new BufferedReader(new InputStreamReader(getResources().openRawResource(R.raw.format_intern)));
        try {
            format = new Instances(reader);
            if (format.classIndex() == -1)
                format.setClassIndex(format.numAttributes() - 1);
            // 2.2 create a new instance
            newInstance = new DenseInstance(7);
            newInstance.setDataset(format);
            // set attributes
            newInstance.setValue(format.attribute(0), Feature.getStd(sx));
            newInstance.setValue(format.attribute(1), Feature.getStd(sy));
            newInstance.setValue(format.attribute(2), Feature.getStd(sz));
            newInstance.setValue(format.attribute(3), Feature.getMean(sx));
            newInstance.setValue(format.attribute(4), Feature.getMean(sy));
            newInstance.setValue(format.attribute(5), Feature.getMean(sz));
            // set unknown class
            newInstance.setMissing(format.attribute(6));
        } catch (IOException e) {
            e.printStackTrace();
        }
        break;
    default:
        if (D)
            Log.e(TAG, "+++ COMPUTING FEATURES: NO VALUE FOR THE SENSOR READING +++");
        break;
    }
    return newInstance;
}