List of usage examples for weka.core.Instance.value
public double value(Attribute att);
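The two overloads of value return the same internal double: for a numeric attribute it is the attribute's numeric value, and for a nominal or string attribute it is the index of the stored value. Besides value(Attribute att), Instance also provides value(int attIndex), which most of the examples below use. The following is a minimal, self-contained sketch (not taken from the examples that follow), assuming the pre-3.7 Weka API (FastVector and the public Instance constructor) that these examples are written against:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class ValueDemo {
    public static void main(String[] args) {
        // Build a tiny dataset header with two numeric attributes.
        FastVector attrs = new FastVector();
        attrs.addElement(new Attribute("length"));
        attrs.addElement(new Attribute("width"));
        Instances data = new Instances("demo", attrs, 0);

        // Create one instance and attach it to the dataset header.
        Instance inst = new Instance(1.0, new double[] { 4.2, 1.5 });
        inst.setDataset(data);

        // Both overloads read the same underlying value.
        double byIndex = inst.value(0);                     // value(int attIndex)
        double byAttribute = inst.value(data.attribute(0)); // value(Attribute att)
        System.out.println(byIndex + " == " + byAttribute); // 4.2 == 4.2
    }
}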
From source file:FFNN.MultiplePerceptron.java
@Override
public double classifyInstance(Instance i) {
    ArrayList<Double> listInput = new ArrayList<>();
    // fill listInput with the attribute values of the instance
    listInput.add(1.0);
    for (int index = 0; index < i.numAttributes() - 1; index++)
        listInput.add(i.value(index));

    ArrayList<Double> listOutputHidden = new ArrayList<>();
    listNodeHidden.get(0).setValue(1.0);
    listOutputHidden.add(1.0);

    // compute the hidden-layer outputs (start at 1 because node 0 is the bias)
    for (int index = 1; index < listNodeHidden.size(); index++) {
        double value = listNodeHidden.get(index).output(listInput);
        // listNodeHidden.get(index).setValue(value);
        listOutputHidden.add(value);
    }

    // compute the output-layer outputs
    for (int index = 0; index < listNodeOutput.size(); index++) {
        double value = listNodeOutput.get(index).output(listOutputHidden);
        listNodeOutput.get(index).setValue(value);
    }
    return maxValue(listNodeOutput);
}
From source file:filters.MauiFilter.java
License:Open Source License
/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);
    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;
    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);
        inst.setDataset(classifierData);

        // Get the probability of the phrase being a key phrase
        double[] probs = classifier.distributionForInstance(inst);
        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for the final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {
            if (i == documentAtt) {
                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }
                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();
            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }

    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their TFxIDF value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute the rank of each phrase. Check for subphrases that are ranked
    // lower than superphrases, assign them probability -1, and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);

        // Shortcut: if the phrase is very unlikely, make its rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for a superphrase, starting with the first phrase
        // in the list that has the same probability, TFxIDF value, and distance
        // as the current phrase. This catches all superphrases that have the
        // same probability, TFxIDF value, and distance as the current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);
    }
    return vector;
}
From source file:functions.kernelPerceptron.java
License:Open Source License
@Override
public void trainOnInstanceImpl(Instance inst) {
    // Init perceptron
    if (this.reset == true) {
        this.reset = false;
        this.numberAttributes = inst.numAttributes();
        this.numberClasses = inst.numClasses();
        this.weightAttribute = new double[inst.numClasses()][inst.numAttributes()];
        for (int i = 0; i < inst.numClasses(); i++) {
            for (int j = 0; j < inst.numAttributes(); j++) {
                weightAttribute[i][j] = 0.2 * this.classifierRandom.nextDouble() - 0.1;
            }
        }
    }

    double[] preds = new double[inst.numClasses()];
    for (int i = 0; i < inst.numClasses(); i++) {
        preds[i] = prediction(inst, i);
    }
    double learningRatio = learningRatioOption.getValue();
    int actualClass = (int) inst.classValue();
    for (int i = 0; i < inst.numClasses(); i++) {
        double actual = (i == actualClass) ? 1.0 : 0.0;
        double delta = (actual - preds[i]) * preds[i] * (1 - preds[i]);
        for (int j = 0; j < inst.numAttributes() - 1; j++) {
            this.weightAttribute[i][j] += learningRatio * delta * inst.value(j);
        }
        //this.weightAttribute[i][inst.numAttributes() - 1] += learningRatio * delta;
    }
}
From source file:functions.kernelPerceptron.java
License:Open Source License
public double prediction(Instance inst, int classVal) {
    double sum = 0.0;
    for (int i = 0; i < inst.numAttributes() - 1; i++) {
        sum += weightAttribute[classVal][i] * inst.value(i);
    }
    //sum += weightAttribute[classVal][inst.numAttributes() - 1];
    return 1.0 / (1.0 + Math.exp(-sum));
}
From source file:gate.creole.kea.Kea.java
License:Open Source License
/**
 * Executes this PR. Depending on the state of the {@link #trainingMode} switch
 * it will either train a model or apply it over the documents.<br>
 * Training consists of collecting keyphrase annotations from the input
 * annotation set of the input documents. The first time a trained model is
 * required (either application mode has started or the model is being saved)
 * the actual model ({@link #keaModel}) will be constructed.<br>
 * The application mode consists of using a trained model to generate
 * keyphrase annotations on the output annotation set of the input documents.
 * @throws ExecutionException
 */
public void execute() throws gate.creole.ExecutionException {
    // reinitialise the KEA filter if already trained
    if (trainingMode.booleanValue() && trainingFinished) {
        // retraining started with a used model
        System.out.println("Reinitialising KEA model...");
        try {
            initModel();
        } catch (Exception e) {
            throw new ExecutionException(e);
        }
    }

    // get the clear text from the document
    String text = document.getContent().toString();

    // generate the first attribute: the text;
    // this will be used for both training and application modes
    double[] newInst = new double[2];
    newInst[0] = data.attribute(0).addStringValue(text);

    if (trainingMode.booleanValue()) {
        // training mode -> we need to collect the keyphrases
        // find the input annotation set
        AnnotationSet annSet = inputAS == null || inputAS.length() == 0 ? document.getAnnotations()
                : document.getAnnotations(inputAS);
        // extract the keyphrase annotations
        AnnotationSet kpSet = annSet.get(keyphraseAnnotationType);
        if (kpSet != null && kpSet.size() > 0) {
            // use a set to avoid repetitions
            Set<String> keyPhrases = new HashSet<String>();
            Iterator<Annotation> keyPhraseIter = kpSet.iterator();
            // initialise the string for the second attribute
            String keyPhrasesStr = "";
            while (keyPhraseIter.hasNext()) {
                // get one keyphrase annotation
                Annotation aKeyPhrase = keyPhraseIter.next();
                try {
                    // get the string for the keyphrase annotation
                    String keyPhraseStr = document.getContent()
                            .getContent(aKeyPhrase.getStartNode().getOffset(),
                                    aKeyPhrase.getEndNode().getOffset())
                            .toString();
                    // if the keyphrase has not been seen before, add it to the
                    // string for the second attribute
                    if (keyPhrases.add(keyPhraseStr))
                        keyPhrasesStr += keyPhrasesStr.length() > 0 ? "\n" + keyPhraseStr : keyPhraseStr;
                } catch (InvalidOffsetException ioe) {
                    throw new ExecutionException(ioe);
                }
            }
            // all the keyphrases have been enumerated -> create the second attribute
            newInst[1] = data.attribute(1).addStringValue(keyPhrasesStr);
        } else {
            // no keyphrase annotations
            newInst[1] = Instance.missingValue();
            System.out.println("No keyphrases in document: " + document.getName());
        }
        // add the new instance to the dataset
        data.add(new Instance(1.0, newInst));
        try {
            keaFilter.input(data.instance(0));
        } catch (Exception e) {
            throw new ExecutionException(e);
        }
        data = data.stringFreeStructure();
    } else {
        // application mode -> we need to generate keyphrases
        // build the model if not already done
        if (!trainingFinished)
            finishTraining();
        newInst[1] = Instance.missingValue();
        data.add(new Instance(1.0, newInst));
        try {
            keaFilter.input(data.instance(0));
        } catch (Exception e) {
            throw new ExecutionException(e);
        }
        data = data.stringFreeStructure();

        // extract the output from the model
        Instance[] topRankedInstances = new Instance[phrasesToExtract.intValue()];
        Instance inst;
        while ((inst = keaFilter.output()) != null) {
            int index = (int) inst.value(keaFilter.getRankIndex()) - 1;
            if (index < phrasesToExtract.intValue()) {
                topRankedInstances[index] = inst;
            }
        }

        // annotate the document with the results -> create a list with all the
        // keyphrases found by KEA
        List<String> phrases = new ArrayList<String>();
        for (int i = 0; i < topRankedInstances.length; i++) {
            if (topRankedInstances[i] != null) {
                phrases.add(topRankedInstances[i].stringValue(keaFilter.getUnstemmedPhraseIndex()));
            }
        }
        try {
            // add the actual annotations on the document
            annotateKeyPhrases(phrases);
        } catch (Exception e) {
            throw new ExecutionException(e);
        }
    } // application mode
}
From source file:hr.irb.fastRandomForest.FastRandomTree.java
License:Open Source License
/**
 * Computes the class distribution of an instance using the FastRandomTree.<p>
 *
 * In Weka's RandomTree, the distributions were normalized so that all
 * probabilities sum to 1; this would abolish the effect of instance weights
 * on voting. In FastRandomForest 0.97 onwards, the distributions are
 * normalized by dividing with the number of instances going into a leaf.<p>
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution
 * @throws Exception if computation fails
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    double[] returnedDist = null;

    if (m_Attribute > -1) { // ============================ node is not a leaf

        if (instance.isMissing(m_Attribute)) { // ---------------- missing value

            returnedDist = new double[m_MotherForest.m_Info.numClasses()];
            // split instance up
            for (int i = 0; i < m_Successors.length; i++) {
                double[] help = m_Successors[i].distributionForInstance(instance);
                if (help != null) {
                    for (int j = 0; j < help.length; j++) {
                        returnedDist[j] += m_Prop[i] * help[j];
                    }
                }
            }

        } else if (m_MotherForest.m_Info.attribute(m_Attribute).isNominal()) { // ------ nominal

            //returnedDist = m_Successors[(int) instance.value(m_Attribute)]
            //    .distributionForInstance(instance);

            // 0.99: new - binary splits (also) for nominal attributes
            if (instance.value(m_Attribute) == m_SplitPoint) {
                returnedDist = m_Successors[0].distributionForInstance(instance);
            } else {
                returnedDist = m_Successors[1].distributionForInstance(instance);
            }

        } else { // ------------------------------------------ numeric attributes

            if (instance.value(m_Attribute) < m_SplitPoint) {
                returnedDist = m_Successors[0].distributionForInstance(instance);
            } else {
                returnedDist = m_Successors[1].distributionForInstance(instance);
            }
        }

        return returnedDist;

    } else { // =============================================== node is a leaf

        return m_ClassProbs;
    }
}
From source file:hr.irb.fastRandomForest.NakedFastRandomTree.java
License:Open Source License
public NakedFastRandomTree getNodeForInstance(final Instance i, final int max_depth) {
    if (max_depth == 0) {
        return this;
    }
    if (m_Attribute == -1) {
        // Leaf node
        return this;
    }
    if (i.isMissing(m_Attribute)) {
        throw new IllegalStateException("NakedFastRandomTree does not support missing attributes");
    }

    final int next_depth = max_depth - 1;
    final NakedFastRandomTree succ;
    if (m_MotherForest.m_Info.attribute(m_Attribute).isNominal()) { // nominal
        // 0.99: new - binary splits (also) for nominal attributes
        if (i.value(m_Attribute) == m_SplitPoint) {
            succ = (NakedFastRandomTree) m_Successors[0];
        } else {
            succ = (NakedFastRandomTree) m_Successors[1];
        }
    } else { // numeric
        if (i.value(m_Attribute) < m_SplitPoint) {
            succ = (NakedFastRandomTree) m_Successors[0];
        } else {
            succ = (NakedFastRandomTree) m_Successors[1];
        }
    }
    return succ.getNodeForInstance(i, next_depth);
}
From source file:id3.MyID3.java
/**
 * Classifies an instance.
 * @param instance the instance to classify
 * @return the classification result
 * @throws NoSupportForMissingValuesException
 */
public double classifyInstance(Instance instance) throws NoSupportForMissingValuesException {
    if (instance.hasMissingValue()) {
        throw new NoSupportForMissingValuesException("Cannot handle missing value");
    }
    if (currentAttribute == null) {
        return classLabel;
    } else {
        return subTree[(int) instance.value(currentAttribute)].classifyInstance(instance);
    }
}
From source file:id3.MyID3.java
/**
 * Computes the class distribution for an instance.
 * @param instance the instance whose distribution is to be computed
 * @return the class distribution of the instance
 * @throws NoSupportForMissingValuesException
 */
public double[] distributionForInstance(Instance instance) throws NoSupportForMissingValuesException {
    if (instance.hasMissingValue()) {
        throw new NoSupportForMissingValuesException("Cannot handle missing value");
    }
    if (currentAttribute == null) {
        return classDistributionAmongInstances;
    } else {
        return subTree[(int) instance.value(currentAttribute)].distributionForInstance(instance);
    }
}
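The subTree[(int) instance.value(currentAttribute)] lookup in the two MyID3 methods above works because Weka stores a nominal attribute's value as a double holding the index of that value in the attribute's declared value list, so the cast to int yields a valid branch index. A minimal sketch of this encoding, using a hypothetical "color" attribute and the same pre-3.7 API as the examples here:

// A nominal attribute whose values are encoded as indices 0, 1, 2.
FastVector colors = new FastVector();
colors.addElement("red");
colors.addElement("green");
colors.addElement("blue");
Attribute color = new Attribute("color", colors);

FastVector attrs = new FastVector();
attrs.addElement(color);
Instances data = new Instances("demo", attrs, 0);

Instance inst = new Instance(1.0, new double[1]);
inst.setDataset(data);
inst.setValue(color, "green");

// value() returns 1.0, the index of "green", so the cast to int is safe.
int branch = (int) inst.value(color); // == 1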
From source file:id3classifier.ID3Classifiers.java
private Pair<Boolean, Double> sameClass(List<Instance> instances) {
    // take the class index from the first instance; tmpValue starts out as NaN
    int classIndex = instances.get(0).classIndex();
    double tmpValue = Double.NaN;

    for (Instance instance : instances) {
        if (Double.isNaN(tmpValue)) {
            // first class value seen: remember it
            tmpValue = instance.value(classIndex);
        } else {
            // compare this instance's class value against the remembered one
            double val = instance.value(classIndex);
            if (!Double.isNaN(val)) {
                if (val != tmpValue) {
                    // two different class values -> the instances do not share a class
                    return new Pair<>(false, Double.NaN);
                }
            }
        }
    }
    // all (non-missing) class values agreed
    return new Pair<>(true, tmpValue);
}