List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:core.me.Context.java
License:Open Source License
/**
 * Builds a single Weka instance describing the relationship between this
 * context and its parent {@code p}, then asks the shared classifier set for
 * the "virtual relationship" distribution of that instance.
 *
 * @param p the parent context this context is related to
 * @return the classifier's output for the constructed instance
 * @throws Exception if feature extraction or classification fails
 */
public double[] getVirtualRelationship(Context p) throws Exception {
    // Shared classifier registry and the Instances header used as the
    // data structure for the relationship-classification model.
    Classifiers classifiers = Classifiers.get();
    Instances header = classifiers.getDataStructRC();

    // Numeric features relating this context to its parent.
    double h = this.getH(p);
    double d = this.getD(p);
    double dx = this.getDX(p);
    int parentClass = p.getSymbolClass();

    // Feature vector: attributes 3-5 are nominal, so their numeric value is
    // the index of the string label within the attribute's value list.
    double[] featureValues = new double[6];
    featureValues[0] = h;
    featureValues[1] = d;
    featureValues[2] = dx;
    featureValues[3] = header.attribute(3).indexOfValue("" + this.theClass.get());
    featureValues[4] = header.attribute(4).indexOfValue("" + parentClass);
    featureValues[5] = header.attribute(5).indexOfValue("0");

    Instance inst = new Instance(1.0, featureValues);
    inst.setDataset(header);
    // The class is what the model is asked to predict.
    inst.setClassMissing();
    return classifiers.getVirtualRelationship(inst);
}
From source file:core.TextDirectoryLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined by a * call to getStructure then method should do so before processing the rest of * the data set./*from w w w . j a v a2s.c o m*/ * * @return the structure of the data set as an empty set of Instances * @throws IOException if there is no source or parsing fails */ @Override public Instances getDataSet() throws IOException { if (getDirectory() == null) { throw new IOException("No directory/source has been specified"); } String directoryPath = getDirectory().getAbsolutePath(); ArrayList<String> classes = new ArrayList<String>(); Enumeration<Object> enm = getStructure().classAttribute().enumerateValues(); while (enm.hasMoreElements()) { Object oo = enm.nextElement(); if (oo instanceof SerializedObject) { classes.add(((SerializedObject) oo).getObject().toString()); } else { classes.add(oo.toString()); } } Instances data = getStructure(); int fileCount = 0; for (int k = 0; k < classes.size(); k++) { String subdirPath = classes.get(k); File subdir = new File(directoryPath + File.separator + subdirPath); String[] files = subdir.list(); for (String file : files) { try { fileCount++; if (getDebug()) { System.err.println("processing " + fileCount + " : " + subdirPath + " : " + file); } double[] newInst = null; if (m_OutputFilename) { newInst = new double[3]; } else { newInst = new double[2]; } File txt = new File(directoryPath + File.separator + subdirPath + File.separator + file); BufferedReader is; if (m_charSet == null || m_charSet.length() == 0) { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt))); } else { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet)); } StringBuffer txtStr = new StringBuffer(); /*int c; while ((c = is.read()) != -1) { txtStr.append((char) c); }*/ FileReader fr = new FileReader(txt); BufferedReader br = new BufferedReader(fr); String line; while ((line = br.readLine()) != null) { txtStr.append(line + 
System.getProperty("line.separator")); } newInst[0] = data.attribute(0).addStringValue(txtStr.toString()); if (m_OutputFilename) { newInst[1] = data.attribute(1).addStringValue(subdirPath + File.separator + file); } newInst[data.classIndex()] = k; data.add(new DenseInstance(1.0, newInst)); is.close(); } catch (Exception e) { System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath + File.separator + file); } } } return data; }
From source file:core.TextDirectoryLoader.java
License:Open Source License
/** * Process input directories/files incrementally. * * @param structure ignored//w w w .ja v a2 s .co m * @return never returns without throwing an exception * @throws IOException if a problem occurs */ @Override public Instance getNextInstance(Instances structure) throws IOException { // throw new // IOException("TextDirectoryLoader can't read data sets incrementally."); String directoryPath = getDirectory().getAbsolutePath(); Attribute classAtt = structure.classAttribute(); if (m_filesByClass == null) { m_filesByClass = new ArrayList<LinkedList<String>>(); for (int i = 0; i < classAtt.numValues(); i++) { File classDir = new File(directoryPath + File.separator + classAtt.value(i)); String[] files = classDir.list(); LinkedList<String> classDocs = new LinkedList<String>(); for (String cd : files) { File txt = new File(directoryPath + File.separator + classAtt.value(i) + File.separator + cd); if (txt.isFile()) { classDocs.add(cd); } } m_filesByClass.add(classDocs); } } // cycle through the classes int count = 0; LinkedList<String> classContents = m_filesByClass.get(m_lastClassDir); boolean found = (classContents.size() > 0); while (classContents.size() == 0) { m_lastClassDir++; count++; if (m_lastClassDir == structure.classAttribute().numValues()) { m_lastClassDir = 0; } classContents = m_filesByClass.get(m_lastClassDir); if (classContents.size() > 0) { found = true; // we have an instance we can create break; } if (count == structure.classAttribute().numValues()) { break; // must be finished } } if (found) { String nextDoc = classContents.poll(); File txt = new File( directoryPath + File.separator + classAtt.value(m_lastClassDir) + File.separator + nextDoc); BufferedReader is; if (m_charSet == null || m_charSet.length() == 0) { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt))); } else { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = 
is.read()) != -1) { txtStr.append((char) c); } double[] newInst = null; if (m_OutputFilename) { newInst = new double[3]; } else { newInst = new double[2]; } newInst[0] = 0; structure.attribute(0).setStringValue(txtStr.toString()); if (m_OutputFilename) { newInst[1] = 0; structure.attribute(1).setStringValue(txt.getAbsolutePath()); } newInst[structure.classIndex()] = m_lastClassDir; Instance inst = new DenseInstance(1.0, newInst); inst.setDataset(structure); is.close(); m_lastClassDir++; if (m_lastClassDir == structure.classAttribute().numValues()) { m_lastClassDir = 0; } return inst; } else { return null; // done! } }
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/** * Prints the header for the predictions output into a supplied StringBuffer * * @param test structure of the test set to print predictions for * @param attributesToOutput indices of the attributes to output * @param printDistribution prints the complete distribution for nominal * attributes, not just the predicted value * @param text the StringBuffer to print to *///from w w w. ja v a 2 s . c o m protected static void printClassificationsHeader(Instances test, Range attributesToOutput, boolean printDistribution, StringBuffer text) { // print header if (test.classAttribute().isNominal()) if (printDistribution) text.append(" inst# actual predicted error distribution"); else text.append(" inst# actual predicted error prediction"); else text.append(" inst# actual predicted error"); if (attributesToOutput != null) { attributesToOutput.setUpper(test.numAttributes() - 1); text.append(" ("); boolean first = true; for (int i = 0; i < test.numAttributes(); i++) { if (i == test.classIndex()) continue; if (attributesToOutput.isInRange(i)) { if (!first) text.append(","); text.append(test.attribute(i).name()); first = false; } } text.append(")"); } text.append("\n"); }
From source file:cyber009.udal.functions.StatisticalAnalysis.java
/** * /*from w w w . j av a2 s . com*/ * @param dataSet * @param classTarget * @return */ public double probabilityOfTargerClass(Instances dataSet, double classTarget) { AttributeStats classStats = dataSet.attributeStats(dataSet.classIndex()); double ptc = 0.0D; if (classStats.nominalCounts != null) { for (int i = 0; i < classStats.nominalCounts.length; i++) { if (new Double(dataSet.attribute(dataSet.classIndex()).value(i)) == classTarget) { ptc = (double) classStats.nominalCounts[i] / (double) classStats.totalCount; } } } return ptc; }
From source file:cyber009.udal.functions.StatisticalAnalysis.java
/** * /*from w w w. j a va 2s . c o m*/ * @param classifier * @param trainingDataSet * @param unLabelDataSets * @param unLabelSet * @param classTarget * @return */ public double conditionalEntropy(Classifier classifier, Instances trainingDataSet, Instances unLabelDataSets, Instance unLabelSet, double classTarget) { double cEnt = 0.0D; double entropy = 0.0D; unLabelSet.setClassValue(classTarget); trainingDataSet.add(trainingDataSet.numInstances(), unLabelSet); AttributeStats classStats = trainingDataSet.attributeStats(trainingDataSet.classIndex()); for (Instance set : unLabelDataSets) { if (instanceCMPWithoutClass(set, unLabelSet) == true) continue; for (int i = 0; i < classStats.nominalCounts.length; i++) { double target = new Double(trainingDataSet.attribute(trainingDataSet.classIndex()).value(i)); set.setClassValue(target); entropy = posteriorDistribution(classifier, trainingDataSet, set, classTarget); //System.out.println("entropy:"+entropy); cEnt += -(entropy) * Math.log10(entropy); set.setClassMissing(); } } trainingDataSet.remove(trainingDataSet.numInstances() - 1); return cEnt; }
From source file:cz.vse.fis.keg.entityclassifier.core.salience.EntitySaliencer.java
License:Open Source License
/**
 * Computes a salience class, confidence and score for every entity mention
 * in {@code entities} and attaches the result to each entity's types.
 *
 * Pipeline: (1) deduplicate mentions by shared URI into SEntity records with
 * occurrence counts, (2) build a weka test set with one row per unique
 * entity, (3) classify each row and copy the prediction back onto every
 * matching Type as a Salience object. Exceptions are logged, not rethrown.
 */
public void computeSalience(List<Entity> entities) {
    try {
        // Lazily initialize the classifier/model on first use.
        if (!initialized) {
            initialize();
            initialized = true;
        }
        ArrayList<SEntity> processedEntities = new ArrayList<SEntity>();
        for (Entity e : entities) {
            SEntity entityMention = new SEntity();
            entityMention.setBeginIndex(e.getStartOffset().intValue());
            entityMention.setEntityType(e.getEntityType());
            ArrayList<Type> types = e.getTypes();
            // Collect the distinct URIs of this mention's types.
            ArrayList<String> loggedURIs = new ArrayList<String>();
            if (types != null) {
                for (Type t : types) {
                    String entityURI = t.getEntityURI();
                    if (!loggedURIs.contains(entityURI)) {
                        loggedURIs.add(entityURI);
                        entityMention.getUrls().add(entityURI);
                    }
                }
            }
            // Two mentions are "the same entity" when they share any URI;
            // in that case bump the occurrence count and merge URI lists.
            boolean entityAlreadyLogged = false;
            for (SEntity sEntity : processedEntities) {
                boolean isThisEntitySame = false;
                ArrayList<String> entityURIs1 = sEntity.getUrls();
                ArrayList<String> entityURIs2 = entityMention.getUrls();
                for (String eURI1 : entityURIs1) {
                    for (String eURI2 : entityURIs2) {
                        if (!entityAlreadyLogged) {
                            if (eURI1.equals(eURI2)) {
                                entityAlreadyLogged = true;
                                isThisEntitySame = true;
                                sEntity.setNumOccurrences(sEntity.getNumOccurrences() + 1);
                            }
                        }
                    }
                }
                if (isThisEntitySame) {
                    for (String uri : entityMention.getUrls()) {
                        if (!sEntity.getUrls().contains(uri)) {
                            sEntity.getUrls().add(uri);
                        }
                    }
                }
            }
            // Entity seen for the first time in the document.
            if (!entityAlreadyLogged) {
                entityMention.setNumOccurrences(1);
                processedEntities.add(entityMention);
            }
        }

        // Preparing the test data container: 4 numeric features, a nominal
        // entity-type feature, and the nominal class (last attribute).
        FastVector attributes = new FastVector(6);
        attributes.add(new Attribute("beginIndex"));
        attributes.add(new Attribute("numUniqueEntitiesInDoc"));
        attributes.add(new Attribute("numOfOccurrencesOfEntityInDoc"));
        attributes.add(new Attribute("numOfEntityMentionsInDoc"));
        FastVector entityTypeNominalAttVal = new FastVector(2);
        entityTypeNominalAttVal.addElement("named_entity");
        entityTypeNominalAttVal.addElement("common_entity");
        Attribute entityTypeAtt = new Attribute("type", entityTypeNominalAttVal);
        attributes.add(entityTypeAtt);
        FastVector classNominalAttVal = new FastVector(3);
        classNominalAttVal.addElement("not_salient");
        classNominalAttVal.addElement("less_salient");
        classNominalAttVal.addElement("most_salient");
        Attribute classAtt = new Attribute("class", classNominalAttVal);
        attributes.add(classAtt);
        Instances evalData = new Instances("MyRelation", attributes, 0);
        evalData.setClassIndex(evalData.numAttributes() - 1);

        // One test row per unique entity; class is left missing.
        for (int i = 0; i < processedEntities.size(); i++) {
            // Map the human-readable type to the nominal attribute labels.
            String entityType = "";
            if (processedEntities.get(i).getEntityType().equals("named entity")) {
                entityType = "named_entity";
            } else if (processedEntities.get(i).getEntityType().equals("common entity")) {
                entityType = "common_entity";
            } else {
                // NOTE(review): unknown types leave entityType as "" — the
                // setValue below would then fail; confirm inputs are always
                // one of the two expected labels.
            }
            Instance inst = new DenseInstance(6);
            inst.setValue(evalData.attribute(0), processedEntities.get(i).getBeginIndex()); // begin index
            inst.setValue(evalData.attribute(1), processedEntities.size()); // num of unique entities in doc
            inst.setValue(evalData.attribute(2), processedEntities.get(i).getNumOccurrences()); // num of entity occurrences in doc
            inst.setValue(evalData.attribute(3), entities.size()); // num of entity mentions in doc
            inst.setValue(evalData.attribute(4), entityType); // type of the entity
            evalData.add(inst);
        }

        // Classify each row and push the prediction back onto the entities.
        for (int i = 0; i < processedEntities.size(); i++) {
            SEntity sEntity = processedEntities.get(i);
            int classIndex = (int) classifier.classifyInstance(evalData.get(i));
            String classLabel = evalData.firstInstance().classAttribute().value(classIndex);
            double pred[] = classifier.distributionForInstance(evalData.get(i));
            double probability = pred[classIndex];
            // Score: half weight on "less_salient", full on "most_salient".
            double salienceScore = pred[1] * 0.5 + pred[2];
            sEntity.setSalienceScore(salienceScore);
            sEntity.setSalienceConfidence(probability);
            sEntity.setSalienceClass(classLabel);
            // Attach the salience to every Type whose URI belongs to this entity.
            for (Entity e : entities) {
                ArrayList<Type> types = e.getTypes();
                if (types != null) {
                    for (Type t : types) {
                        if (sEntity.getUrls().contains(t.getEntityURI())) {
                            Salience s = new Salience();
                            s.setClassLabel(classLabel);
                            // Round to three decimals via format/parse.
                            DecimalFormat df = new DecimalFormat("0.000");
                            double fProbability = df.parse(df.format(probability)).doubleValue();
                            double fSalience = df.parse(df.format(salienceScore)).doubleValue();
                            s.setConfidence(fProbability);
                            s.setScore(fSalience);
                            t.setSalience(s);
                        }
                    }
                }
            }
        }
    } catch (Exception ex) {
        // Best-effort API: log and continue rather than propagate.
        Logger.getLogger(EntitySaliencer.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/**
 * Return a summary of the analysis.
 *
 * @return a summary of the analysis: the covariance/correlation matrix,
 *         eigenvalue/proportion/cumulative table and eigenvector matrix.
 */
private String principalComponentsSummary() {
    StringBuffer result = new StringBuffer();
    double cumulative = 0.0;
    Instances output = null;
    int numVectors = 0;
    try {
        // Number of retained components = output attributes, excluding the
        // class attribute when one is set.
        output = setOutputFormat();
        numVectors = (output.classIndex() < 0) ? output.numAttributes() : output.numAttributes() - 1;
    } catch (Exception ex) {
        // NOTE(review): exception swallowed silently. If setOutputFormat()
        // fails, 'output' stays null and output.attribute(...) below will
        // NPE — confirm this path cannot be reached in practice.
    }
    // Header depends on whether data was centered (covariance) or
    // standardized (correlation).
    String corrCov = (m_center) ? "Covariance " : "Correlation ";
    result.append(corrCov + "matrix\n" + matrixToString(m_correlation) + "\n\n");
    result.append("eigenvalue\tproportion\tcumulative\n");
    // m_sortedEigens appears to order eigenvalues ascending, so iterate
    // from the top down to print components by decreasing variance —
    // TODO confirm against the sorting code.
    for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {
        cumulative += m_eigenvalues[m_sortedEigens[i]];
        result.append(Utils.doubleToString(m_eigenvalues[m_sortedEigens[i]], 9, 5) + "\t"
                + Utils.doubleToString((m_eigenvalues[m_sortedEigens[i]] / m_sumOfEigenValues), 9, 5) + "\t"
                + Utils.doubleToString((cumulative / m_sumOfEigenValues), 9, 5) + "\t"
                + output.attribute(m_numAttribs - i - 1).name() + "\n");
    }
    // Eigenvector matrix: one column per retained component V1..Vn,
    // one row per original attribute.
    result.append("\nEigenvectors\n");
    for (int j = 1; j <= numVectors; j++) {
        result.append(" V" + j + '\t');
    }
    result.append("\n");
    for (int j = 0; j < m_numAttribs; j++) {
        for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {
            result.append(Utils.doubleToString(m_eigenvectors[j][m_sortedEigens[i]], 7, 4) + "\t");
        }
        result.append(m_trainInstances.attribute(j).name() + '\n');
    }
    if (m_transBackToOriginal) {
        result.append("\nPC space transformed back to original space.\n"
                + "(Note: can't evaluate attributes in the original " + "space)\n");
    }
    return result.toString();
}
From source file:de.ugoe.cs.cpdp.dataprocessing.AttributeNonRemoval.java
License:Apache License
/**
 * Keeps only the attributes whose names are listed in {@code attributeNames},
 * deleting every other attribute from the test data and from each training
 * set in parallel (all sets are assumed to share the same attribute layout).
 *
 * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances,
 *      org.apache.commons.collections4.list.SetUniqueList)
 */
@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    // BUG FIX: the original iterated forward while deleting (skipping the
    // attribute that slides into each deleted slot) and, worse, deleted
    // every attribute not equal to ONE name per pass — removing attributes
    // that are themselves listed in attributeNames. Iterate backwards and
    // test membership against the whole list, mirroring the pairwise
    // apply(Instances, Instances) overload.
    for (int i = testdata.numAttributes() - 1; i >= 0; i--) {
        if (!attributeNames.contains(testdata.attribute(i).name())) {
            testdata.deleteAttributeAt(i);
            for (Instances traindata : traindataSet) {
                traindata.deleteAttributeAt(i);
            }
        }
    }
}
From source file:de.ugoe.cs.cpdp.dataprocessing.AttributeNonRemoval.java
License:Apache License
/**
 * Keeps only the attributes whose names are listed in {@code attributeNames},
 * deleting every other attribute from both data sets at the same index.
 *
 * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
 *      weka.core.Instances)
 */
@Override
public void apply(Instances testdata, Instances traindata) {
    // Walk from the last attribute to the first so deletions do not shift
    // the indices of attributes still to be visited.
    for (int index = testdata.numAttributes() - 1; index >= 0; index--) {
        String name = testdata.attribute(index).name();
        if (attributeNames.contains(name)) {
            continue; // attribute is on the keep-list
        }
        testdata.deleteAttributeAt(index);
        traindata.deleteAttributeAt(index);
    }
}