Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

On this page you can find example usages of the weka.core Instances attribute method.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:core.me.Context.java

License:Open Source License

/**
 * Builds a six-value instance describing this context relative to {@code p}
 * and passes it to {@code Classifiers.getVirtualRelationship}.
 *
 * @param p the context this one is compared against
 * @return whatever the classifier wrapper returns for the built instance
 * @throws Exception if any of the called helpers fails
 */
public double[] getVirtualRelationship(Context p) throws Exception {
    final Classifiers classifiers = Classifiers.get();
    final Instances header = classifiers.getDataStructRC();

    // Numeric features first, then the symbol class of the other context.
    final double h = this.getH(p);
    final double d = this.getD(p);
    final double dx = this.getDX(p);
    final int parentSymbolClass = p.getSymbolClass();

    // Slots 3-5 hold the index of a label within the corresponding nominal attribute.
    final double[] features = {
        h,
        d,
        dx,
        header.attribute(3).indexOfValue("" + this.theClass.get()),
        header.attribute(4).indexOfValue("" + parentSymbolClass),
        header.attribute(5).indexOfValue("0")
    };

    final Instance query = new Instance(1.0, features);
    query.setDataset(header);
    query.setClassMissing();

    return classifiers.getVirtualRelationship(query);
}

From source file:core.TextDirectoryLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined by a
 * call to getStructure then method should do so before processing the rest of
 * the data set./*from  w  w w . j a v a2s.c  o  m*/
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
@Override
public Instances getDataSet() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    String directoryPath = getDirectory().getAbsolutePath();
    ArrayList<String> classes = new ArrayList<String>();
    Enumeration<Object> enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements()) {
        Object oo = enm.nextElement();
        if (oo instanceof SerializedObject) {
            classes.add(((SerializedObject) oo).getObject().toString());
        } else {
            classes.add(oo.toString());
        }
    }

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = classes.get(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (String file : files) {
            try {
                fileCount++;
                if (getDebug()) {
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + file);
                }

                double[] newInst = null;
                if (m_OutputFilename) {
                    newInst = new double[3];
                } else {
                    newInst = new double[2];
                }
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + file);
                BufferedReader is;
                if (m_charSet == null || m_charSet.length() == 0) {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt)));
                } else {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet));
                }
                StringBuffer txtStr = new StringBuffer();
                /*int c;
                while ((c = is.read()) != -1) {
                  txtStr.append((char) c);
                }*/

                FileReader fr = new FileReader(txt);
                BufferedReader br = new BufferedReader(fr);
                String line;
                while ((line = br.readLine()) != null) {
                    txtStr.append(line + System.getProperty("line.separator"));
                }

                newInst[0] = data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename) {
                    newInst[1] = data.attribute(1).addStringValue(subdirPath + File.separator + file);
                }
                newInst[data.classIndex()] = k;
                data.add(new DenseInstance(1.0, newInst));
                is.close();
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath
                        + File.separator + file);
            }
        }
    }

    return data;
}

From source file:core.TextDirectoryLoader.java

License:Open Source License

/**
 * Reads the next document and returns it as a single instance, taking one
 * document from each class directory in turn.
 * <p>
 * On the first call the regular files of every class sub-directory are listed
 * and cached in {@code m_filesByClass}; later calls consume that cache.
 *
 * @param structure the header the returned instance is attached to; the
 *        string value of attribute 0 (and of attribute 1 when file names are
 *        output) is overwritten with the current document
 * @return the next instance, or {@code null} once no documents are left
 * @throws IOException if no source directory has been specified or the
 *         document cannot be read
 */
@Override
public Instance getNextInstance(Instances structure) throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    String directoryPath = getDirectory().getAbsolutePath();
    Attribute classAtt = structure.classAttribute();
    if (m_filesByClass == null) {
        m_filesByClass = new ArrayList<LinkedList<String>>();
        for (int i = 0; i < classAtt.numValues(); i++) {
            File classDir = new File(directoryPath + File.separator + classAtt.value(i));
            String[] files = classDir.list();
            LinkedList<String> classDocs = new LinkedList<String>();
            // list() yields null for a missing/unreadable directory: treat it as empty.
            if (files != null) {
                for (String cd : files) {
                    File txt = new File(directoryPath + File.separator + classAtt.value(i) + File.separator + cd);
                    if (txt.isFile()) {
                        classDocs.add(cd);
                    }
                }
            }
            m_filesByClass.add(classDocs);
        }
    }

    if (m_filesByClass.isEmpty()) {
        return null; // no class values, hence nothing to read
    }

    // cycle through the classes until one with remaining documents is found
    int count = 0;
    LinkedList<String> classContents = m_filesByClass.get(m_lastClassDir);
    boolean found = (classContents.size() > 0);
    while (classContents.size() == 0) {
        m_lastClassDir++;
        count++;
        if (m_lastClassDir == structure.classAttribute().numValues()) {
            m_lastClassDir = 0;
        }
        classContents = m_filesByClass.get(m_lastClassDir);
        if (classContents.size() > 0) {
            found = true; // we have an instance we can create
            break;
        }
        if (count == structure.classAttribute().numValues()) {
            break; // must be finished
        }
    }

    if (!found) {
        return null; // done!
    }

    String nextDoc = classContents.poll();
    File txt = new File(
            directoryPath + File.separator + classAtt.value(m_lastClassDir) + File.separator + nextDoc);

    // Read the whole document; both streams are closed even if reading fails.
    boolean useDefaultCharSet = (m_charSet == null || m_charSet.length() == 0);
    StringBuilder txtStr = new StringBuilder();
    try (FileInputStream in = new FileInputStream(txt);
            BufferedReader is = new BufferedReader(useDefaultCharSet
                    ? new InputStreamReader(in)
                    : new InputStreamReader(in, m_charSet))) {
        int c;
        while ((c = is.read()) != -1) {
            txtStr.append((char) c);
        }
    }

    double[] newInst = m_OutputFilename ? new double[3] : new double[2];

    // The string attributes keep a single value, so its index is always 0.
    newInst[0] = 0;
    structure.attribute(0).setStringValue(txtStr.toString());

    if (m_OutputFilename) {
        newInst[1] = 0;
        structure.attribute(1).setStringValue(txt.getAbsolutePath());
    }
    newInst[structure.classIndex()] = m_lastClassDir;
    Instance inst = new DenseInstance(1.0, newInst);
    inst.setDataset(structure);

    // Move on to the next class directory for the following call.
    m_lastClassDir++;
    if (m_lastClassDir == structure.classAttribute().numValues()) {
        m_lastClassDir = 0;
    }

    return inst;
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Appends the column header line of the predictions output to the given buffer.
 *
 * @param test structure of the test set the predictions refer to
 * @param attributesToOutput range of attribute indices whose names are listed
 *        in parentheses after the columns; may be {@code null} for none
 * @param printDistribution for a nominal class, label the last column
 *        "distribution" instead of "prediction"
 * @param text the StringBuffer to append to
 */
protected static void printClassificationsHeader(Instances test, Range attributesToOutput,
        boolean printDistribution, StringBuffer text) {
    // Fixed column titles, depending on the kind of class attribute.
    final String columns;
    if (!test.classAttribute().isNominal()) {
        columns = " inst#     actual  predicted      error";
    } else if (printDistribution) {
        columns = " inst#     actual  predicted error distribution";
    } else {
        columns = " inst#     actual  predicted error prediction";
    }
    text.append(columns);

    if (attributesToOutput != null) {
        attributesToOutput.setUpper(test.numAttributes() - 1);
        text.append(" (");
        // Empty before the first name, a comma before every later one.
        String separator = "";
        for (int idx = 0; idx < test.numAttributes(); idx++) {
            if (idx != test.classIndex() && attributesToOutput.isInRange(idx)) {
                text.append(separator).append(test.attribute(idx).name());
                separator = ",";
            }
        }
        text.append(")");
    }
    text.append("\n");
}

From source file:cyber009.udal.functions.StatisticalAnalysis.java

/**
 * /*from   w  w  w .  j  av a2  s  . com*/
 * @param dataSet
 * @param classTarget
 * @return 
 */
public double probabilityOfTargerClass(Instances dataSet, double classTarget) {
    AttributeStats classStats = dataSet.attributeStats(dataSet.classIndex());
    double ptc = 0.0D;
    if (classStats.nominalCounts != null) {
        for (int i = 0; i < classStats.nominalCounts.length; i++) {
            if (new Double(dataSet.attribute(dataSet.classIndex()).value(i)) == classTarget) {
                ptc = (double) classStats.nominalCounts[i] / (double) classStats.totalCount;
            }
        }
    }
    return ptc;
}

From source file:cyber009.udal.functions.StatisticalAnalysis.java

/**
 * /*from w  w  w. j  a va 2s  .  c  o m*/
 * @param classifier
 * @param trainingDataSet
 * @param unLabelDataSets
 * @param unLabelSet
 * @param classTarget
 * @return 
 */
public double conditionalEntropy(Classifier classifier, Instances trainingDataSet, Instances unLabelDataSets,
        Instance unLabelSet, double classTarget) {
    double cEnt = 0.0D;
    double entropy = 0.0D;
    unLabelSet.setClassValue(classTarget);
    trainingDataSet.add(trainingDataSet.numInstances(), unLabelSet);
    AttributeStats classStats = trainingDataSet.attributeStats(trainingDataSet.classIndex());
    for (Instance set : unLabelDataSets) {
        if (instanceCMPWithoutClass(set, unLabelSet) == true)
            continue;
        for (int i = 0; i < classStats.nominalCounts.length; i++) {
            double target = new Double(trainingDataSet.attribute(trainingDataSet.classIndex()).value(i));
            set.setClassValue(target);
            entropy = posteriorDistribution(classifier, trainingDataSet, set, classTarget);
            //System.out.println("entropy:"+entropy);
            cEnt += -(entropy) * Math.log10(entropy);
            set.setClassMissing();
        }
    }
    trainingDataSet.remove(trainingDataSet.numInstances() - 1);
    return cEnt;
}

From source file:cz.vse.fis.keg.entityclassifier.core.salience.EntitySaliencer.java

License:Open Source License

/**
 * Classifies the salience of the given entity mentions and stores the result
 * on their types.
 * <p>
 * Steps: (1) lazily call {@code initialize()}; (2) merge mentions that share at
 * least one entity URI into unique entities, counting occurrences; (3) build a
 * six-attribute Weka data set with one row per unique entity; (4) classify
 * each row and attach a {@code Salience} (label, score and confidence rounded
 * via the "0.000" pattern) to every {@code Type} whose URI belongs to the
 * entity.
 * <p>
 * Any exception is caught and logged at SEVERE level; nothing is rethrown.
 *
 * @param entities the entity mentions of one document; their types are
 *        updated in place
 */
public void computeSalience(List<Entity> entities) {
    try {
        if (!initialized) {
            initialize();
            initialized = true;
        }

        // Unique entities found so far, each the union of mentions sharing a URI.
        ArrayList<SEntity> processedEntities = new ArrayList<SEntity>();

        for (Entity e : entities) {
            SEntity entityMention = new SEntity();
            entityMention.setBeginIndex(e.getStartOffset().intValue());
            entityMention.setEntityType(e.getEntityType());

            ArrayList<Type> types = e.getTypes();
            ArrayList<String> loggedURIs = new ArrayList<String>();

            // Collect the distinct entity URIs of this mention.
            if (types != null) {
                for (Type t : types) {
                    String entityURI = t.getEntityURI();

                    if (!loggedURIs.contains(entityURI)) {
                        loggedURIs.add(entityURI);
                        entityMention.getUrls().add(entityURI);
                    }
                }
            }

            boolean entityAlreadyLogged = false;

            // Look for an already processed entity sharing a URI with this mention.
            // Once a match is found the flag stays set, so at most one processed
            // entity is updated per mention.
            for (SEntity sEntity : processedEntities) {
                boolean isThisEntitySame = false;
                ArrayList<String> entityURIs1 = sEntity.getUrls();
                ArrayList<String> entityURIs2 = entityMention.getUrls();

                for (String eURI1 : entityURIs1) {
                    for (String eURI2 : entityURIs2) {
                        if (!entityAlreadyLogged) {
                            if (eURI1.equals(eURI2)) {
                                entityAlreadyLogged = true;
                                isThisEntitySame = true;
                                sEntity.setNumOccurrences(sEntity.getNumOccurrences() + 1);
                            }
                        }
                    }
                }

                // Merge the mention's remaining URIs into the matched entity.
                if (isThisEntitySame) {
                    for (String uri : entityMention.getUrls()) {
                        if (!sEntity.getUrls().contains(uri)) {
                            sEntity.getUrls().add(uri);
                        }
                    }
                }
            }

            // Entity seen for first time in the document.
            if (!entityAlreadyLogged) {
                entityMention.setNumOccurrences(1);
                processedEntities.add(entityMention);
            }
        }

        // Preparing the test data container: four numeric attributes, the nominal
        // entity type and the nominal class.
        FastVector attributes = new FastVector(6);
        attributes.add(new Attribute("beginIndex"));
        attributes.add(new Attribute("numUniqueEntitiesInDoc"));
        attributes.add(new Attribute("numOfOccurrencesOfEntityInDoc"));
        attributes.add(new Attribute("numOfEntityMentionsInDoc"));

        FastVector entityTypeNominalAttVal = new FastVector(2);
        entityTypeNominalAttVal.addElement("named_entity");
        entityTypeNominalAttVal.addElement("common_entity");

        Attribute entityTypeAtt = new Attribute("type", entityTypeNominalAttVal);
        attributes.add(entityTypeAtt);
        FastVector classNominalAttVal = new FastVector(3);
        classNominalAttVal.addElement("not_salient");
        classNominalAttVal.addElement("less_salient");
        classNominalAttVal.addElement("most_salient");
        Attribute classAtt = new Attribute("class", classNominalAttVal);
        attributes.add(classAtt);
        Instances evalData = new Instances("MyRelation", attributes, 0);

        // The class is the last attribute.
        evalData.setClassIndex(evalData.numAttributes() - 1);

        // One row per unique entity; the class value is never set here.
        for (int i = 0; i < processedEntities.size(); i++) {

            // Map the entity type onto the label spelling used by the nominal attribute.
            String entityType = "";
            if (processedEntities.get(i).getEntityType().equals("named entity")) {
                entityType = "named_entity";
            } else if (processedEntities.get(i).getEntityType().equals("common entity")) {
                entityType = "common_entity";
            } else {
                // NOTE(review): entityType stays "" here, which is not one of the
                // declared labels; setValue below presumably rejects it - confirm.
            }
            Instance inst = new DenseInstance(6);
            inst.setValue(evalData.attribute(0), processedEntities.get(i).getBeginIndex()); // begin index
            inst.setValue(evalData.attribute(1), processedEntities.size()); // num of unique entities in doc
            inst.setValue(evalData.attribute(2), processedEntities.get(i).getNumOccurrences()); // num of entity occurrences in doc
            inst.setValue(evalData.attribute(3), entities.size()); // num of entity mentions in doc
            inst.setValue(evalData.attribute(4), entityType); // type of the entity
            evalData.add(inst);

        }

        // Row i of evalData corresponds to processedEntities.get(i).
        for (int i = 0; i < processedEntities.size(); i++) {
            SEntity sEntity = processedEntities.get(i);
            int classIndex = (int) classifier.classifyInstance(evalData.get(i));
            String classLabel = evalData.firstInstance().classAttribute().value(classIndex);
            double pred[] = classifier.distributionForInstance(evalData.get(i));
            double probability = pred[classIndex];

            // Assuming pred follows the class-value order declared above, this
            // weights less_salient by 0.5 and most_salient by 1.
            double salienceScore = pred[1] * 0.5 + pred[2];
            sEntity.setSalienceScore(salienceScore);
            sEntity.setSalienceConfidence(probability);
            sEntity.setSalienceClass(classLabel);

            // Propagate the result to every type whose URI belongs to this entity.
            for (Entity e : entities) {
                ArrayList<Type> types = e.getTypes();
                if (types != null) {
                    for (Type t : types) {
                        if (sEntity.getUrls().contains(t.getEntityURI())) {
                            Salience s = new Salience();
                            s.setClassLabel(classLabel);
                            // Format and re-parse to round both values to the "0.000" pattern.
                            DecimalFormat df = new DecimalFormat("0.000");
                            double fProbability = df.parse(df.format(probability)).doubleValue();
                            double fSalience = df.parse(df.format(salienceScore)).doubleValue();
                            s.setConfidence(fProbability);
                            s.setScore(fSalience);
                            t.setSalience(s);
                        }
                    }
                }
            }
        }

    } catch (Exception ex) {
        // Failures are only logged; callers get no indication that scoring was skipped.
        Logger.getLogger(EntitySaliencer.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Renders the analysis as text: the covariance/correlation matrix, a table of
 * eigenvalue, proportion and cumulative proportion per retained component,
 * and the eigenvectors.
 *
 * @return a summary of the analysis.
 */
private String principalComponentsSummary() {
    StringBuilder summary = new StringBuilder();
    Instances transformedHeader = null;
    int vectorCount = 0;

    try {
        transformedHeader = setOutputFormat();
        if (transformedHeader.classIndex() < 0) {
            vectorCount = transformedHeader.numAttributes();
        } else {
            vectorCount = transformedHeader.numAttributes() - 1;
        }
    } catch (Exception ex) {
        // Left empty as before: vectorCount stays 0 and both component loops are skipped.
    }

    String matrixKind = m_center ? "Covariance " : "Correlation ";
    summary.append(matrixKind).append("matrix\n").append(matrixToString(m_correlation)).append("\n\n");
    summary.append("eigenvalue\tproportion\tcumulative\n");

    // Components are visited from the end of m_sortedEigens backwards.
    double runningTotal = 0.0;
    for (int rank = 0; rank < vectorCount; rank++) {
        double eigenvalue = m_eigenvalues[m_sortedEigens[m_numAttribs - 1 - rank]];
        runningTotal += eigenvalue;
        summary.append(Utils.doubleToString(eigenvalue, 9, 5)).append("\t")
                .append(Utils.doubleToString((eigenvalue / m_sumOfEigenValues), 9, 5)).append("\t")
                .append(Utils.doubleToString((runningTotal / m_sumOfEigenValues), 9, 5)).append("\t")
                .append(transformedHeader.attribute(rank).name()).append("\n");
    }

    summary.append("\nEigenvectors\n");
    for (int column = 1; column <= vectorCount; column++) {
        summary.append(" V").append(column).append('\t');
    }
    summary.append("\n");

    // One row per original attribute, one column per retained component.
    for (int row = 0; row < m_numAttribs; row++) {
        for (int rank = 0; rank < vectorCount; rank++) {
            int sortedPos = m_numAttribs - 1 - rank;
            summary.append(Utils.doubleToString(m_eigenvectors[row][m_sortedEigens[sortedPos]], 7, 4))
                    .append("\t");
        }
        summary.append(m_trainInstances.attribute(row).name()).append('\n');
    }

    if (m_transBackToOriginal) {
        summary.append("\nPC space transformed back to original space.\n")
                .append("(Note: can't evaluate attributes in the original ")
                .append("space)\n");
    }
    return summary.toString();
}

From source file:de.ugoe.cs.cpdp.dataprocessing.AttributeNonRemoval.java

License:Apache License

/**
 * Removes every attribute whose name is not listed in {@code attributeNames}
 * from the test data and, at the same index, from every training data set.
 * <p>
 * Attributes are visited from the last index down so that a deletion never
 * shifts an index that still has to be checked, and each attribute is tested
 * against the whole name list at once.
 *
 * @see de.ugoe.cs.cpdp.dataprocessing.SetWiseProcessingStrategy#apply(weka.core.Instances,
 *      org.apache.commons.collections4.list.SetUniqueList)
 */
@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    for (int i = testdata.numAttributes() - 1; i >= 0; i--) {
        if (!attributeNames.contains(testdata.attribute(i).name())) {
            testdata.deleteAttributeAt(i);
            for (Instances traindata : traindataSet) {
                traindata.deleteAttributeAt(i);
            }
        }
    }
}

From source file:de.ugoe.cs.cpdp.dataprocessing.AttributeNonRemoval.java

License:Apache License

/**
 * Keeps only the attributes named in {@code attributeNames}: every other
 * attribute is deleted from the test data and, at the same index, from the
 * training data.
 *
 * @see de.ugoe.cs.cpdp.dataprocessing.ProcessesingStrategy#apply(weka.core.Instances,
 *      weka.core.Instances)
 */
@Override
public void apply(Instances testdata, Instances traindata) {
    // Walk backwards so deletions do not disturb indices still to be visited.
    int idx = testdata.numAttributes();
    while (--idx >= 0) {
        String name = testdata.attribute(idx).name();
        if (attributeNames.contains(name)) {
            continue;
        }
        testdata.deleteAttributeAt(idx);
        traindata.deleteAttributeAt(idx);
    }
}