List of usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:cyber009.udal.mains.WekaUDAL.java
@SuppressWarnings("unchecked") public void updateLabelDataSet() { int count = 0; Instances temp = new Instances(data.unLabelDataSets, 0, 0); data.infoFWunLabel = (HashMap<Integer, Double>) Utilitys.sortByValue((Map) data.infoFWunLabel); for (Map.Entry<Integer, Double> entrySet : data.infoFWunLabel.entrySet()) { int index = entrySet.getKey(); if (count < data.N_FL) { // System.out.println(index + " : " // +entrySet.getValue() + " : " // + data.unLabelDataSets.instance(index).toString()); func.syntacticLabelFunction(data.unLabelDataSets.get(index)); data.labelDataSets.add(data.unLabelDataSets.get(index)); } else {/* ww w. j ava 2 s . c o m*/ temp.add(data.unLabelDataSets.get(index)); } count++; } data.infoFWunLabel.clear(); data.unLabelDataSets.clear(); data.unLabelDataSets.addAll(temp); //System.out.println("------------------------------------------"); }
From source file:cz.vse.fis.keg.entityclassifier.core.salience.EntitySaliencer.java
License:Open Source License
public void computeSalience(List<Entity> entities) { try {//from w w w . ja va 2s . c om if (!initialized) { initialize(); initialized = true; } ArrayList<SEntity> processedEntities = new ArrayList<SEntity>(); for (Entity e : entities) { SEntity entityMention = new SEntity(); entityMention.setBeginIndex(e.getStartOffset().intValue()); entityMention.setEntityType(e.getEntityType()); ArrayList<Type> types = e.getTypes(); ArrayList<String> loggedURIs = new ArrayList<String>(); if (types != null) { for (Type t : types) { String entityURI = t.getEntityURI(); if (!loggedURIs.contains(entityURI)) { loggedURIs.add(entityURI); entityMention.getUrls().add(entityURI); } } } boolean entityAlreadyLogged = false; for (SEntity sEntity : processedEntities) { boolean isThisEntitySame = false; ArrayList<String> entityURIs1 = sEntity.getUrls(); ArrayList<String> entityURIs2 = entityMention.getUrls(); for (String eURI1 : entityURIs1) { for (String eURI2 : entityURIs2) { if (!entityAlreadyLogged) { if (eURI1.equals(eURI2)) { entityAlreadyLogged = true; isThisEntitySame = true; sEntity.setNumOccurrences(sEntity.getNumOccurrences() + 1); } } } } if (isThisEntitySame) { for (String uri : entityMention.getUrls()) { if (!sEntity.getUrls().contains(uri)) { sEntity.getUrls().add(uri); } } } } // Entity seen for first time in the document. if (!entityAlreadyLogged) { entityMention.setNumOccurrences(1); processedEntities.add(entityMention); } } // Preparing the test data container. 
FastVector attributes = new FastVector(6); attributes.add(new Attribute("beginIndex")); attributes.add(new Attribute("numUniqueEntitiesInDoc")); attributes.add(new Attribute("numOfOccurrencesOfEntityInDoc")); attributes.add(new Attribute("numOfEntityMentionsInDoc")); FastVector entityTypeNominalAttVal = new FastVector(2); entityTypeNominalAttVal.addElement("named_entity"); entityTypeNominalAttVal.addElement("common_entity"); Attribute entityTypeAtt = new Attribute("type", entityTypeNominalAttVal); attributes.add(entityTypeAtt); FastVector classNominalAttVal = new FastVector(3); classNominalAttVal.addElement("not_salient"); classNominalAttVal.addElement("less_salient"); classNominalAttVal.addElement("most_salient"); Attribute classAtt = new Attribute("class", classNominalAttVal); attributes.add(classAtt); Instances evalData = new Instances("MyRelation", attributes, 0); evalData.setClassIndex(evalData.numAttributes() - 1); for (int i = 0; i < processedEntities.size(); i++) { String entityType = ""; if (processedEntities.get(i).getEntityType().equals("named entity")) { entityType = "named_entity"; } else if (processedEntities.get(i).getEntityType().equals("common entity")) { entityType = "common_entity"; } else { } Instance inst = new DenseInstance(6); inst.setValue(evalData.attribute(0), processedEntities.get(i).getBeginIndex()); // begin index inst.setValue(evalData.attribute(1), processedEntities.size()); // num of unique entities in doc inst.setValue(evalData.attribute(2), processedEntities.get(i).getNumOccurrences()); // num of entity occurrences in doc inst.setValue(evalData.attribute(3), entities.size()); // num of entity mentions in doc inst.setValue(evalData.attribute(4), entityType); // type of the entity evalData.add(inst); } for (int i = 0; i < processedEntities.size(); i++) { SEntity sEntity = processedEntities.get(i); int classIndex = (int) classifier.classifyInstance(evalData.get(i)); String classLabel = 
evalData.firstInstance().classAttribute().value(classIndex); double pred[] = classifier.distributionForInstance(evalData.get(i)); double probability = pred[classIndex]; double salienceScore = pred[1] * 0.5 + pred[2]; sEntity.setSalienceScore(salienceScore); sEntity.setSalienceConfidence(probability); sEntity.setSalienceClass(classLabel); for (Entity e : entities) { ArrayList<Type> types = e.getTypes(); if (types != null) { for (Type t : types) { if (sEntity.getUrls().contains(t.getEntityURI())) { Salience s = new Salience(); s.setClassLabel(classLabel); DecimalFormat df = new DecimalFormat("0.000"); double fProbability = df.parse(df.format(probability)).doubleValue(); double fSalience = df.parse(df.format(salienceScore)).doubleValue(); s.setConfidence(fProbability); s.setScore(fSalience); t.setSalience(s); } } } } } } catch (Exception ex) { Logger.getLogger(EntitySaliencer.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/** * Set up the header for the PC->original space dataset * /*from ww w .j a v a2s . co m*/ * @return the output format * @throws Exception if something goes wrong */ private Instances setOutputFormatOriginal() throws Exception { FastVector attributes = new FastVector(); for (int i = 0; i < m_numAttribs; i++) { String att = m_trainInstances.attribute(i).name(); attributes.addElement(new Attribute(att)); } if (m_hasClass) { attributes.addElement(m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainHeader.relationName() + "->PC->original space", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes() - 1); } return outputFormat; }
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/** * Set the format for the transformed data * @return a set of empty Instances (header only) in the new format * @throws Exception if the output format can't be set *//*from ww w . j av a 2 s.c o m*/ private Instances setOutputFormat() throws Exception { if (m_eigenvalues == null) { return null; } double cumulative = 0.0; FastVector attributes = new FastVector(); for (int i = m_numAttribs - 1; i >= 0; i--) { StringBuffer attName = new StringBuffer(); // build array of coefficients double[] coeff_mags = new double[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]); int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs; // this array contains the sorted indices of the coefficients int[] coeff_inds; if (m_numAttribs > 0) { // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude coeff_inds = Utils.sort(coeff_mags); } else { // if m_maxAttrsInName <= 0, use all coeffs in original order coeff_inds = new int[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_inds[j] = j; } // build final attName string for (int j = 0; j < num_attrs; j++) { double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]]; if (j > 0 && coeff_value >= 0) attName.append("+"); attName.append( Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name()); } if (num_attrs < m_numAttribs) attName.append("..."); attributes.addElement(new Attribute(attName.toString())); cumulative += m_eigenvalues[m_sortedEigens[i]]; if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) { break; } } if (m_hasClass) { attributes.addElement(m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes() - 1); } m_outputNumAtts = 
outputFormat.numAttributes(); return outputFormat; }
From source file:de.fub.maps.project.detector.model.inference.processhandler.CrossValidationProcessHandler.java
License:Open Source License
@Override protected void handle() { Collection<Attribute> attributeList = getInferenceModel().getAttributes(); Instances trainingSet = new Instances("Classes", new ArrayList<Attribute>(attributeList), 9); trainingSet.setClassIndex(0);/*from w ww. jav a 2s . co m*/ HashMap<String, HashSet<TrackSegment>> dataset = getInferenceModel().getInput().getTrainingsSet(); for (Entry<String, HashSet<TrackSegment>> entry : dataset.entrySet()) { for (TrackSegment trackSegment : entry.getValue()) { Instance instance = getInstance(entry.getKey(), trackSegment); trainingSet.add(instance); } } assert trainingSet.numInstances() > 0 : "Training set is empty and has no instances"; //NO18N evaluate(trainingSet); }
From source file:de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java
License:Open Source License
@Override protected void handle() { clearResults();// w w w. j a va 2s . c om Classifier classifier = getInferenceModel().getClassifier(); HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet(); Collection<Attribute> attributeList = getInferenceModel().getAttributes(); if (!attributeList.isEmpty()) { Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet(); setClassesToView(keySet); Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList), 0); //NO18N unlabeledInstances.setClassIndex(0); ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>(); for (TrackSegment segment : inferenceDataSet) { Instance instance = getInstance(segment); unlabeledInstances.add(instance); segmentList.add(segment); } // create copy Instances labeledInstances = new Instances(unlabeledInstances); for (int index = 0; index < labeledInstances.numInstances(); index++) { try { Instance instance = labeledInstances.instance(index); // classify instance double classifyed = classifier.classifyInstance(instance); instance.setClassValue(classifyed); // get class label String value = unlabeledInstances.classAttribute().value((int) classifyed); if (index < segmentList.size()) { instanceToTrackSegmentMap.put(instance, segmentList.get(index)); } // put label and instance to result map put(value, instance); } catch (Exception ex) { Exceptions.printStackTrace(ex); } } // update visw updateVisualRepresentation(); // update result set of the inferenceModel for (Entry<String, List<Instance>> entry : resultMap.entrySet()) { HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>(); for (Instance instance : entry.getValue()) { TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance); if (trackSegment != null) { trackSegmentList.add(trackSegment); } } // only those classes are put into the result data set, which are not empty if (!trackSegmentList.isEmpty()) { 
getInferenceModel().getResult().put(entry.getKey(), trackSegmentList); } } } else { throw new InferenceModelClassifyException(MessageFormat .format("No attributes available. Attribute list lengeth == {0}", attributeList.size())); } resultMap.clear(); instanceToTrackSegmentMap.clear(); }
From source file:de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java
License:Open Source License
@Override protected void handle() { clearResults();//from www . j a v a2s . c o m Classifier classifier = getInferenceModel().getClassifier(); Collection<Attribute> attributeList = getInferenceModel().getAttributes(); if (!attributeList.isEmpty()) { Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet(); setClassesToView(keySet); Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList), 0); //NO18N unlabeledInstances.setClassIndex(0); ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>(); for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet() .entrySet()) { for (TrackSegment segment : entry.getValue()) { segment.setLabel(entry.getKey()); Instance instance = getInstance(segment); unlabeledInstances.add(instance); segmentList.add(segment); } } // create copy Instances labeledInstances = new Instances(unlabeledInstances); for (int index = 0; index < labeledInstances.numInstances(); index++) { try { Instance instance = labeledInstances.instance(index); // classify instance double classifyed = classifier.classifyInstance(instance); instance.setClassValue(classifyed); // get class label String value = unlabeledInstances.classAttribute().value((int) classifyed); if (index < segmentList.size()) { instanceToTrackSegmentMap.put(instance, segmentList.get(index)); } // put label and instance to result map put(value, instance); } catch (Exception ex) { Exceptions.printStackTrace(ex); } } // update visw updateVisualRepresentation(); // update result set of the inferenceModel for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) { HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>(); for (Instance instance : entry.getValue()) { TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance); if (trackSegment != null) { trackSegmentList.add(trackSegment); } } // only those classes are put into the result data set, which 
are not empty if (!trackSegmentList.isEmpty()) { getInferenceModel().getResult().put(entry.getKey(), trackSegmentList); } } } else { throw new InferenceModelClassifyException(MessageFormat .format("No attributes available. Attribute list lengeth == {0}", attributeList.size())); } resultMap.clear(); instanceToTrackSegmentMap.clear(); }
From source file:de.fub.maps.project.detector.model.inference.processhandler.TrainingsDataProcessHandler.java
License:Open Source License
@Override protected void handle() { final ProgressHandle handle = ProgressHandleFactory.createHandle("Trainings"); try {/*w ww . jav a2 s. c o m*/ handle.start(); Collection<Attribute> attributeCollection = getInferenceModel().getAttributes(); ArrayList<Attribute> arrayList = new ArrayList<Attribute>(attributeCollection); Instances trainingSet = new Instances("Classes", arrayList, 0); trainingSet.setClassIndex(0); Instances testingSet = new Instances("Classes", arrayList, 0); testingSet.setClassIndex(0); HashMap<String, HashSet<TrackSegment>> dataset = getInferenceModel().getInput().getTrainingsSet(); int datasetCount = 0; for (HashSet<TrackSegment> list : dataset.values()) { for (TrackSegment trackSegment : list) { datasetCount += trackSegment.getWayPointList().size(); } } handle.switchToDeterminate(datasetCount); int trackCount = 0; for (Entry<String, HashSet<TrackSegment>> entry : dataset.entrySet()) { int trainingsSetSize = (int) Math.ceil(entry.getValue().size() * getTrainingsSetRatioParameter()); int index = 0; for (TrackSegment trackSegment : entry.getValue()) { Instance instance = getInstance(entry.getKey(), trackSegment); if (index < trainingsSetSize) { trainingSet.add(instance); } else { testingSet.add(instance); } handle.progress(trackCount++); index++; } } assert trainingSet.numInstances() > 0 : "Training set is empty and has no instances"; //NO18N assert testingSet.numInstances() > 0 : "Testing set is empty and has no instances"; //NO18N handle.switchToIndeterminate(); evaluate(trainingSet, testingSet); } finally { handle.finish(); } }
From source file:de.ugoe.cs.cpdp.dataselection.DecisionTreeSelection.java
License:Apache License
@Override public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) { final Instances data = characteristicInstances(testdata, traindataSet); final ArrayList<String> attVals = new ArrayList<String>(); attVals.add("same"); attVals.add("more"); attVals.add("less"); final ArrayList<Attribute> atts = new ArrayList<Attribute>(); for (int j = 0; j < data.numAttributes(); j++) { atts.add(new Attribute(data.attribute(j).name(), attVals)); }//from w w w . jav a 2 s. c o m atts.add(new Attribute("score")); Instances similarityData = new Instances("similarity", atts, 0); similarityData.setClassIndex(similarityData.numAttributes() - 1); try { Classifier classifier = new J48(); for (int i = 0; i < traindataSet.size(); i++) { classifier.buildClassifier(traindataSet.get(i)); for (int j = 0; j < traindataSet.size(); j++) { if (i != j) { double[] similarity = new double[data.numAttributes() + 1]; for (int k = 0; k < data.numAttributes(); k++) { if (0.9 * data.get(i + 1).value(k) > data.get(j + 1).value(k)) { similarity[k] = 2.0; } else if (1.1 * data.get(i + 1).value(k) < data.get(j + 1).value(k)) { similarity[k] = 1.0; } else { similarity[k] = 0.0; } } Evaluation eval = new Evaluation(traindataSet.get(j)); eval.evaluateModel(classifier, traindataSet.get(j)); similarity[data.numAttributes()] = eval.fMeasure(1); similarityData.add(new DenseInstance(1.0, similarity)); } } } REPTree repTree = new REPTree(); if (repTree.getNumFolds() > similarityData.size()) { repTree.setNumFolds(similarityData.size()); } repTree.setNumFolds(2); repTree.buildClassifier(similarityData); Instances testTrainSimilarity = new Instances(similarityData); testTrainSimilarity.clear(); for (int i = 0; i < traindataSet.size(); i++) { double[] similarity = new double[data.numAttributes() + 1]; for (int k = 0; k < data.numAttributes(); k++) { if (0.9 * data.get(0).value(k) > data.get(i + 1).value(k)) { similarity[k] = 2.0; } else if (1.1 * data.get(0).value(k) < data.get(i + 1).value(k)) { 
similarity[k] = 1.0; } else { similarity[k] = 0.0; } } testTrainSimilarity.add(new DenseInstance(1.0, similarity)); } int bestScoringProductIndex = -1; double maxScore = Double.MIN_VALUE; for (int i = 0; i < traindataSet.size(); i++) { double score = repTree.classifyInstance(testTrainSimilarity.get(i)); if (score > maxScore) { maxScore = score; bestScoringProductIndex = i; } } Instances bestScoringProduct = traindataSet.get(bestScoringProductIndex); traindataSet.clear(); traindataSet.add(bestScoringProduct); } catch (Exception e) { Console.printerr("failure during DecisionTreeSelection: " + e.getMessage()); throw new RuntimeException(e); } }
From source file:de.ugoe.cs.cpdp.dataselection.SetWiseEMContextSelection.java
License:Apache License
/** * Returns test- and training data with only the project context factors which were chosen in * the configuration. This is later used for clustering. * //from www.j a v a 2s. c o m * @param testdata * @param traindataSet * @return */ protected Instances getContextFactors(Instances testdata, SetUniqueList<Instances> traindataSet) { // setup weka Instances for clustering final ArrayList<Attribute> atts = new ArrayList<Attribute>(); // we only want the project context factors for (String pcf : this.project_context_factors) { atts.add(new Attribute(pcf)); } // set up the data final Instances data = new Instances("project_context_factors", atts, 0); double[] instanceValues = new double[atts.size()]; // only project context factors + only one instance per project needed int i = 0; for (String pcf : this.project_context_factors) { instanceValues[i] = testdata.instance(0).value(testdata.attribute(pcf)); // Console.traceln(Level.INFO, "adding attribute: " + pcf + " value: " + // instanceValues[i]); i++; } data.add(new DenseInstance(1.0, instanceValues)); // now for the projects of the training stet for (Instances traindata : traindataSet) { instanceValues = new double[atts.size()]; // ohne das hier immer dieselben werte?! i = 0; for (String pcf : this.project_context_factors) { instanceValues[i] = traindata.instance(0).value(traindata.attribute(pcf)); // Console.traceln(Level.INFO, "adding attribute: " + pcf + " value: " + // instanceValues[i]); i++; } data.add(new DenseInstance(1.0, instanceValues)); } return data; }