Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usage for the weka.core Instances constructor.

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Source Link

Document

Creates an empty set of instances.

Usage

From source file:cyber009.udal.mains.WekaUDAL.java

@SuppressWarnings("unchecked")
public void updateLabelDataSet() {
    // Promote the N_FL highest-ranked unlabeled instances into the labeled
    // set; everything else becomes the new unlabeled pool.
    Instances remaining = new Instances(data.unLabelDataSets, 0, 0);
    data.infoFWunLabel = (HashMap<Integer, Double>) Utilitys.sortByValue((Map) data.infoFWunLabel);
    int processed = 0;
    for (Map.Entry<Integer, Double> entry : data.infoFWunLabel.entrySet()) {
        int idx = entry.getKey();
        if (processed < data.N_FL) {
            // Assign a synthetic label, then move the instance to the labeled set.
            func.syntacticLabelFunction(data.unLabelDataSets.get(idx));
            data.labelDataSets.add(data.unLabelDataSets.get(idx));
        } else {
            remaining.add(data.unLabelDataSets.get(idx));
        }
        processed++;
    }
    // Reset the ranking and replace the unlabeled pool with the remainder.
    data.infoFWunLabel.clear();
    data.unLabelDataSets.clear();
    data.unLabelDataSets.addAll(remaining);
}

From source file:cz.vse.fis.keg.entityclassifier.core.salience.EntitySaliencer.java

License:Open Source License

/**
 * Computes a salience class ("not_salient" / "less_salient" / "most_salient"),
 * a salience score and a confidence value for every unique entity in the
 * document, and attaches the result to each matching Type of each Entity.
 *
 * @param entities the entity mentions of one document; their Types are
 *                 mutated in place to carry the computed Salience
 */
public void computeSalience(List<Entity> entities) {
    try {
        // Lazily initialize the classifier/model on first use.
        if (!initialized) {
            initialize();
            initialized = true;
        }

        ArrayList<SEntity> processedEntities = new ArrayList<SEntity>();

        // ---- Phase 1: collapse entity mentions into unique entities. ----
        for (Entity e : entities) {
            SEntity entityMention = new SEntity();
            entityMention.setBeginIndex(e.getStartOffset().intValue());
            entityMention.setEntityType(e.getEntityType());

            ArrayList<Type> types = e.getTypes();
            ArrayList<String> loggedURIs = new ArrayList<String>();

            // Collect the distinct type URIs of this mention.
            if (types != null) {
                for (Type t : types) {
                    String entityURI = t.getEntityURI();

                    if (!loggedURIs.contains(entityURI)) {
                        loggedURIs.add(entityURI);
                        entityMention.getUrls().add(entityURI);
                    }
                }
            }

            boolean entityAlreadyLogged = false;

            // Two mentions count as the same entity iff they share at least
            // one URI. Only the first matching processed entity gets its
            // occurrence count incremented (guarded by entityAlreadyLogged).
            for (SEntity sEntity : processedEntities) {
                boolean isThisEntitySame = false;
                ArrayList<String> entityURIs1 = sEntity.getUrls();
                ArrayList<String> entityURIs2 = entityMention.getUrls();

                for (String eURI1 : entityURIs1) {
                    for (String eURI2 : entityURIs2) {
                        if (!entityAlreadyLogged) {
                            if (eURI1.equals(eURI2)) {
                                entityAlreadyLogged = true;
                                isThisEntitySame = true;
                                sEntity.setNumOccurrences(sEntity.getNumOccurrences() + 1);
                            }
                        }
                    }
                }

                // Merge any URIs the new mention adds to the known entity.
                if (isThisEntitySame) {
                    for (String uri : entityMention.getUrls()) {
                        if (!sEntity.getUrls().contains(uri)) {
                            sEntity.getUrls().add(uri);
                        }
                    }
                }
            }

            // Entity seen for first time in the document.
            if (!entityAlreadyLogged) {
                entityMention.setNumOccurrences(1);
                processedEntities.add(entityMention);
            }
        }

        // ---- Phase 2: build the weka header for the evaluation data. ----
        // Preparing the test data container.
        FastVector attributes = new FastVector(6);
        attributes.add(new Attribute("beginIndex"));
        attributes.add(new Attribute("numUniqueEntitiesInDoc"));
        attributes.add(new Attribute("numOfOccurrencesOfEntityInDoc"));
        attributes.add(new Attribute("numOfEntityMentionsInDoc"));

        FastVector entityTypeNominalAttVal = new FastVector(2);
        entityTypeNominalAttVal.addElement("named_entity");
        entityTypeNominalAttVal.addElement("common_entity");

        Attribute entityTypeAtt = new Attribute("type", entityTypeNominalAttVal);
        attributes.add(entityTypeAtt);
        FastVector classNominalAttVal = new FastVector(3);
        classNominalAttVal.addElement("not_salient");
        classNominalAttVal.addElement("less_salient");
        classNominalAttVal.addElement("most_salient");
        Attribute classAtt = new Attribute("class", classNominalAttVal);
        attributes.add(classAtt);
        Instances evalData = new Instances("MyRelation", attributes, 0);

        // The class attribute is the last one added above.
        evalData.setClassIndex(evalData.numAttributes() - 1);

        // ---- Phase 3: one weka instance per unique entity. ----
        for (int i = 0; i < processedEntities.size(); i++) {

            // Map the free-text entity type onto the nominal values.
            // NOTE(review): if the type is neither "named entity" nor
            // "common entity", entityType stays "" and setValue below is
            // handed a value that is not in the nominal list — verify weka
            // does not throw here for unexpected entity types.
            String entityType = "";
            if (processedEntities.get(i).getEntityType().equals("named entity")) {
                entityType = "named_entity";
            } else if (processedEntities.get(i).getEntityType().equals("common entity")) {
                entityType = "common_entity";
            } else {
            }
            Instance inst = new DenseInstance(6);
            inst.setValue(evalData.attribute(0), processedEntities.get(i).getBeginIndex()); // begin index
            inst.setValue(evalData.attribute(1), processedEntities.size()); // num of unique entities in doc
            inst.setValue(evalData.attribute(2), processedEntities.get(i).getNumOccurrences()); // num of entity occurrences in doc
            inst.setValue(evalData.attribute(3), entities.size()); // num of entity mentions in doc
            inst.setValue(evalData.attribute(4), entityType); // type of the entity
            evalData.add(inst);

        }

        // ---- Phase 4: classify each entity and write back the salience. ----
        for (int i = 0; i < processedEntities.size(); i++) {
            SEntity sEntity = processedEntities.get(i);
            int classIndex = (int) classifier.classifyInstance(evalData.get(i));
            String classLabel = evalData.firstInstance().classAttribute().value(classIndex);
            double pred[] = classifier.distributionForInstance(evalData.get(i));
            double probability = pred[classIndex];

            // Weighted score: "less_salient" counts half, "most_salient" full.
            double salienceScore = pred[1] * 0.5 + pred[2];
            sEntity.setSalienceScore(salienceScore);
            sEntity.setSalienceConfidence(probability);
            sEntity.setSalienceClass(classLabel);

            // Propagate the result to every Type (of every mention) that
            // shares a URI with this unique entity.
            for (Entity e : entities) {
                ArrayList<Type> types = e.getTypes();
                if (types != null) {
                    for (Type t : types) {
                        if (sEntity.getUrls().contains(t.getEntityURI())) {
                            Salience s = new Salience();
                            s.setClassLabel(classLabel);
                            // Round score/confidence to three decimals via a
                            // format/parse round trip.
                            DecimalFormat df = new DecimalFormat("0.000");
                            double fProbability = df.parse(df.format(probability)).doubleValue();
                            double fSalience = df.parse(df.format(salienceScore)).doubleValue();
                            s.setConfidence(fProbability);
                            s.setScore(fSalience);
                            t.setSalience(s);
                        }
                    }
                }
            }
        }

    } catch (Exception ex) {
        // Broad catch: any failure is logged and salience is simply skipped.
        Logger.getLogger(EntitySaliencer.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Set up the header for the PC->original space dataset
 * /*from   ww  w  .j a  v  a2s  .  co m*/
 * @return            the output format
 * @throws Exception  if something goes wrong
 */
private Instances setOutputFormatOriginal() throws Exception {
    FastVector attributes = new FastVector();

    for (int i = 0; i < m_numAttribs; i++) {
        String att = m_trainInstances.attribute(i).name();
        attributes.addElement(new Attribute(att));
    }

    if (m_hasClass) {
        attributes.addElement(m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainHeader.relationName() + "->PC->original space", attributes,
            0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    return outputFormat;
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Set the format for the transformed data.
 *
 * @return a set of empty Instances (header only) in the new format
 * @throws Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
    // Eigen-decomposition has not been computed yet — no format to build.
    if (m_eigenvalues == null) {
        return null;
    }

    double cumulative = 0.0;
    FastVector attributes = new FastVector();
    // m_sortedEigens is sorted ascending, so iterate backwards to process
    // components from largest to smallest eigenvalue.
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        StringBuffer attName = new StringBuffer();
        // build array of coefficients (negated magnitudes, so that
        // Utils.sort yields a decreasing-magnitude order)
        double[] coeff_mags = new double[m_numAttribs];
        for (int j = 0; j < m_numAttribs; j++)
            coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
        int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;
        // this array contains the sorted indices of the coefficients
        int[] coeff_inds;
        if (m_maxAttrsInName > 0) {
            // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude.
            // (bug fix: this previously tested m_numAttribs > 0, which made the
            // "use all coefficients in original order" branch unreachable)
            coeff_inds = Utils.sort(coeff_mags);
        } else {
            // if  m_maxAttrsInName <= 0, use all coeffs in original order
            coeff_inds = new int[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++)
                coeff_inds[j] = j;
        }
        // build final attName string, e.g. "0.5a1+0.3a2..."
        for (int j = 0; j < num_attrs; j++) {
            double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
            if (j > 0 && coeff_value >= 0)
                attName.append("+");
            attName.append(
                    Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name());
        }
        if (num_attrs < m_numAttribs)
            attName.append("...");

        attributes.addElement(new Attribute(attName.toString()));
        cumulative += m_eigenvalues[m_sortedEigens[i]];

        // Stop adding components once the requested variance is covered.
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    // Carry the class attribute over unchanged, if there is one.
    if (m_hasClass) {
        attributes.addElement(m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components",
            attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    m_outputNumAtts = outputFormat.numAttributes();
    return outputFormat;
}

From source file:de.fub.maps.project.detector.model.inference.processhandler.CrossValidationProcessHandler.java

License:Open Source License

@Override
protected void handle() {
    // Collect every labelled track segment into one training set and hand
    // it to the evaluator for cross-validation.
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    Instances trainingSet = new Instances("Classes", new ArrayList<Attribute>(attributeList), 9);
    trainingSet.setClassIndex(0);

    HashMap<String, HashSet<TrackSegment>> dataset = getInferenceModel().getInput().getTrainingsSet();

    for (Entry<String, HashSet<TrackSegment>> entry : dataset.entrySet()) {
        // The map key is the class label for all segments in its bucket.
        for (TrackSegment trackSegment : entry.getValue()) {
            trainingSet.add(getInstance(entry.getKey(), trackSegment));
        }
    }

    assert trainingSet.numInstances() > 0 : "Training set is empty and has no instances"; //NO18N

    evaluate(trainingSet);
}

From source file:de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java

License:Open Source License

@Override
protected void handle() {
    // Drop the results of any previous inference run before starting.
    clearResults();

    Classifier classifier = getInferenceModel().getClassifier();
    HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();

    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList),
                0); //NO18N
        unlabeledInstances.setClassIndex(0);

        // Convert each segment to a weka instance; segmentList keeps the
        // segments in the same order so instances can be mapped back below.
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (TrackSegment segment : inferenceDataSet) {
            Instance instance = getInstance(segment);
            unlabeledInstances.add(instance);
            segmentList.add(segment);
        }

        // create copy (classification writes class values into this copy,
        // leaving unlabeledInstances untouched)
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);

                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);

                // get class label for the predicted class index
                String value = unlabeledInstances.classAttribute().value((int) classifyed);

                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }

                // put label and instance to result map
                put(value, instance);

            } catch (Exception ex) {
                // One failed classification must not abort the whole run.
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // update result set of the inferenceModel
        for (Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }

            // only those classes are put into  the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        // NOTE(review): when this throws, the clear() calls below are skipped,
        // leaving resultMap/instanceToTrackSegmentMap with stale state —
        // verify whether clean-up should happen in a finally block instead.
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list lengeth == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}

From source file:de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java

License:Open Source License

@Override
protected void handle() {
    // Drop the results of any previous inference run before starting.
    clearResults();

    Classifier classifier = getInferenceModel().getClassifier();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();

    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        Instances unlabeledInstances = new Instances("Unlabeld Tracks", new ArrayList<Attribute>(attributeList),
                0); //NO18N
        unlabeledInstances.setClassIndex(0);

        // Unlike the plain inference handler, this variant re-classifies the
        // *training* segments. Each segment is tagged with its map key as
        // label, then converted to a weka instance; segmentList keeps order
        // so instances can be mapped back to segments below.
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet()
                .entrySet()) {
            for (TrackSegment segment : entry.getValue()) {
                segment.setLabel(entry.getKey());
                Instance instance = getInstance(segment);
                unlabeledInstances.add(instance);
                segmentList.add(segment);
            }
        }

        // create copy (classification writes class values into this copy,
        // leaving unlabeledInstances untouched)
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);

                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);

                // get class label for the predicted class index
                String value = unlabeledInstances.classAttribute().value((int) classifyed);

                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }

                // put label and instance to result map
                put(value, instance);

            } catch (Exception ex) {
                // One failed classification must not abort the whole run.
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // update result set of the inferenceModel
        for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }

            // only those classes are put into  the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        // NOTE(review): when this throws, the clear() calls below are skipped,
        // leaving resultMap/instanceToTrackSegmentMap with stale state —
        // verify whether clean-up should happen in a finally block instead.
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list lengeth == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}

From source file:de.fub.maps.project.detector.model.inference.processhandler.TrainingsDataProcessHandler.java

License:Open Source License

@Override
protected void handle() {
    // Splits the training input per class into training/testing partitions
    // and runs the evaluation, reporting progress along the way.
    final ProgressHandle handle = ProgressHandleFactory.createHandle("Trainings");
    try {
        handle.start();
        Collection<Attribute> attributeCollection = getInferenceModel().getAttributes();
        ArrayList<Attribute> arrayList = new ArrayList<Attribute>(attributeCollection);
        Instances trainingSet = new Instances("Classes", arrayList, 0);
        trainingSet.setClassIndex(0);

        Instances testingSet = new Instances("Classes", arrayList, 0);
        testingSet.setClassIndex(0);

        HashMap<String, HashSet<TrackSegment>> dataset = getInferenceModel().getInput().getTrainingsSet();

        // Total work units for the progress bar. Progress is advanced once
        // per track segment below, so count segments here. (Bug fix: the
        // previous version summed waypoint counts per segment, so the bar
        // could never reach its total.)
        int datasetCount = 0;
        for (HashSet<TrackSegment> list : dataset.values()) {
            datasetCount += list.size();
        }
        handle.switchToDeterminate(datasetCount);
        int trackCount = 0;
        for (Entry<String, HashSet<TrackSegment>> entry : dataset.entrySet()) {

            // Per class: the first ceil(ratio * size) segments go to the
            // training set, the rest to the testing set.
            int trainingsSetSize = (int) Math.ceil(entry.getValue().size() * getTrainingsSetRatioParameter());
            int index = 0;
            for (TrackSegment trackSegment : entry.getValue()) {
                Instance instance = getInstance(entry.getKey(), trackSegment);

                if (index < trainingsSetSize) {
                    trainingSet.add(instance);
                } else {
                    testingSet.add(instance);
                }
                handle.progress(trackCount++);
                index++;
            }
        }

        assert trainingSet.numInstances() > 0 : "Training set is empty and has no instances"; //NO18N
        assert testingSet.numInstances() > 0 : "Testing set is empty and has no instances"; //NO18N
        handle.switchToIndeterminate();
        evaluate(trainingSet, testingSet);
    } finally {
        // Always close the progress handle, even if evaluation throws.
        handle.finish();
    }
}

From source file:de.ugoe.cs.cpdp.dataselection.DecisionTreeSelection.java

License:Apache License

@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    // Selects the single training product whose characteristics best match
    // the test data, judged by a REPTree regression over pairwise
    // similarity vectors; traindataSet is reduced in place to that product.
    final Instances data = characteristicInstances(testdata, traindataSet);

    // Nominal comparison of one product's characteristic against another's:
    // within 10% -> "same" (0.0), >10% above -> "more" (2.0), below -> "less" (1.0).
    final ArrayList<String> attVals = new ArrayList<String>();
    attVals.add("same");
    attVals.add("more");
    attVals.add("less");
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (int j = 0; j < data.numAttributes(); j++) {
        atts.add(new Attribute(data.attribute(j).name(), attVals));
    }
    atts.add(new Attribute("score"));
    Instances similarityData = new Instances("similarity", atts, 0);
    similarityData.setClassIndex(similarityData.numAttributes() - 1);

    try {
        Classifier classifier = new J48();
        // One similarity row per ordered pair (i, j), i != j, labelled with
        // the F-measure achieved when training on product i and evaluating
        // on product j. data row 0 is the test data; rows 1..n are the
        // training products, hence the +1 offsets.
        for (int i = 0; i < traindataSet.size(); i++) {
            classifier.buildClassifier(traindataSet.get(i));
            for (int j = 0; j < traindataSet.size(); j++) {
                if (i != j) {
                    double[] similarity = new double[data.numAttributes() + 1];
                    for (int k = 0; k < data.numAttributes(); k++) {
                        if (0.9 * data.get(i + 1).value(k) > data.get(j + 1).value(k)) {
                            similarity[k] = 2.0;
                        } else if (1.1 * data.get(i + 1).value(k) < data.get(j + 1).value(k)) {
                            similarity[k] = 1.0;
                        } else {
                            similarity[k] = 0.0;
                        }
                    }

                    Evaluation eval = new Evaluation(traindataSet.get(j));
                    eval.evaluateModel(classifier, traindataSet.get(j));
                    similarity[data.numAttributes()] = eval.fMeasure(1);
                    similarityData.add(new DenseInstance(1.0, similarity));
                }
            }
        }
        REPTree repTree = new REPTree();
        // Cap the fold count at the amount of available data; fewer
        // instances than folds would make REPTree fail. (Bug fix: a stray
        // unconditional setNumFolds(2) used to follow this guard, making it
        // dead code; it has been removed.)
        if (repTree.getNumFolds() > similarityData.size()) {
            repTree.setNumFolds(similarityData.size());
        }
        repTree.buildClassifier(similarityData);

        // Build the test-vs-training similarity rows (test data is row 0).
        Instances testTrainSimilarity = new Instances(similarityData);
        testTrainSimilarity.clear();
        for (int i = 0; i < traindataSet.size(); i++) {
            double[] similarity = new double[data.numAttributes() + 1];
            for (int k = 0; k < data.numAttributes(); k++) {
                if (0.9 * data.get(0).value(k) > data.get(i + 1).value(k)) {
                    similarity[k] = 2.0;
                } else if (1.1 * data.get(0).value(k) < data.get(i + 1).value(k)) {
                    similarity[k] = 1.0;
                } else {
                    similarity[k] = 0.0;
                }
            }
            testTrainSimilarity.add(new DenseInstance(1.0, similarity));
        }

        // Pick the training product with the highest predicted score.
        // (Bug fix: was Double.MIN_VALUE — the smallest *positive* double —
        // so an all-zero score column would have left the index at -1 and
        // caused an IndexOutOfBoundsException below.)
        int bestScoringProductIndex = -1;
        double maxScore = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < traindataSet.size(); i++) {
            double score = repTree.classifyInstance(testTrainSimilarity.get(i));
            if (score > maxScore) {
                maxScore = score;
                bestScoringProductIndex = i;
            }
        }
        Instances bestScoringProduct = traindataSet.get(bestScoringProductIndex);
        traindataSet.clear();
        traindataSet.add(bestScoringProduct);
    } catch (Exception e) {
        Console.printerr("failure during DecisionTreeSelection: " + e.getMessage());
        throw new RuntimeException(e);
    }
}

From source file:de.ugoe.cs.cpdp.dataselection.SetWiseEMContextSelection.java

License:Apache License

/**
 * Returns test- and training data containing only the project context
 * factors chosen in the configuration. This is later used for clustering.
 *
 * @param testdata     the test project's data (first row of the result)
 * @param traindataSet the training projects (one further row each)
 * @return a dataset with one row per project, columns = context factors
 */
protected Instances getContextFactors(Instances testdata, SetUniqueList<Instances> traindataSet) {
    // One numeric attribute per configured project context factor.
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (String factor : this.project_context_factors) {
        atts.add(new Attribute(factor));
    }

    final Instances data = new Instances("project_context_factors", atts, 0);

    // First row: the context-factor values of the test project (only one
    // instance per project is needed, so row 0 of each project is read).
    double[] values = new double[atts.size()];
    int col = 0;
    for (String factor : this.project_context_factors) {
        values[col++] = testdata.instance(0).value(testdata.attribute(factor));
    }
    data.add(new DenseInstance(1.0, values));

    // One further row per project of the training set; a fresh array is
    // allocated each time so rows do not share state.
    for (Instances traindata : traindataSet) {
        values = new double[atts.size()];
        col = 0;
        for (String factor : this.project_context_factors) {
            values[col++] = traindata.instance(0).value(traindata.attribute(factor));
        }
        data.add(new DenseInstance(1.0, values));
    }

    return data;
}