List of usage examples for weka.core Instances get
@Override
public Instance get(int index)
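The get method returns the Instance stored at the given 0-based position; it hands back a reference into the dataset, not a copy. A minimal standalone sketch (the file name data.arff is a placeholder, not part of any example below):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesGetDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path for any ARFF dataset
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.get(i); // same object as stored, not a copy
            System.out.println(i + ": " + inst);
        }
    }
}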
From source file:nlpmusic.StringClusterer.java
public ArrayList<ArrayList<String>> cluster(ArrayList<String> tem) throws Exception {
    Instances source = listLoad(tem);

    // Convert the string attribute into a word-count vector representation
    StringToWordVector vect = new StringToWordVector();
    vect.setWordsToKeep(to_keep);
    vect.setInputFormat(source);
    Instances datas = Filter.useFilter(source, vect);

    // Cluster the vectorized data with density-based DBSCAN
    DBSCAN clusterer = new DBSCAN();
    clusterer.setEpsilon(threshold);
    clusterer.setMinPoints(min_points);
    clusterer.buildClusterer(datas);

    // Group the original strings by their assigned cluster
    ArrayList<ArrayList<String>> ret = new ArrayList<>();
    for (int i = 0; i < clusterer.numberOfClusters(); i++) {
        ArrayList<String> to_add = new ArrayList<>();
        for (int j = 0; j < datas.size(); j++) {
            try {
                if (clusterer.clusterInstance(datas.get(j)) == i) {
                    to_add.add(source.get(j).toString());
                }
            } catch (Exception e) {
                // noise points cannot be assigned to a cluster; skip them
            }
        }
        ret.add(to_add);
    }
    return ret;
}
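A hypothetical caller for the method above, to show how the returned list groups the input strings by cluster; the no-argument StringClusterer constructor is an assumption, since only the cluster method and the to_keep, threshold, and min_points fields appear in the source:

// Hypothetical usage; the constructor is assumed, not shown in the source.
StringClusterer clusterer = new StringClusterer();
ArrayList<ArrayList<String>> groups =
        clusterer.cluster(new ArrayList<>(Arrays.asList("rock", "rocked", "jazz")));
for (ArrayList<String> group : groups) {
    System.out.println(group); // strings assigned to the same DBSCAN cluster
}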
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License:Open Source License
private Pair<IntervalRule, Double> createRule(Instances flatData, Instances miData, int iterations)
        throws Exception {
    // store for the distances between the reference instance and all others
    double[] distances = new double[flatData.numInstances()];
    // the distance function
    DistanceFunction distFunc = new EuclideanDistance(flatData);
    // permutation which sorts the distances
    Integer[] perm = new Integer[flatData.numInstances()];
    IntervalRule bestRule = null;
    double bestRuleScore = -Double.MAX_VALUE;

    // retrieve the best rule heuristically for a number of iterations
    for (int ruleIterations = 0; ruleIterations < iterations; ruleIterations++) {
        // randomly select a positive bag and take the instance with the
        // largest weight as the reference instance for the next rule
        Random r = new Random();
        int bagIdx;
        while (miData.get(bagIdx = r.nextInt(miData.numInstances())).value(2) == 0);
        Instance refInstance = miData.get(bagIdx).relationalValue(1).firstInstance();
        for (Instance i : miData.get(bagIdx).relationalValue(1)) {
            if (i.weight() > refInstance.weight()) {
                refInstance = i;
            }
        }

        IntervalRule rule = new IntervalRule();
        rule.updateClassifier(refInstance);

        // calculate the distance from the reference instance to all other
        // positive instances (negatives are set to NaN) and sort them
        Arrays.fill(distances, Double.NaN);
        for (int i = 0; i < distances.length; i++) {
            if (flatData.get(i).classValue() == 1) {
                distances[i] = distFunc.distance(refInstance, flatData.get(i));
            }
        }
        PermutationSort.sortPermInPlace(distances, perm);

        double ruleScore = 0;
        double tmpRuleScore = 0;

        // extend the rule successively by the nearest instances until the
        // score no longer increases
        int instanceIdx = 0;
        while (true) {
            if (!Double.isNaN(distances[perm[instanceIdx]])) {
                IntervalRule tmpRule = new IntervalRule(rule);
                tmpRule.updateClassifier(flatData.get(perm[instanceIdx]));
                // evaluate rule
                tmpRuleScore = ruleScore(tmpRule, flatData);
                if (tmpRuleScore >= ruleScore) {
                    ruleScore = tmpRuleScore;
                    rule = tmpRule;
                } else {
                    break;
                }
            }
            instanceIdx++;
        }

        if (ruleScore > bestRuleScore) {
            bestRuleScore = ruleScore;
            bestRule = rule;
        }
    } // iterations per rule

    return new ValuePair<IntervalRule, Double>(bestRule, bestRuleScore);
}
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License:Open Source License
private Instances toSingleInstanceDataset(Instances miData, Instances flatData) throws Exception {
    MultiInstanceToPropositional convertToProp = new MultiInstanceToPropositional();
    convertToProp.setInputFormat(miData);
    for (int i = 0; i < miData.numInstances(); i++) {
        convertToProp.input(miData.instance(i));
    }
    convertToProp.batchFinished();

    if (flatData == null) {
        flatData = convertToProp.getOutputFormat();
        flatData.deleteAttributeAt(0); // remove the bag index attribute
    }

    Instance processed;
    while ((processed = convertToProp.output()) != null) {
        processed.setDataset(null);
        processed.deleteAttributeAt(0); // remove the bag index attribute
        flatData.add(processed);
    }

    // set weights: propagate the per-instance weights from the bags
    int instanceIdx = 0;
    for (Instance bag : miData) {
        for (Instance instance : bag.relationalValue(1)) {
            flatData.get(instanceIdx).setWeight(instance.weight());
            instanceIdx++;
        }
    }
    return flatData;
}
From source file:org.openml.webapplication.features.ExtractFeatures.java
License:Open Source License
public static List<Feature> getFeatures(Instances dataset, String defaultClass) {
    if (defaultClass != null) {
        dataset.setClass(dataset.attribute(defaultClass));
    } else {
        dataset.setClassIndex(dataset.numAttributes() - 1);
    }
    final ArrayList<Feature> resultFeatures = new ArrayList<Feature>();
    for (int i = 0; i < dataset.numAttributes(); i++) {
        Attribute att = dataset.attribute(i);
        int numValues = dataset.classAttribute().isNominal() ? dataset.classAttribute().numValues() : 0;
        AttributeStatistics attributeStats = new AttributeStatistics(dataset.attribute(i), numValues);
        for (int j = 0; j < dataset.numInstances(); ++j) {
            attributeStats.addValue(dataset.get(j).value(i), dataset.get(j).classValue());
        }

        String data_type = null;
        Integer numberOfDistinctValues = null;
        Integer numberOfUniqueValues = null;
        Integer numberOfMissingValues = null;
        Integer numberOfIntegerValues = null;
        Integer numberOfRealValues = null;
        Integer numberOfNominalValues = null;
        Integer numberOfValues = null;
        Double maximumValue = null;
        Double minimumValue = null;
        Double meanValue = null;
        Double standardDeviation = null;

        AttributeStats as = dataset.attributeStats(i);
        numberOfDistinctValues = as.distinctCount;
        numberOfUniqueValues = as.uniqueCount;
        numberOfMissingValues = as.missingCount;
        numberOfIntegerValues = as.intCount;
        numberOfRealValues = as.realCount;
        if (att.isNominal()) {
            numberOfNominalValues = att.numValues();
        }
        numberOfValues = attributeStats.getTotalObservations();

        if (att.isNumeric()) {
            maximumValue = attributeStats.getMaximum();
            minimumValue = attributeStats.getMinimum();
            meanValue = attributeStats.getMean();
            standardDeviation = 0.0;
            try {
                standardDeviation = attributeStats.getStandardDeviation();
            } catch (Exception e) {
                Conversion.log("WARNING", "StdDev", "Could not compute standard deviation of feature "
                        + att.name() + ": " + e.getMessage());
            }
        }

        // map Weka's attribute type constants (0, 1, 2) to a readable name
        if (att.type() == Attribute.NUMERIC) {
            data_type = "numeric";
        } else if (att.type() == Attribute.NOMINAL) {
            data_type = "nominal";
        } else if (att.type() == Attribute.STRING) {
            data_type = "string";
        } else {
            data_type = "unknown";
        }

        resultFeatures.add(new Feature(att.index(), att.name(), data_type,
                att.index() == dataset.classIndex(), numberOfDistinctValues, numberOfUniqueValues,
                numberOfMissingValues, numberOfIntegerValues, numberOfRealValues, numberOfNominalValues,
                numberOfValues, maximumValue, minimumValue, meanValue, standardDeviation,
                attributeStats.getClassDistribution()));
    }
    return resultFeatures;
}
From source file:org.wkwk.classifier.MyC45.java
public double bestThreshold(Instances data, Attribute attr) {
    data.sort(attr);
    double m_ig = 0;
    double bestThr = 0;
    double classTemp = data.get(0).classValue();
    double valueTemp = data.get(0).value(attr);
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (classTemp != inst.classValue()) {
            classTemp = inst.classValue();
            // candidate threshold: midpoint between the previous attribute
            // value and the value at which the class label changes
            double threshold = valueTemp + ((inst.value(attr) - valueTemp) / 2);
            double igTemp = computeInfoGainCont(data, attr, threshold);
            if (m_ig < igTemp) {
                m_ig = igTemp;
                bestThr = threshold;
            }
        }
        // always remember the previous instance's value so the midpoint lies
        // between adjacent instances (the original only updated this on a
        // class change, which produced stale split points)
        valueTemp = inst.value(attr);
    }
    return bestThr;
}
From source file:sentinets.Prediction.java
License:Open Source License
public void writeStats(Instances tweetInstances) {
    //TweetCorpusStatistics stats = new TweetCorpusStatistics();
    System.out.println("Stats Instances: \n" + tweetInstances.toSummaryString());
    for (int i = 0; i < tweetInstances.size(); i++) {
        // column numbers below are 1-based; subtract 1 for Weka's 0-based attribute indices
        String user = tweetInstances.get(i).stringValue(11 - 1);
        String mentions = tweetInstances.get(i).stringValue(3 - 1);
        String hashtags = tweetInstances.get(i).stringValue(14 - 1);
        String epClass = tweetInstances.get(i).stringValue(15 - 1);
        String snsClass = tweetInstances.get(i).stringValue(16 - 1);
        System.out.println("Tweet Details:\t" + user + "\t" + mentions + "\t" + hashtags + "\t"
                + printDist(classDist.get(i)));
        //stats.updateStatistics(user, mentions, hashtags, epClass+","+snsClass, classDist.get(i));
    }
}
From source file:soccer.core.models.BookKeeperConsistency.java
public void showTheInstance(int index) throws IOException {
    Instances instances = loader.getDataSet();
    Instance i = instances.get(index);
    System.out.println(i.toString());
}
From source file:svmal.SVMStrategy.java
public static Instances InstancesToInstances2(Instances insts) {
    Instances result = new Instances(insts, 0, 0);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance orig = insts.get(i);
        Instance2 inst2 = new Instance2(orig.weight(), orig.toDoubleArray());
        inst2.setDataset(result);
        result.add(inst2);
    }
    return result;
}
From source file:swm.project.mappings.UserToUserCluster.java
private void clusterUserHistoryWithKmeans() throws FileNotFoundException, IOException, Exception {
    userToUserClusterHistory = new HashMap<>();
    userClustersToUsersHistory = new HashMap<>();
    Reader reader = new FileReader(MappingConstants.USER_MOVIE_CLUSTERS);
    Instances instanceValues = new Instances(reader);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setNumClusters(20);
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setDistanceFunction(new EuclideanDistance());
    kmeans.buildClusterer(instanceValues);

    // getAssignments() requires setPreserveInstancesOrder(true) above
    int[] assignments = kmeans.getAssignments();
    int userid = 0;
    for (int clusterNo : assignments) {
        // attribute 0 holds the user id in this dataset
        int user = (int) instanceValues.get(userid).value(0);
        userToUserClusterHistory.put(user, clusterNo);
        ArrayList<Integer> users = new ArrayList<>();
        if (userClustersToUsersHistory.containsKey(clusterNo)) {
            users = userClustersToUsersHistory.get(clusterNo);
            users.add(user);
        } else {
            users.add(user);
            userClustersToUsersHistory.put(clusterNo, users);
        }
        userid++;
    }
}
From source file:test.org.moa.opencl.IBk.java
License:Open Source License
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if an error occurred during the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {
    if (m_Train.numInstances() == 0) {
        // no training instances yet: fall back to the default model
        return m_defaultModel.distributionForInstance(instance);
    }
    if ((m_WindowSize > 0) && (m_Train.numInstances() > m_WindowSize)) {
        m_kNNValid = false;
        boolean deletedInstance = false;
        while (m_Train.numInstances() > m_WindowSize) {
            m_Train.delete(0);
            deletedInstance = true;
        }
        // rebuild datastructure; KDTree currently can't delete
        // (the original never set deletedInstance, so this rebuild was dead code)
        if (deletedInstance == true)
            m_NNSearch.setInstances(m_Train);
    }

    // Select k by cross validation
    if (!m_kNNValid && (m_CrossValidate) && (m_kNNUpper >= 1)) {
        crossValidate();
    }

    m_NNSearch.addInstanceInfo(instance);
    Instances neighbours = m_NNSearch.kNearestNeighbours(instance, m_kNN);
    double[] distances = m_NNSearch.getDistances();

    System.out.print("distances weka ");
    for (int i = 0; i < distances.length; ++i)
        System.out.print(" " + distances[i]);
    System.out.println();
    System.out.println("Neighbours");
    for (int i = 0; i < neighbours.size(); ++i)
        System.out.println(neighbours.get(i));

    double[] distribution = makeDistribution(neighbours, distances);
    return distribution;
}