Example usage of the weka.core.Instances constructor

List of usage examples for the weka.core.Instances constructor

Introduction

On this page you can find example usages of the weka.core.Instances constructor shown in the prototype below.

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Source Link

Document

Creates an empty set of instances.

Usage

From source file:com.reactivetechnologies.analytics.core.eval.AdaBoostM1WithBuiltClassifiers.java

License:Open Source License

/**
 * Computes the boosting weights ({@code m_Betas}) for the classifiers held in
 * {@code m_Classifiers} WITHOUT building them: each classifier is only
 * evaluated against a re-weighted copy of the supplied data.
 *
 * <p>The selected subset and its weight array are still computed in every
 * round although nothing reads them afterwards; they are leftovers of the
 * resampling step that was removed together with the build call.
 *
 * @param data the data used to evaluate the classifiers
 * @throws Exception if copying, selecting or evaluating fails
 */
@Override
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    final int instanceCount = data.numInstances();

    // Reset the boosting state.
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Work on a private copy so that re-weighting never affects the caller's
    // data, and normalise the weights so that they sum to one.
    Instances weighted = new Instances(data, 0, instanceCount);
    double weightTotal = weighted.sumOfWeights();
    for (int idx = 0; idx < weighted.numInstances(); idx++) {
        weighted.instance(idx).setWeight(weighted.instance(idx).weight() / weightTotal);
    }

    // One boosting round per supplied classifier.
    m_NumIterationsPerformed = 0;
    while (m_NumIterationsPerformed < m_Classifiers.length) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Pick the instances this round would train on (result is not used below).
        Instances selected;
        if (m_WeightThreshold >= 100) {
            selected = new Instances(weighted);
        } else {
            selected = selectWeightQuantile(weighted, (double) m_WeightThreshold / 100);
        }

        // Snapshot of the selected weights (not used below).
        double[] selectedWeights = new double[selected.numInstances()];
        for (int idx = 0; idx < selectedWeights.length; idx++) {
            selectedWeights[idx] = selected.instance(idx).weight();
        }

        // Evaluate the existing classifier; the classifier is deliberately NOT
        // (re)built here. The evaluation is repeated up to 10 times while the
        // error rate is zero.
        double errorRate;
        int attempts = 0;
        do {
            Evaluation eval = new Evaluation(data);
            eval.evaluateModel(m_Classifiers[m_NumIterationsPerformed], weighted);
            errorRate = eval.errorRate();
            attempts++;
        } while (Utils.eq(errorRate, 0) && (attempts < 10));

        // Stop when the error is too large or exactly zero.
        if (Utils.grOrEq(errorRate, 0.5) || Utils.eq(errorRate, 0)) {
            if (m_NumIterationsPerformed == 0) {
                // The very first classifier has to be used regardless.
                m_NumIterationsPerformed = 1;
            }
            break;
        }

        // The model's vote is the log of the odds of being correct; the same
        // odds are the factor handed to setWeights.
        double correctOdds = (1 - errorRate) / errorRate;
        m_Betas[m_NumIterationsPerformed] = Math.log(correctOdds);
        if (m_Debug) {
            System.err.println("\terror rate = " + errorRate + "  beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights for the next round.
        setWeights(weighted, correctOdds);

        m_NumIterationsPerformed++;
    }
}

From source file:com.reactivetechnologies.analytics.mapper.TEXTDataMapper.java

License:Open Source License

/**
 * Converts the texts of a request into a two-attribute Weka dataset: a string
 * attribute "text" and a nominal attribute "class" whose values are the
 * request's class variables. The class index is set to the "class" attribute.
 *
 * @param request the incoming request; may be {@code null}
 * @return the dataset wrapping the built instances, or {@code null} when the
 *         request is {@code null} or carries no data
 * @throws ParseException declared by the mapper contract
 */
@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {

    // Nothing to map: missing request or empty payload.
    if (request == null || request.getData() == null || request.getData().length <= 0) {
        return null;
    }

    // Nominal values of the class attribute.
    FastVector classValues = new FastVector();
    for (String label : request.getClassVars()) {
        classValues.addElement(label);
    }

    // A null value vector makes "text" a string attribute.
    Attribute textAttribute = new Attribute("text", (FastVector) null, 0);
    Attribute classAttribute = new Attribute("class", classValues, 1);

    FastVector header = new FastVector(2);
    header.addElement(textAttribute);
    header.addElement(classAttribute);

    Instances dataset = new Instances("attr-reln", header, request.getData().length);
    dataset.setClassIndex(1);

    for (Text entry : request.getData()) {
        Instance row = new Instance(2);
        row.setValue(textAttribute, entry.getText());
        row.setValue(classAttribute, entry.getTclass());
        dataset.add(row);
    }

    return new Dataset(dataset);
}

From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java

License:Open Source License

/**
 * FAST HACK REMOVING FUNCTIONALITIES FROM WEKA ORIGINAL METHOD!
 * /* ww w  .  j  a v a  2  s  . c  om*/
 * @param rs
 * @return
 * @throws SQLException
 */
public static Instances retrieveInstanceFromResultSet(ResultSet rs) throws SQLException {

    ResultSetMetaData md = rs.getMetaData();

    // Determine structure of the instances
    int numAttributes = md.getColumnCount();
    int[] attributeTypes = new int[numAttributes];
    Hashtable[] nominalIndexes = new Hashtable[numAttributes];
    FastVector[] nominalStrings = new FastVector[numAttributes];
    for (int i = 1; i <= numAttributes; i++) {
        attributeTypes[i - 1] = Attribute.NUMERIC;
    }

    // For sqlite
    // cache column names because the last while(rs.next()) { iteration for
    // the tuples below will close the md object:
    Vector<String> columnNames = new Vector<String>();
    for (int i = 0; i < numAttributes; i++) {
        columnNames.add(md.getColumnName(i + 1));
    }

    // Step through the tuples
    FastVector instances = new FastVector();
    int rowCount = 0;
    while (rs.next()) {

        double[] vals = new double[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {

            int in = rs.getInt(i);
            if (rs.wasNull()) {
                vals[i - 1] = Instance.missingValue();
            } else {
                vals[i - 1] = in;
            }
            Instance newInst = new Instance(1.0, vals);
            instances.addElement(newInst);
            rowCount++;
        }
    }
    // disconnectFromDatabase(); (perhaps other queries might be made)

    // Create the header and add the instances to the dataset
    FastVector attribInfo = new FastVector();
    for (int i = 0; i < numAttributes; i++) {
        /* Fix for databases that uppercase column names */
        // String attribName = attributeCaseFix(md.getColumnName(i + 1));
        String attribName = columnNames.get(i);
        switch (attributeTypes[i]) {
        case Attribute.NOMINAL:
            attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
            break;
        case Attribute.NUMERIC:
            attribInfo.addElement(new Attribute(attribName));
            break;
        case Attribute.STRING:
            Attribute att = new Attribute(attribName, (FastVector) null);
            attribInfo.addElement(att);
            for (int n = 0; n < nominalStrings[i].size(); n++) {
                att.addStringValue((String) nominalStrings[i].elementAt(n));
            }
            break;
        case Attribute.DATE:
            attribInfo.addElement(new Attribute(attribName, (String) null));
            break;
        default:
            throw new SQLException("Unknown attribute type");
        }
    }

    Instances result = new Instances("QueryResult", attribInfo, instances.size());
    for (int i = 0; i < instances.size(); i++) {
        result.add((Instance) instances.elementAt(i));
    }

    rs.close();

    return result;

}

From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java

License:LGPL

/**
 * Classifies a single feature vector with the classifier held by
 * {@code MonitoringWorkerThread.cls} and maps the predicted class label to an
 * activity id via {@code getActivityIDofClassLabel}.
 *
 * <p>The header consists of the nominal class attribute (STANDING, SITTING,
 * LYINGDOWN, WALKING) at index 0 followed by six numeric features. If
 * classification fails, the stack trace is printed and the empty label is
 * looked up instead.
 *
 * @param newInstanceArray attribute values of the instance to classify
 * @return the activity id resolved from the predicted class label
 */
public static int customKnnClassifier(double[] newInstanceArray) {
    ts1 = new Timestamp(System.currentTimeMillis());

    // Class values, in the order used by this classifier's header.
    ArrayList<String> activityLabels = new ArrayList<String>();
    for (String label : new String[] { "STANDING", "SITTING", "LYINGDOWN", "WALKING" }) {
        activityLabels.add(label);
    }

    // Header: class attribute first, then the numeric features.
    ArrayList<Attribute> header = new ArrayList<Attribute>();
    header.add(new Attribute("class", activityLabels));
    for (String feature : new String[] { "1_1_2_1", "1_1_3_1", "1_1_9_2", "2_1_3_1", "2_1_4_1", "2_1_9_2" }) {
        header.add(new Attribute(feature));
    }

    // Single-row dataset holding the instance to classify.
    Instances singleRow = new Instances("TestInstances", header, 0);
    singleRow.add(new DenseInstance(1.0, newInstanceArray));
    singleRow.setClassIndex(0);

    String predictedLabel = "";
    try {
        int predictedIndex = (int) (MonitoringWorkerThread.cls.classifyInstance(singleRow.firstInstance()));
        predictedLabel = singleRow.firstInstance().classAttribute().value(predictedIndex);
    } catch (Exception e) {
        e.printStackTrace();
    }

    ts = new Timestamp(System.currentTimeMillis());
    return getActivityIDofClassLabel(predictedLabel);
}

From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java

License:LGPL

/**
 * Classifies a single feature vector with the classifier held by
 * {@code MonitoringWorkerThread.cls}, logs the predicted class index and maps
 * the predicted class label to an activity id via
 * {@code getActivityIDofClassLabel}.
 *
 * <p>The header consists of the nominal class attribute (STANDING, WALKING,
 * SITTING, LYINGDOWN - note the order) at index 0 followed by six numeric
 * features. If classification fails, the stack trace is printed and the empty
 * label is looked up instead.
 *
 * @param newInstanceArray attribute values of the instance to classify
 * @return the activity id resolved from the predicted class label
 */
public static int customJRipClassifier(double[] newInstanceArray) {
    ts1 = new Timestamp(System.currentTimeMillis());

    // Class values, in the order used by this classifier's header.
    ArrayList<String> activityLabels = new ArrayList<String>();
    for (String label : new String[] { "STANDING", "WALKING", "SITTING", "LYINGDOWN" }) {
        activityLabels.add(label);
    }

    // Header: class attribute first, then the numeric features.
    ArrayList<Attribute> header = new ArrayList<Attribute>();
    header.add(new Attribute("class", activityLabels));
    for (String feature : new String[] { "1_1_2_1", "1_1_3_1", "1_1_9_2", "2_1_3_1", "2_1_4_1", "2_1_9_2" }) {
        header.add(new Attribute(feature));
    }

    // Single-row dataset holding the instance to classify.
    Instances singleRow = new Instances("TestInstances", header, 0);
    singleRow.add(new DenseInstance(1.0, newInstanceArray));
    singleRow.setClassIndex(0);

    String predictedLabel = "";
    try {
        int predictedIndex = (int) MonitoringWorkerThread.cls.classifyInstance(singleRow.firstInstance());
        Log.i("classifyActivity JRip ---->", predictedIndex + "");
        predictedLabel = singleRow.firstInstance().classAttribute().value(predictedIndex);
    } catch (Exception e) {
        e.printStackTrace();
    }

    ts = new Timestamp(System.currentTimeMillis());
    return getActivityIDofClassLabel(predictedLabel);
}

From source file:com.sensyscal.activityrecognition2.utils.Classifiers.java

License:LGPL

/**
 * Classifies a single feature vector with the classifier held by
 * {@code MonitoringWorkerThread.cls} and returns the prediction after it has
 * been translated by {@code getJ48ActivityId}.
 *
 * <p>The header consists of the nominal class attribute (STANDING, SITTING,
 * LYINGDOWN, WALKING) at index 0 followed by six numeric features. Unlike the
 * sibling classifiers, the class label is looked up but not used for the
 * return value. If classification fails, the stack trace is printed and the
 * activity id assigned so far (initially 0) is returned.
 *
 * @param newInstanceArray attribute values of the instance to classify
 * @return the activity id produced by {@code getJ48ActivityId}, or 0 when
 *         classification failed before an id was assigned
 */
public static int customJ48Classifier(double[] newInstanceArray) {
    ts1 = new Timestamp(System.currentTimeMillis());

    // Class values, in the order used by this classifier's header.
    ArrayList<String> activityLabels = new ArrayList<String>();
    for (String label : new String[] { "STANDING", "SITTING", "LYINGDOWN", "WALKING" }) {
        activityLabels.add(label);
    }

    // Header: class attribute first, then the numeric features.
    ArrayList<Attribute> header = new ArrayList<Attribute>();
    header.add(new Attribute("class", activityLabels));
    for (String feature : new String[] { "1_1_2_1", "1_1_3_1", "1_1_9_2", "2_1_3_1", "2_1_4_1", "2_1_9_2" }) {
        header.add(new Attribute(feature));
    }

    // Single-row dataset holding the instance to classify.
    Instances singleRow = new Instances("TestInstances", header, 0);
    singleRow.add(new DenseInstance(1.0, newInstanceArray));
    singleRow.setClassIndex(0);

    int resolvedId = 0;
    String predictedLabel = "";
    try {
        resolvedId = (int) getJ48ActivityId(
                MonitoringWorkerThread.cls.classifyInstance(singleRow.firstInstance()));
        // Looked up but not returned; the call stays inside the try so that a
        // failing lookup is still caught and reported here.
        predictedLabel = singleRow.firstInstance().classAttribute().value((int) resolvedId);
    } catch (Exception e) {
        e.printStackTrace();
    }

    ts = new Timestamp(System.currentTimeMillis());
    return resolvedId;
}

From source file:com.yahoo.research.scoring.classifier.NutchOnlineClassifier.java

License:Apache License

/**
 * Builds the {@link Instances} header used by the wrapped classifier and the
 * bookkeeping derived from it: the attribute-name-to-index map, the feature
 * dimension and the all-zero replacement array for missing values.
 *
 * <p>Attribute layout: nominal "class" (sem / nonsem), string "domain", four
 * numeric graph features and {@code hashTrickSize} nominal hash attributes.
 */
private void initInstances() {
    // Class attribute with its two allowed values.
    ArrayList<String> classValues = new ArrayList<String>();
    classValues.add("sem");
    classValues.add("nonsem");
    Attribute classAttribute = new Attribute("class", classValues);

    ArrayList<Attribute> header = new ArrayList<Attribute>();
    header.add(classAttribute);

    // A null value list turns "domain" into a string attribute.
    header.add(new Attribute("domain", (List<String>) null));

    // Numeric graph features.
    for (String graphFeature : new String[] { "sempar", "nonsempar", "semsib", "nonsemsib" }) {
        header.add(new Attribute(graphFeature));
    }

    // Hash attributes are nominal (boolAttValues) rather than numeric: it is
    // not possible to create boolean attributes directly, and per the original
    // author's runtime experiments nominal ones slightly reduce split time.
    int hashIndex = 0;
    while (hashIndex < hashTrickSize) {
        header.add(new Attribute(getAttributeNameOfHash(hashIndex), boolAttValues));
        hashIndex++;
    }

    // Create the (empty) dataset and mark the class attribute.
    instances = new Instances("Anthelion", header, 1);
    instances.setClass(classAttribute);

    // Name -> position lookup for every attribute.
    attributesIndex = new HashMap<String, Integer>();
    int position = 0;
    for (Attribute attribute : header) {
        attributesIndex.put(attribute.name(), position);
        position++;
    }

    // Dimension = class + domain + 4 graph features + hashes.
    dimension = 1 + 1 + 4 + hashTrickSize;

    // Replacement values for missing entries: all zeros.
    replaceMissingValues = new double[dimension];
    for (int slot = 0; slot < dimension; slot++) {
        replaceMissingValues[slot] = 0.0;
    }
}

From source file:core.classification.Classifiers.java

License:Open Source License

/**
 * Private constructor for the <code>Classifiers</code> object.
 *
 * <p>Instantiates all classifiers, then either trains them or reads them from
 * the model files "SCA.model", "SCB.model", "SCC1.model", "SCC2.model",
 * "SCC3.model", "RC.model" and "YNC.model". Failures in either path are
 * printed and otherwise ignored. Finally the empty dataset structures
 * (headers) for every classifier are created.
 *
 * @param train <code>true</code> to train the classifiers, <code>false</code>
 *              to read them from the model files
 */
private Classifiers(boolean train) {
    SCA = new BayesNet();
    SCB = new MultilayerPerceptron();
    SCC1 = new MultilayerPerceptron();
    SCC2 = new MultilayerPerceptron();
    SCC3 = new MultilayerPerceptron();

    RC = new CostSensitiveClassifier();
    YNC = new J48();

    if (!train) {
        // Load previously stored models; one failure aborts the remaining reads.
        try {
            readSC("SCA.model", "SCB.model", "SCC1.model", "SCC2.model", "SCC3.model");
            readRC("RC.model");
            readYNC("YNC.model");
        } catch (Exception e) {
            System.out.println("Error while reading the classifiers: ");
            e.printStackTrace();
        }
    } else {
        // Train each group independently so one failure does not stop the others.
        try {
            this.trainSC();
        } catch (Exception e) {
            System.out.println("The system encountered the following error while training SC:");
            e.printStackTrace();
        }
        try {
            this.trainRC();
        } catch (Exception e) {
            System.out.println("The system encountered the following error while training RC:");
            e.printStackTrace();
        }
        try {
            this.trainYNC();
        } catch (Exception e) {
            System.out.println("The system encountered the following error while training YNC:");
            e.printStackTrace();
        }
    }

    // ---- Nominal value sets -------------------------------------------

    // "0".."4" (not referenced by any structure below).
    FastVector zeroToFour = new FastVector();
    zeroToFour.addElement("0");
    zeroToFour.addElement("1");
    zeroToFour.addElement("2");
    zeroToFour.addElement("3");
    zeroToFour.addElement("4");

    // "1".."4": values of the CLASS-like attributes.
    FastVector classLabels = new FastVector();
    classLabels.addElement("1");
    classLabels.addElement("2");
    classLabels.addElement("3");
    classLabels.addElement("4");

    // "0".."4": values of RELID.
    FastVector relationLabels = new FastVector();
    relationLabels.addElement("0");
    relationLabels.addElement("1");
    relationLabels.addElement("2");
    relationLabels.addElement("3");
    relationLabels.addElement("4");

    // "Y"/"N": values of YN.
    FastVector yesNoLabels = new FastVector();
    yesNoLabels.addElement("Y");
    yesNoLabels.addElement("N");

    // ---- Structure for SCA --------------------------------------------
    FastVector scaAttributes = new FastVector();
    scaAttributes.addElement(new Attribute("RATIO"));
    scaAttributes.addElement(new Attribute("CLASS", classLabels));
    dataStructSCA = new Instances("SCA-STRUCT", scaAttributes, 0);
    dataStructSCA.setClassIndex(1);

    // ---- Structure for SCB --------------------------------------------
    FastVector scbAttributes = new FastVector();
    scbAttributes.addElement(new Attribute("H2"));
    scbAttributes.addElement(new Attribute("D2"));
    scbAttributes.addElement(new Attribute("DX"));
    scbAttributes.addElement(new Attribute("PCLASS", classLabels));
    scbAttributes.addElement(new Attribute("CLASS", classLabels));
    dataStructSCB = new Instances("SCB-STRUCT", scbAttributes, 0);
    dataStructSCB.setClassIndex(4);

    // ---- Structures for SCC1 / SCC2 / SCC3 ----------------------------
    FastVector scc1Attributes = new FastVector();
    scc1Attributes.addElement(new Attribute("LH"));
    scc1Attributes.addElement(new Attribute("LD"));
    scc1Attributes.addElement(new Attribute("LDX"));
    scc1Attributes.addElement(new Attribute("LCLASS", classLabels));
    scc1Attributes.addElement(new Attribute("CLASS", classLabels));
    dataStructSCC1 = new Instances("SCC1-STRUCT", scc1Attributes, 0);
    dataStructSCC1.setClassIndex(4);

    FastVector scc2Attributes = new FastVector();
    scc2Attributes.addElement(new Attribute("EH"));
    scc2Attributes.addElement(new Attribute("ED"));
    scc2Attributes.addElement(new Attribute("EDX"));
    scc2Attributes.addElement(new Attribute("ECLASS", classLabels));
    scc2Attributes.addElement(new Attribute("CLASS", classLabels));
    dataStructSCC2 = new Instances("SCC2-STRUCT", scc2Attributes, 0);
    dataStructSCC2.setClassIndex(4);

    FastVector scc3Attributes = new FastVector();
    scc3Attributes.addElement(new Attribute("SH"));
    scc3Attributes.addElement(new Attribute("SD"));
    scc3Attributes.addElement(new Attribute("SDX"));
    scc3Attributes.addElement(new Attribute("SCLASS", classLabels));
    scc3Attributes.addElement(new Attribute("CLASS", classLabels));
    dataStructSCC3 = new Instances("SCC3-STRUCT", scc3Attributes, 0);
    dataStructSCC3.setClassIndex(4);

    // ---- Structure for RC ---------------------------------------------
    FastVector rcAttributes = new FastVector();
    rcAttributes.addElement(new Attribute("H2"));
    rcAttributes.addElement(new Attribute("D2"));
    rcAttributes.addElement(new Attribute("DX"));
    rcAttributes.addElement(new Attribute("CLASS", classLabels));
    rcAttributes.addElement(new Attribute("PCLASS", classLabels));
    rcAttributes.addElement(new Attribute("RELID", relationLabels));
    dataStructRC = new Instances("RC-STRUCT", rcAttributes, 0);
    dataStructRC.setClassIndex(5);

    // ---- Structure for YNC --------------------------------------------
    FastVector yncAttributes = new FastVector();
    yncAttributes.addElement(new Attribute("PCLASS", classLabels));
    yncAttributes.addElement(new Attribute("CCLASS", classLabels));
    yncAttributes.addElement(new Attribute("RAREA"));
    yncAttributes.addElement(new Attribute("H"));
    yncAttributes.addElement(new Attribute("D"));
    yncAttributes.addElement(new Attribute("V"));
    yncAttributes.addElement(new Attribute("YN", yesNoLabels));
    dataStructYC = new Instances("YC-STRUCT", yncAttributes, 0);
    dataStructYC.setClassIndex(6);
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Returns the centroids of the clusters as a dataset named "centroids".
 *
 * <p>The distinct cluster ids are taken from {@code m_clusterAssignments} in
 * order of first appearance (entries equal to -1 are skipped), each shifted by
 * {@code num}. For every collected id, the instances of {@code ID} whose first
 * attribute equals that id are summed and averaged through
 * {@code CommonMethords}, and the result is added as one row.
 *
 * @param num offset added to every cluster id before it is matched against
 *            the first attribute of {@code ID}
 * @return a dataset with {@code m_numClusters} rows and the same number of
 *         attributes as {@code ID} ("clusterID" plus one "Subject" attribute
 *         per remaining column)
 */
public Instances getCentroids(int num) {
    // Build the complete header BEFORE creating the dataset: the Instances
    // constructor assigns attribute indices only to the attributes present at
    // construction time.
    FastVector atts = new FastVector();
    atts.addElement(new Attribute("clusterID"));
    for (int i = 0; i < ID.numAttributes() - 1; i++) {
        atts.addElement(new Attribute("Subject" + String.valueOf(i)));
    }
    Instances data = new Instances("centroids", atts, m_numClusters);

    // Collect the distinct cluster ids on a scratch copy of the assignments;
    // handled ids are overwritten with -1 so they are skipped afterwards.
    double[] map = new double[m_numClusters];
    double[] temp = new double[m_clusterAssignments.length];
    System.arraycopy(m_clusterAssignments, 0, temp, 0, m_clusterAssignments.length);
    int filled = 0;
    // Stop as soon as the map is full; writing one more id would run past the
    // end of the array.
    for (int i = 0; i < temp.length && filled < map.length; i++) {
        double id = temp[i];
        if (id == -1) {
            continue;
        }
        for (int j = i; j < temp.length; j++) {
            if (temp[j] == id) {
                temp[j] = -1;
            }
        }
        map[filled] = id + num;
        filled++;
    }

    // Average the members of every cluster.
    // NOTE(review): slots of map that were never filled stay 0.0 and are still
    // averaged, possibly with count == 0 - confirm how CommonMethords.calAver
    // handles a zero count.
    for (int i = 0; i < map.length; i++) {
        double id = map[i];
        double[] averatts = new double[ID.numAttributes()];
        int count = 0;
        for (int j = 0; j < ID.numInstances(); j++) {
            Instance iter = ID.instance(j);
            if (iter.value(0) == id) {
                averatts = CommonMethords.add(averatts, iter.toDoubleArray());
                count++;
            }
        }
        averatts = CommonMethords.calAver(averatts, count);
        data.add(new Instance(1, averatts));
    }
    return data;
}

From source file:core.DBScan.java

License:Open Source License

/**
 * Generate Clustering via DBScan/*  w  w  w  .  java  2 s  .  com*/
 * @param instances The instances that need to be clustered
 * @throws java.lang.Exception If clustering was not successful
 */
public void buildClusterer(Instances instances) throws Exception {
    // can clusterer handle the data?
    getCapabilities().testWithFail(instances);
    long time_1 = System.currentTimeMillis();

    processed_InstanceID = 0;
    numberOfGeneratedClusters = 0;
    clusterID = 0;

    replaceMissingValues_Filter = new ReplaceMissingValues();
    replaceMissingValues_Filter.setInputFormat(instances);
    Instances filteredInstances = Filter.useFilter(instances, replaceMissingValues_Filter);

    database = databaseForName(getDatabase_Type(), filteredInstances);
    for (int i = 0; i < database.getInstances().numInstances(); i++) {
        DataObject dataObject = dataObjectForName(getDatabase_distanceType(),
                database.getInstances().instance(i), Integer.toString(i), database);
        database.insert(dataObject);
    }
    database.setMinMaxValues();

    Iterator iterator = database.dataObjectIterator();
    while (iterator.hasNext()) {
        DataObject dataObject = (DataObject) iterator.next();
        if (dataObject.getClusterLabel() == DataObject.UNCLASSIFIED) {
            if (expandCluster(dataObject)) {
                clusterID++;
                numberOfGeneratedClusters++;
            }
        }
    }

    long time_2 = System.currentTimeMillis();
    elapsedTime = (double) (time_2 - time_1) / 1000.0;

    FastVector atts = new FastVector();
    Attribute att = new Attribute("clusterID");
    atts.addElement(att);

    double[][] arr = new CommonMethords().InstanceToArrays(instances);

    for (int i = 0; i < arr[0].length; i++) {
        Attribute att2 = new Attribute("Subject" + String.valueOf(i));
        atts.addElement(att2);
    }
    ID = new Instances("doc-subject", atts, arr.length);

    for (int i = 0; i < arr.length; i++) {
        double[] attsarr = new double[arr.length + 1];
        DataObject t = database.getDataObject(Integer.toString(i));
        if (t.getClusterLabel() == DataObject.NOISE)
            attsarr[0] = -1;
        else
            attsarr[0] = t.getClusterLabel();
        for (int j = 0; j < arr[0].length; j++) {
            attsarr[j + 1] = arr[0][j];
        }
        Instance ins = new Instance(1, attsarr);
        ID.add(ins);
    }
}