Example usage for weka.core Instance setValue

Introduction

On this page you can find example usages of weka.core Instance setValue.

Prototype

public void setValue(Attribute att, String value);

Source Link

Document

Sets the value of a nominal or string attribute to the given value.

Usage

From source file:oxis.yologp.YOLogPDescriptor.java

License:Open Source License

/**
 * Trains a new random forest model, replacing the one currently held in {@code model},
 * and writes the trained model to {@code path + name}.
 *
 * Only drugs whose "flag" property is set contribute a training instance. Every other
 * property except "hash", "flag" and "smiles" is parsed as a double and stored in the
 * attribute of the same name; properties without a matching attribute (or without a
 * value) are logged and skipped.
 *
 * @param name name of the model to save, appended to {@code path}
 * @throws Exception if any step (descriptor computation, dataset construction,
 *                   value parsing, training or serialization) fails
 */
public void train(String name) throws Exception {

    compute();

    Instances instances = buildDataset();

    model = new RandomForest();

    for (DrugStruct drugStruct : listDrug) {

        if (drugStruct.drug.getProperty("flag")) {
            Map<Object, Object> properties = drugStruct.drug.getProperties();
            Instance instance = new DenseInstance(instances.numAttributes()); //28 + 1024
            instance.setDataset(instances);
            for (Map.Entry<Object, Object> property : properties.entrySet()) {
                Object propKey = property.getKey();
                if (propKey.equals("hash") || propKey.equals("flag") || propKey.equals("smiles")) {
                    continue;
                }
                // Check explicitly instead of catching NullPointerException: the dataset
                // may not declare an attribute for this property, and the property may
                // have no value.
                weka.core.Attribute attribute = instances.attribute(propKey.toString());
                Object propValue = property.getValue();
                if (attribute == null || propValue == null) {
                    Logger.getLogger(YOLogPDescriptor.class.getName()).log(Level.WARNING,
                            "Property not used: {0}", propKey.toString());
                    continue;
                }
                instance.setValue(attribute, Double.parseDouble(propValue.toString()));
            }
            instance.setClassValue(drugStruct.getLogP());
            instances.add(instance);
        }
    }
    model.setNumFeatures(200);
    model.setNumTrees(400);
    model.setMaxDepth(0);
    model.buildClassifier(instances);

    weka.core.SerializationHelper.write(path + name, model);
}

From source file:Part2.HierarchicalClusterer.java

License:Open Source License

/**
 * Computes the distance between two clusters for the currently selected link type.
 *
 * @param fDistance matrix of pairwise distances, indexed by instance index
 * @param cluster1 indices of the instances in the first cluster
 * @param cluster2 indices of the instances in the second cluster
 * @return distance between the clusters based on the link type; stays at
 *         {@code Double.MAX_VALUE} when the link type matches none of the cases below
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double result = Double.MAX_VALUE;
    switch (m_nLinkType) {
    case SINGLE: {
        // single link (minimum link): the closest distance between any item in
        // cluster1 and any item in cluster2
        result = Double.MAX_VALUE;
        for (int a : cluster1) {
            for (int b : cluster2) {
                double d = fDistance[a][b];
                if (d < result) {
                    result = d;
                }
            }
        }
        break;
    }
    case COMPLETE:
    case ADJCOMLPETE: {
        // complete link (maximum link): the largest distance between any item in
        // cluster1 and any item in cluster2
        result = 0;
        for (int a : cluster1) {
            for (int b : cluster2) {
                double d = fDistance[a][b];
                if (d > result) {
                    result = d;
                }
            }
        }
        if (m_nLinkType == COMPLETE) {
            break;
        }
        // adjusted variant: subtract the largest within-cluster distance
        double widest = 0;
        int size1 = cluster1.size();
        for (int i = 0; i < size1; i++) {
            int a = cluster1.get(i);
            for (int j = i + 1; j < size1; j++) {
                int b = cluster1.get(j);
                double d = fDistance[a][b];
                if (d > widest) {
                    widest = d;
                }
            }
        }
        int size2 = cluster2.size();
        for (int i = 0; i < size2; i++) {
            int a = cluster2.get(i);
            for (int j = i + 1; j < size2; j++) {
                int b = cluster2.get(j);
                double d = fDistance[a][b];
                if (d > widest) {
                    widest = d;
                }
            }
        }
        result -= widest;
        break;
    }
    case AVERAGE: {
        // average distance over all pairs with one element from each cluster
        result = 0;
        for (int a : cluster1) {
            for (int b : cluster2) {
                result += fDistance[a][b];
            }
        }
        result /= (cluster1.size() * cluster2.size());
        break;
    }
    case MEAN: {
        // mean pairwise distance inside the merged cluster
        // (group-average agglomerative clustering)
        Vector<Integer> union = new Vector<Integer>();
        union.addAll(cluster1);
        union.addAll(cluster2);
        int n = union.size();
        result = 0;
        for (int i = 0; i < n; i++) {
            int a = union.get(i);
            for (int j = i + 1; j < n; j++) {
                int b = union.get(j);
                result += fDistance[a][b];
            }
        }
        result /= (n * (n - 1.0) / 2.0);
        break;
    }
    case CENTROID: {
        // distance between the centroids of the two clusters
        int numAtts = m_instances.numAttributes();
        double[] centroid1 = new double[numAtts];
        for (int member : cluster1) {
            Instance inst = m_instances.instance(member);
            for (int k = 0; k < numAtts; k++) {
                centroid1[k] += inst.value(k);
            }
        }
        double[] centroid2 = new double[numAtts];
        for (int member : cluster2) {
            Instance inst = m_instances.instance(member);
            for (int k = 0; k < numAtts; k++) {
                centroid2[k] += inst.value(k);
            }
        }
        for (int k = 0; k < numAtts; k++) {
            centroid1[k] /= cluster1.size();
            centroid2[k] /= cluster2.size();
        }
        // copies of the first instance serve as carriers for the centroid values
        Instance first = (Instance) m_instances.instance(0).copy();
        Instance second = (Instance) m_instances.instance(0).copy();
        for (int k = 0; k < numAtts; k++) {
            first.setValue(k, centroid1[k]);
            second.setValue(k, centroid2[k]);
        }
        result = m_DistanceFunction.distance(first, second);
        break;
    }
    case WARD: {
        // change caused by merging the clusters: size-weighted error sum of squares
        // (as returned by calcESS) of the merged cluster minus those of the two parts
        double ess1 = calcESS(cluster1);
        double ess2 = calcESS(cluster2);
        Vector<Integer> union = new Vector<Integer>();
        union.addAll(cluster1);
        union.addAll(cluster2);
        double essUnion = calcESS(union);
        result = essUnion * union.size() - ess1 * cluster1.size() - ess2 * cluster2.size();
        break;
    }
    }
    return result;
}

From source file:Part2.HierarchicalClustererEx.java

License:Open Source License

/**
 * Calculates the distance between two clusters.
 *
 * @param fDistance pairwise distance matrix, indexed by instance index
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for the second cluster
 * @return distance between clusters based on link type; {@code Double.MAX_VALUE} if
 *         the link type is not one of the handled cases
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double fBestDist = Double.MAX_VALUE;
    switch (m_nLinkType) {
    case SINGLE:
        // find single link distance aka minimum link, which is the closest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = Double.MAX_VALUE;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist > fDist) {
                    fBestDist = fDist;
                }
            }
        }
        break;
    case COMPLETE:
    case ADJCOMLPETE: {
        // find complete link distance aka maximum link, which is the largest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist < fDist) {
                    fBestDist = fDist;
                }
            }
        }
        if (m_nLinkType == COMPLETE) {
            break;
        }
        // calculate adjustment, which is the largest within cluster distance
        double fMaxDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = i + 1; j < cluster1.size(); j++) {
                int i2 = cluster1.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        for (int i = 0; i < cluster2.size(); i++) {
            int i1 = cluster2.elementAt(i);
            for (int j = i + 1; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        fBestDist -= fMaxDist;
        break;
    }
    case AVERAGE:
        // finds average distance between the elements of the two clusters
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        fBestDist /= (cluster1.size() * cluster2.size());
        break;
    case MEAN: {
        // calculates the mean distance of a merged cluster
        // (aka group-average agglomerative clustering)
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        fBestDist = 0;
        for (int i = 0; i < merged.size(); i++) {
            int i1 = merged.elementAt(i);
            for (int j = i + 1; j < merged.size(); j++) {
                int i2 = merged.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        int n = merged.size();
        // n * (n - 1) / 2 is the number of distinct pairs in the merged cluster
        fBestDist /= (n * (n - 1.0) / 2.0);
        break;
    }
    case CENTROID: {
        // finds the distance of the centroids of the clusters
        double[] fValues1 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster1.size(); i++) {
            Instance instance = m_instances.instance(cluster1.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues1[j] += instance.value(j);
            }
        }
        double[] fValues2 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster2.size(); i++) {
            Instance instance = m_instances.instance(cluster2.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues2[j] += instance.value(j);
            }
        }
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            fValues1[j] /= cluster1.size();
            fValues2[j] /= cluster2.size();
        }
        // set up two instances for the distance function; copies of the first
        // instance are overwritten with the centroid values
        Instance instance1 = (Instance) m_instances.instance(0).copy();
        Instance instance2 = (Instance) m_instances.instance(0).copy();
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            instance1.setValue(j, fValues1[j]);
            instance2.setValue(j, fValues2[j]);
        }
        fBestDist = m_DistanceFunction.distance(instance1, instance2);
        break;
    }
    case WARD: {
        // finds the distance of the change caused by merging the clusters.
        // The information of a cluster is calculated as the error sum of squares of the
        // centroids of the cluster and its members.
        double ESS1 = calcESS(cluster1);
        double ESS2 = calcESS(cluster2);
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        double ESS = calcESS(merged);
        fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size();
        break;
    }
    }
    return fBestDist;
}

From source file:pl.nask.hsn2.service.analysis.JSWekaAnalyzer.java

License:Open Source License

/**
 * Classifies a file by the n-grams extracted from it.
 *
 * Returns {@code JSClass.UNCLASSIFIED} when no n-grams could be extracted, when fewer
 * than {@code ngramsQuantity} tokens were extracted, or when classification fails
 * (the failure is logged).
 *
 * @param file file to extract the n-grams from
 * @return the class whose name matches the predicted class label, or
 *         {@code JSClass.UNCLASSIFIED}
 */
public final JSClass classifyString(File file) {
    String ngrams = NGramsCalc.getNgramsForFile(file.getPath(), ngramsLength, ngramsQuantity);

    if (ngrams == null) {
        LOGGER.info("No ngrams extracted, probably JS source is too short");
        return JSClass.UNCLASSIFIED;
    }

    StringTokenizer st = new StringTokenizer(ngrams, " ");
    if (st.countTokens() < ngramsQuantity) {
        return JSClass.UNCLASSIFIED;
    }

    Instance t = new Instance(2);
    t.setDataset(trainingSet);
    t.setValue(0, ngrams);

    try {
        double dd = fc.classifyInstance(t);
        String label = trainingSet.classAttribute().value((int) dd);
        // Locale.ROOT keeps the enum lookup independent of the default locale
        // (e.g. a Turkish locale upper-cases 'i' to a dotted capital I).
        return JSClass.valueOf(label.toUpperCase(java.util.Locale.ROOT));
    } catch (Exception e) {
        LOGGER.error(e.getMessage(), e);
        return JSClass.UNCLASSIFIED;
    }
}

From source file:Prediccion.PrecidePasoNodo.java

License:Open Source License

/**
 * Loads the "Intervalo" and "Total" columns for this node into a data set.
 *
 * Only rows whose "Intervalo" ends with '00:00:00' are requested. Each row is echoed to
 * {@code System.err}, and the resulting data set is sorted on its first attribute.
 *
 * @param hora not used by this method
 * @return the loaded instances, sorted on attribute 0
 * @throws ParseException if an "Intervalo" value cannot be parsed as a date
 */
Instances cargarDatos(int hora) throws ParseException {
    // Declare the attributes of the instances
    ArrayList<Attribute> schema = new ArrayList<>();
    schema.add(new Attribute("Intervalo", "yyyy-MM-dd HH:mm:ss"));
    schema.add(new Attribute("Total"));

    // Create the instance set
    Instances dataset = new Instances(nodo, schema, 1000);

    // Open the Fusion Tables connection and run the query
    cFT = new conectarFusionTables();
    String filter = "idNodo = " + nodo + " and Intervalo ENDS WITH '00:00:00'";
    Sqlresponse response = cFT.select(TABLAID, "Intervalo, Total", filter,
            "ORDER BY \'Intervalo\' DESC LIMIT 10000");

    Attribute intervalAttr = dataset.attribute(0);
    Attribute totalAttr = dataset.attribute(1);
    for (List<Object> row : response.getRows()) {
        String interval = (String) row.get(0);
        String total = (String) row.get(1);

        System.err.println(interval + " ->" + total);

        Instance inst = new DenseInstance(2);
        inst.setValue(intervalAttr, intervalAttr.parseDate(interval));
        inst.setValue(totalAttr, Integer.parseInt(total));
        dataset.add(inst);
    }

    dataset.sort(0);
    return dataset;
}

From source file:Prediccion.Prediccion.java

License:Open Source License

/**
 * Loads the "Intervalo" and "Total" columns for this node, split into 24 data sets.
 *
 * The data set a row goes to is selected by the integer parsed from characters 11-12
 * of its "Intervalo" string. The full response and each row are echoed to
 * {@code System.err}, and every data set is sorted on its first attribute.
 *
 * @return list of 24 data sets, each sorted on attribute 0
 * @throws ParseException if an "Intervalo" value cannot be parsed as a date
 */
ArrayList<Instances> cargarDatos() throws ParseException {
    // Declare the attributes of the instances
    ArrayList<Attribute> schema = new ArrayList<>();
    schema.add(new Attribute("Intervalo", "yyyy-MM-dd HH:mm:ss"));
    schema.add(new Attribute("Total"));

    // Create one instance set per bucket
    ArrayList<Instances> perHour = new ArrayList<>(24);
    for (int h = 0; h < 24; h++) {
        perHour.add(new Instances(nodo, schema, 1000));
    }

    // Open the Fusion Tables connection and run the query
    cFT = new conectarFusionTables();
    Sqlresponse response = cFT.select(TABLAID, "Intervalo, Total", "idNodo = " + nodo + " ",
            "ORDER BY \'Intervalo\' DESC LIMIT 10000");

    try {
        System.err.println(response.toPrettyString());
    } catch (IOException ex) {
        Logger.getLogger(Prediccion.class.getName()).log(Level.SEVERE, null, ex);
    }

    for (List<Object> row : response.getRows()) {
        String interval = (String) row.get(0);
        String total = (String) row.get(1);
        int hora = Integer.parseInt(interval.substring(11, 13));

        System.err.println(interval + " ->" + total + "  " + hora);

        Instances bucket = perHour.get(hora);
        Instance inst = new DenseInstance(2);
        inst.setValue(bucket.attribute(0), bucket.attribute(0).parseDate(interval));
        inst.setValue(bucket.attribute(1), Integer.parseInt(total));
        bucket.add(inst);
    }

    for (Instances bucket : perHour) {
        bucket.sort(0);
    }

    return perHour;
}

From source file:predictors.HelixIndexer.java

License:Open Source License

/**
 * Analyzes a given window and saves it in the database.
 * /*from   w w w .j  a va 2  s. c om*/
 * @param pssm
 * @param windowCenter
 * @param structure
 */
private void addWindowToDatabase(Pssm pssm, int windowCenter, char[] structure) {
    int index = Mappings.ssToInt(structure[windowCenter]);
    Instance window = this.buildInstance(pssm, windowCenter);

    if (index == Mappings.indexTmh) {
        index = HelixIndexer.indexTmh;
    } else if (index == Mappings.indexSignal) {
        index = HelixIndexer.indexSignal;
    } else {
        index = HelixIndexer.indexNotTmh;
    }

    window.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), index);
    window.setDataset(this.dataset);

    this.dataset.add(window);
}

From source file:predictors.HelixPredictor.java

License:Open Source License

/**
 * Analyzes a given segment (TMH or not) and saves it in the database.
 * /*from  ww w  .ja va2s .  c o m*/
 * @param pssm
 * @param start
 * @param end
 * @param structureIndex
 */
private void addSegmentToDatabse(Pssm pssm, int start, int end, int structureIndex) {
    Instance segment = this.buildInstance(pssm, start, end);

    segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex);

    segment.setDataset(this.dataset);

    this.dataset.add(segment);
}

From source file:predictors.TopologyPredictor.java

License:Open Source License

/**
 * Analyzes a given window and saves it in the database.
 * /*ww  w . j  ava  2  s  . c om*/
 * @param pssm
 * @param structure
 * @param structureIndex
 * @param startPos
 */
private void addProteinToDatabse(Pssm pssm, char[] structure, int structureIndex, int startPos) {
    ArrayList<Segment> solSegments = findSegments(structure);
    Instance segment = this.buildInstance(pssm, structure, solSegments, startPos);

    segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex);

    segment.setDataset(this.dataset);

    this.dataset.add(segment);
}

From source file:probcog.bayesnets.core.ClustererDiscretizationFilter.java

License:Open Source License

/**
 * Maps a continuous value to the output value of the cluster it is assigned to.
 *
 * @param continuous the value to look up
 * @return the entry of {@code outputValues} for the assigned cluster, or {@code null}
 *         if the lookup throws (the stack trace is printed)
 */
public String getValueForContinuous(double continuous) {
    // wrap the value in a single-attribute instance for the clusterer
    Instance sample = new Instance(1);
    sample.setValue(0, continuous);

    String label = null;
    try {
        label = outputValues[clusterer.clusterInstance(sample)];
    } catch (Exception e) {
        e.printStackTrace();
    }
    return label;
}