Example usage for weka.core Instance setValue

List of usage examples for weka.core Instance setValue

Introduction

On this page you can find example usages of weka.core Instance setValue.

Prototype

public void setValue(Attribute att, String value);

Source Link

Document

Sets the value of a nominal or string attribute to the given value.

Usage

From source file:oxis.yologp.YOLogPDescriptor.java

License:Open Source License

/**
 * Trains a new random-forest model on the flagged drugs and serializes it,
 * replacing the model previously held in {@code model}.
 *
 * <p>Every property of a flagged drug except {@code "hash"}, {@code "flag"} and
 * {@code "smiles"} is parsed as a double and stored in the attribute of the same
 * name. Properties with no matching attribute, or with no value, are skipped and
 * reported at WARNING level.
 *
 * @param name name of the model to save; it is appended to {@code path}
 * @throws Exception if computing the descriptors, building the classifier or
 *         writing the model fails, or if a property value is not a valid double
 */
public void train(String name) throws Exception {

    compute();

    Instances instances = buildDataset();

    model = new RandomForest();

    for (DrugStruct drugStruct : listDrug) {

        if (drugStruct.drug.getProperty("flag")) {
            Map<Object, Object> properties = drugStruct.drug.getProperties();
            // original author's note on the attribute count: 28 + 1024
            Instance instance = new DenseInstance(instances.numAttributes());
            instance.setDataset(instances);
            for (Map.Entry<Object, Object> property : properties.entrySet()) {
                Object propKey = property.getKey();
                if (propKey.equals("hash") || propKey.equals("flag") || propKey.equals("smiles")) {
                    continue;
                }
                Object propValue = property.getValue();
                // Check explicitly instead of relying on a NullPointerException:
                // the dataset may have no attribute with this name, or the value may be absent.
                if (propValue == null || instances.attribute(propKey.toString()) == null) {
                    Logger.getLogger(YOLogPDescriptor.class.getName()).log(Level.WARNING,
                            "Property not used: {0}", propKey.toString());
                    continue;
                }
                instance.setValue(instances.attribute(propKey.toString()),
                        Double.parseDouble(propValue.toString()));
            }
            instance.setClassValue(drugStruct.getLogP());
            instances.add(instance);
        }
    }
    model.setNumFeatures(200);
    model.setNumTrees(400);
    // NOTE(review): a max depth of 0 presumably means "unlimited" in Weka - confirm
    model.setMaxDepth(0);
    model.buildClassifier(instances);

    weka.core.SerializationHelper.write(path + name, model);
}

From source file:Part2.HierarchicalClusterer.java

License:Open Source License

/**
 * Computes the distance between two clusters according to the configured link type.
 *
 * @param fDistance pairwise distance matrix, indexed by the instance indices held in the clusters
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for the second cluster
 * @return distance between the clusters based on the link type
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double result = Double.MAX_VALUE;
    // A "Semi_distance" term was once blended into the result; that code is disabled.
    switch (m_nLinkType) {
    case SINGLE: {
        // single link (minimum link): the closest distance between
        // any member of cluster1 and any member of cluster2
        result = Double.MAX_VALUE;
        for (int first : cluster1) {
            for (int second : cluster2) {
                double candidate = fDistance[first][second];
                if (candidate < result) {
                    result = candidate;
                }
            }
        }
        break;
    }
    case COMPLETE:
    case ADJCOMLPETE: {
        // complete link (maximum link): the largest distance between
        // any member of cluster1 and any member of cluster2
        result = 0;
        for (int first : cluster1) {
            for (int second : cluster2) {
                double candidate = fDistance[first][second];
                if (candidate > result) {
                    result = candidate;
                }
            }
        }
        if (m_nLinkType != COMPLETE) {
            // adjustment: subtract the largest within-cluster distance
            double widest = 0;
            for (int a = 0; a < cluster1.size(); a++) {
                int first = cluster1.get(a);
                for (int b = a + 1; b < cluster1.size(); b++) {
                    double candidate = fDistance[first][cluster1.get(b)];
                    if (candidate > widest) {
                        widest = candidate;
                    }
                }
            }
            for (int a = 0; a < cluster2.size(); a++) {
                int first = cluster2.get(a);
                for (int b = a + 1; b < cluster2.size(); b++) {
                    double candidate = fDistance[first][cluster2.get(b)];
                    if (candidate > widest) {
                        widest = candidate;
                    }
                }
            }
            result -= widest;
        }
        break;
    }
    case AVERAGE: {
        // average distance over all pairs drawn from the two clusters
        double total = 0;
        for (int first : cluster1) {
            for (int second : cluster2) {
                total += fDistance[first][second];
            }
        }
        result = total / (cluster1.size() * cluster2.size());
        break;
    }
    case MEAN: {
        // mean pairwise distance inside the merged cluster
        // (aka group-average agglomerative clustering)
        Vector<Integer> union = new Vector<Integer>();
        union.addAll(cluster1);
        union.addAll(cluster2);
        int count = union.size();
        double total = 0;
        for (int a = 0; a < count; a++) {
            int first = union.get(a);
            for (int b = a + 1; b < count; b++) {
                total += fDistance[first][union.get(b)];
            }
        }
        result = total / (count * (count - 1.0) / 2.0);
        break;
    }
    case CENTROID: {
        // distance between the centroids of the two clusters
        int attributeCount = m_instances.numAttributes();
        double[] centroid1 = new double[attributeCount];
        for (int member : cluster1) {
            Instance current = m_instances.instance(member);
            for (int a = 0; a < attributeCount; a++) {
                centroid1[a] += current.value(a);
            }
        }
        double[] centroid2 = new double[attributeCount];
        for (int member : cluster2) {
            Instance current = m_instances.instance(member);
            for (int a = 0; a < attributeCount; a++) {
                centroid2[a] += current.value(a);
            }
        }
        // copies of the first instance carry the centroid values into the distance function
        Instance probe1 = (Instance) m_instances.instance(0).copy();
        Instance probe2 = (Instance) m_instances.instance(0).copy();
        for (int a = 0; a < attributeCount; a++) {
            centroid1[a] /= cluster1.size();
            centroid2[a] /= cluster2.size();
            probe1.setValue(a, centroid1[a]);
            probe2.setValue(a, centroid2[a]);
        }
        result = m_DistanceFunction.distance(probe1, probe2);
        break;
    }
    case WARD: {
        // change caused by merging the clusters, based on calcESS of each
        // cluster and of the merged cluster, each weighted by cluster size
        double essFirst = calcESS(cluster1);
        double essSecond = calcESS(cluster2);
        Vector<Integer> union = new Vector<Integer>();
        union.addAll(cluster1);
        union.addAll(cluster2);
        double essUnion = calcESS(union);
        result = essUnion * union.size() - essFirst * cluster1.size() - essSecond * cluster2.size();
        break;
    }
    }
    return result;
}

From source file:Part2.HierarchicalClustererEx.java

License:Open Source License

/**
 * Calculates the distance between two clusters.
 *
 * @param fDistance pairwise distance matrix, indexed by the instance indices held in the clusters
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for the second cluster
 * @return distance between clusters based on link type
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double fBestDist = Double.MAX_VALUE;
    switch (m_nLinkType) {
    case SINGLE:
        // find single link distance aka minimum link, which is the closest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = Double.MAX_VALUE;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist > fDist) {
                    fBestDist = fDist;
                }
            }
        }
        break;
    case COMPLETE:
    case ADJCOMLPETE:
        // find complete link distance aka maximum link, which is the largest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist < fDist) {
                    fBestDist = fDist;
                }
            }
        }
        if (m_nLinkType == COMPLETE) {
            break;
        }
        // calculate adjustment, which is the largest within cluster distance
        double fMaxDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = i + 1; j < cluster1.size(); j++) {
                int i2 = cluster1.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        for (int i = 0; i < cluster2.size(); i++) {
            int i1 = cluster2.elementAt(i);
            for (int j = i + 1; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        fBestDist -= fMaxDist;
        break;
    case AVERAGE:
        // finds average distance between the elements of the two clusters
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        // multiply in double so the pair count cannot overflow int for large clusters
        fBestDist /= ((double) cluster1.size() * cluster2.size());
        break;
    case MEAN: {
        // calculates the mean distance of a merged cluster (aka group-average agglomerative clustering)
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        fBestDist = 0;
        for (int i = 0; i < merged.size(); i++) {
            int i1 = merged.elementAt(i);
            for (int j = i + 1; j < merged.size(); j++) {
                int i2 = merged.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        int n = merged.size();
        fBestDist /= (n * (n - 1.0) / 2.0);
    }
        break;
    case CENTROID:
        // finds the distance of the centroids of the clusters
        double[] fValues1 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster1.size(); i++) {
            Instance instance = m_instances.instance(cluster1.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues1[j] += instance.value(j);
            }
        }
        double[] fValues2 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster2.size(); i++) {
            Instance instance = m_instances.instance(cluster2.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues2[j] += instance.value(j);
            }
        }
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            fValues1[j] /= cluster1.size();
            fValues2[j] /= cluster2.size();
        }
        // set up two instances for distance function
        Instance instance1 = (Instance) m_instances.instance(0).copy();
        Instance instance2 = (Instance) m_instances.instance(0).copy();
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            instance1.setValue(j, fValues1[j]);
            instance2.setValue(j, fValues2[j]);
        }
        fBestDist = m_DistanceFunction.distance(instance1, instance2);
        break;
    case WARD: {
        // finds the distance of the change caused by merging the clusters.
        // The information of a cluster is calculated as the error sum of squares of the
        // centroids of the cluster and its members.
        double ESS1 = calcESS(cluster1);
        double ESS2 = calcESS(cluster2);
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        double ESS = calcESS(merged);
        fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size();
    }
        break;
    }
    return fBestDist;
}

From source file:pl.nask.hsn2.service.analysis.JSWekaAnalyzer.java

License:Open Source License

/**
 * Classifies a JavaScript source file from its n-grams.
 *
 * @param file the file whose n-grams are extracted and classified
 * @return the class predicted by the classifier, or {@code JSClass.UNCLASSIFIED} when no
 *         n-grams could be extracted, too few were found, or classification failed
 */
public final JSClass classifyString(File file) {
    final String ngrams = NGramsCalc.getNgramsForFile(file.getPath(), ngramsLength, ngramsQuantity);
    if (ngrams == null) {
        LOGGER.info("No ngrams extracted, probably JS source is too short");
        return JSClass.UNCLASSIFIED;
    }

    // Only classify when the space-separated n-gram string holds enough tokens.
    final StringTokenizer tokens = new StringTokenizer(ngrams, " ");
    if (tokens.countTokens() < ngramsQuantity) {
        return JSClass.UNCLASSIFIED;
    }

    final Instance sample = new Instance(2);
    sample.setDataset(trainingSet);
    sample.setValue(0, ngrams);

    try {
        final double predicted = fc.classifyInstance(sample);
        final String label = trainingSet.classAttribute().value((int) predicted);
        return JSClass.valueOf(label.toUpperCase());
    } catch (Exception e) {
        LOGGER.error(e.getMessage(), e);
        return JSClass.UNCLASSIFIED;
    }
}

From source file:Prediccion.PrecidePasoNodo.java

License:Open Source License

/**
 * Loads the (interval, total) rows of this node whose interval ends with
 * {@code 00:00:00} and returns them as a dataset sorted by interval.
 *
 * @param hora not used by this method body
 * @return dataset with a date attribute "Intervalo" and a numeric attribute "Total"
 * @throws ParseException if an interval value cannot be parsed as a date
 */
Instances cargarDatos(int hora) throws ParseException {
    // Declare the attributes of the instances
    ArrayList<Attribute> schema = new ArrayList<>();
    schema.add(new Attribute("Intervalo", "yyyy-MM-dd HH:mm:ss"));
    schema.add(new Attribute("Total"));

    // Create the set of instances
    Instances dataset = new Instances(nodo, schema, 1000);

    // Open the Fusion Tables connection and run the query
    cFT = new conectarFusionTables();
    Sqlresponse response = cFT.select(TABLAID, "Intervalo, Total",
            "idNodo = " + nodo + " and Intervalo ENDS WITH '00:00:00'",
            "ORDER BY \'Intervalo\' DESC LIMIT 10000");

    for (List<Object> row : response.getRows()) {
        Instance sample = new DenseInstance(2);

        String interval = (String) row.get(0);
        String total = (String) row.get(1);

        System.err.println(interval + " ->" + total);

        sample.setValue(dataset.attribute(0), dataset.attribute(0).parseDate(interval));
        sample.setValue(dataset.attribute(1), Integer.parseInt(total));

        dataset.add(sample);
    }

    dataset.sort(0);
    return dataset;
}

From source file:Prediccion.Prediccion.java

License:Open Source License

/**
 * Loads the (interval, total) rows of this node and distributes them over 24 datasets,
 * one per hour of day, each sorted by interval.
 *
 * <p>The hour is read from characters 11-12 of the interval string.
 *
 * @return list of 24 datasets; the dataset at index {@code h} holds the rows whose hour is {@code h}
 * @throws ParseException if an interval value cannot be parsed as a date
 */
ArrayList<Instances> cargarDatos() throws ParseException {
    // Declare the attributes of the instances
    ArrayList<Attribute> schema = new ArrayList<>();
    schema.add(new Attribute("Intervalo", "yyyy-MM-dd HH:mm:ss"));
    schema.add(new Attribute("Total"));

    // Create one set of instances per hour
    ArrayList<Instances> perHour = new ArrayList<>(24);
    for (int h = 0; h < 24; h++) {
        perHour.add(new Instances(nodo, schema, 1000));
    }

    // Open the Fusion Tables connection and run the query
    cFT = new conectarFusionTables();
    Sqlresponse response = cFT.select(TABLAID, "Intervalo, Total", "idNodo = " + nodo + " ",
            "ORDER BY \'Intervalo\' DESC LIMIT 10000");

    try {
        System.err.println(response.toPrettyString());
    } catch (IOException ex) {
        Logger.getLogger(Prediccion.class.getName()).log(Level.SEVERE, null, ex);
    }

    for (List<Object> row : response.getRows()) {
        Instance sample = new DenseInstance(2);

        String interval = (String) row.get(0);
        String total = (String) row.get(1);
        int hora = Integer.parseInt(interval.substring(11, 13));

        System.err.println(interval + " ->" + total + "  " + hora);

        // All further work targets the dataset of the row's hour.
        Instances bucket = perHour.get(hora);
        sample.setValue(bucket.attribute(0), bucket.attribute(0).parseDate(interval));
        sample.setValue(bucket.attribute(1), Integer.parseInt(total));

        bucket.add(sample);
    }

    for (Instances bucket : perHour) {
        bucket.sort(0);
    }

    return perHour;
}

From source file:predictors.HelixIndexer.java

License:Open Source License

/**
 * Analyzes a given window and saves it in the database.
 * /*from   w w w .j  a va 2  s. c om*/
 * @param pssm
 * @param windowCenter
 * @param structure
 */
private void addWindowToDatabase(Pssm pssm, int windowCenter, char[] structure) {
    int index = Mappings.ssToInt(structure[windowCenter]);
    Instance window = this.buildInstance(pssm, windowCenter);

    if (index == Mappings.indexTmh) {
        index = HelixIndexer.indexTmh;
    } else if (index == Mappings.indexSignal) {
        index = HelixIndexer.indexSignal;
    } else {
        index = HelixIndexer.indexNotTmh;
    }

    window.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), index);
    window.setDataset(this.dataset);

    this.dataset.add(window);
}

From source file:predictors.HelixPredictor.java

License:Open Source License

/**
 * Analyzes a given segment (TMH or not) and saves it in the database.
 * /*from  ww w  .ja va2s .  c o m*/
 * @param pssm
 * @param start
 * @param end
 * @param structureIndex
 */
private void addSegmentToDatabse(Pssm pssm, int start, int end, int structureIndex) {
    Instance segment = this.buildInstance(pssm, start, end);

    segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex);

    segment.setDataset(this.dataset);

    this.dataset.add(segment);
}

From source file:predictors.TopologyPredictor.java

License:Open Source License

/**
 * Analyzes a given window and saves it in the database.
 * /*ww  w . j  ava  2  s  . c om*/
 * @param pssm
 * @param structure
 * @param structureIndex
 * @param startPos
 */
private void addProteinToDatabse(Pssm pssm, char[] structure, int structureIndex, int startPos) {
    ArrayList<Segment> solSegments = findSegments(structure);
    Instance segment = this.buildInstance(pssm, structure, solSegments, startPos);

    segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex);

    segment.setDataset(this.dataset);

    this.dataset.add(segment);
}

From source file:probcog.bayesnets.core.ClustererDiscretizationFilter.java

License:Open Source License

/**
 * Maps a continuous value to the output value of the cluster it is assigned to.
 *
 * @param continuous the value to discretize
 * @return the output value of the assigned cluster, or {@code null} if the lookup
 *         fails (the stack trace is printed in that case)
 */
public String getValueForContinuous(double continuous) {
    // Wrap the value in a single-attribute instance for the clusterer.
    final Instance sample = new Instance(1);
    sample.setValue(0, continuous);

    String label;
    try {
        final int clusterIndex = clusterer.clusterInstance(sample);
        label = outputValues[clusterIndex];
    } catch (Exception e) {
        e.printStackTrace();
        label = null;
    }
    return label;
}