Example usage for weka.core Instances add

List of usage examples for weka.core Instances add

Introduction

On this page you can find example usage for weka.core Instances add.

Prototype

@Override
public boolean add(Instance instance) 

Source Link

Document

Adds one instance to the end of the set.

Usage

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Recursively builds a decision tree whose shape mirrors the supplied JSON
 * node description, and annotates that JSON in place with evaluation results
 * (bin size, info gain, majority class, percent correct) as it descends.
 *
 * Attribute indices beyond {@code data.numAttributes()} address "virtual"
 * attributes: first the entries of {@code custom_classifiers}, then the
 * custom sets in {@code cSList}.
 *
 * @param data
 *            the training instances reaching this node
 * @param classProbs
 *            the class distribution for this node
 * @param header
 *            the header (structure) of the data
 * @param debug
 *            whether debugging is on
 * @param depth
 *            the current depth
 * @param node
 *            JSON description of this node; null marks a dead branch
 * @param parent_index
 *            attribute index chosen at the parent, used as fallback here
 * @param m_distributionData
 *            HashMap to put distribution data if getSplitData is true in
 *            any node
 * @param requiredInstances
 *            receives the instances of a leaf whose JSON sets "pickInst"
 * @param custom_classifiers
 *            user-built classifiers usable as virtual split attributes
 * @param cSList
 *            user-drawn custom sets (polygons) usable as virtual attributes
 * @param ccService
 *            service used to build new custom classifiers on demand
 * @param ds
 *            dataset the custom sets' attributes are resolved against
 * @throws Exception
 *             if generation fails
 */
protected void buildTree(Instances data, double[] classProbs, Instances header, boolean debug, int depth,
        JsonNode node, int parent_index, HashMap m_distributionData, Instances requiredInstances,
        LinkedHashMap<String, Classifier> custom_classifiers, List<CustomSet> cSList,
        CustomClassifierService ccService, Dataset ds) throws Exception {

    // Lazily create the shared JSON mapper.
    if (mapper == null) {
        mapper = new ObjectMapper();
    }
    // Store structure of dataset, set minimum number of instances
    m_Info = header;
    m_Debug = debug;

    // if in dead json return
    if (node == null) {
        m_Attribute = -1;
        m_ClassDistribution = null;
        m_Prop = null;
        return;
    }

    // Make leaf if there are no training instances
    if (data.numInstances() == 0) {
        m_Attribute = -1;
        m_ClassDistribution = null;
        m_Prop = null;
        return;
    }

    // Check if node doesn't contain enough instances or is pure
    // or maximum depth reached
    m_ClassDistribution = classProbs.clone();
    cSetList = cSList;
    ccSer = ccService;
    d = ds;

    // if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum
    // || Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)],
    // Utils
    // .sum(m_ClassDistribution))
    // || ((getMaxDepth() > 0) && (depth >= getMaxDepth()))) {
    // // Make leaf
    // m_Attribute = -1;
    // m_Prop = null;
    // return;
    // }

    // Investigate the selected attribute
    int attIndex = parent_index;

    // options child added by web client developer
    // TODO work with him to make a more meaningful structure...
    JsonNode options = node.get("options");
    if (options == null) {
        return;
    }
    String kind = options.get("kind").asText();
    JsonNode att_name = options.get("attribute_name");
    Boolean getSplitData = false;
    Boolean getInstanceData = false;
    // this allows me to modify the json tree structure to add data about
    // the evaluation
    ObjectNode evalresults = (ObjectNode) options;
    ObjectNode _node = (ObjectNode) node;
    //For Roc - Node Match
    _node.set("roc_uid_0", null);
    _node.set("roc_uid_1", null);
    // Child JSON nodes keyed by split-value name ("low"/"high" for numeric
    // splits, the nominal value or "Inside"/"Outside" otherwise).
    Map<String, JsonNode> sons = new HashMap<String, JsonNode>();
    // String name = node_name.asText();
    if (kind != null && kind.equals("split_node") && att_name != null) {
        // attIndex = data.attribute(node_id.asText()).index();
        if (!att_name.asText().equals("") && !att_name.asText().contains("custom_classifier")
                && !att_name.asText().contains("custom_tree") && !att_name.asText().contains("custom_set")) {
            // Plain dataset attribute: resolve by name.
            attIndex = data.attribute(att_name.asText()).index();
        } else {
            if (att_name.asText().contains("custom_set")) {
                // Find this set's position in cSList by matching its id.
                int ctr = 0;
                for (CustomSet c : cSList) {
                    if (c.getId() == Long.valueOf(att_name.asText().replace("custom_set_", ""))) {
                        break;
                    }
                    ctr++;
                }
                // Virtual index: after all real attributes and all custom
                // classifiers. NOTE(review): the "- 1" presumably skips the
                // class attribute — confirm against distribution()/splitData().
                attIndex = (data.numAttributes() - 1) + custom_classifiers.size() + ctr;
            } else {
                if (att_name.asText().contains("custom_classifier_new")) {
                    // Build the new classifier now and register it under its
                    // persistent id, rewriting the JSON to the stable name.
                    HashMap mp = ccSer.buildCustomClasifier(data,
                            Long.valueOf(att_name.asText().replace("custom_classifier_new_", "")));
                    Classifier fc = (Classifier) mp.get("classifier");
                    custom_classifiers.put("custom_classifier_" + mp.get("id"), fc);
                    evalresults.put("unique_id", "custom_classifier_" + mp.get("id"));
                    evalresults.put("attribute_name", "custom_classifier_" + mp.get("id"));
                    att_name = evalresults.get("attribute_name");
                }
                // Virtual index of the matching custom classifier.
                int ctr = 0;
                for (String key : custom_classifiers.keySet()) {
                    if (key.equals(att_name.asText())) {
                        break;
                    }
                    ctr++;
                }
                attIndex = (data.numAttributes() - 1) + ctr;
            }
        }
        if (node.get("getSplitData") != null) {
            getSplitData = node.get("getSplitData").asBoolean();
        }
        // Map each child JSON node by the split value it represents.
        JsonNode split_values = node.get("children");
        int c = 0;
        if (split_values != null && split_values.size() > 0) {
            for (JsonNode svalue : split_values) {
                String key = svalue.get("name").asText();
                JsonNode son = svalue.get("children").get(0);
                if (key.contains("<")) {
                    key = "low";
                } else if (key.contains(">")) {
                    key = "high";
                }
                sons.put(key, son);
                c++;
            }
        }
        // LOGGER.debug("Id name "+att_name+" index "+attIndex+" type "+kind+" sons "+c);
    } else {
        // LOGGER.debug("non split node, name "+att_name+" type "+kind);
    }

    // One slot per real attribute plus every virtual attribute.
    double[] vals = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
    double[][][] dists = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0][0];
    double[][] props = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0];
    double[] splits = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
    listOfFc = custom_classifiers;
    // Compute class distributions and value of splitting
    // criterion for each attribute
    HashMap<String, Double> mp = new HashMap<String, Double>();
    // NOTE(review): the second branch's "- 1" makes its range overlap the
    // first (custom-classifier) range; both call distribution() identically,
    // so only the split_point branch below actually differs — confirm intent.
    if (attIndex >= data.numAttributes() && attIndex < data.numAttributes() + custom_classifiers.size()) {
        mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
    } else if (attIndex >= data.numAttributes() + custom_classifiers.size() - 1) {
        mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
    } else {
        if (options.get("split_point") != null) {
            // Honour a split point fixed by the client.
            mp = distribution(props, dists, attIndex, data, options.get("split_point").asDouble(),
                    custom_classifiers);
        } else {
            mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
        }
    }

    splits[attIndex] = mp.get("split_point");
    vals[attIndex] = gain(dists[attIndex], priorVal(dists[attIndex]));

    m_Attribute = attIndex;
    double[][] distribution = dists[m_Attribute];

    // stop if input json tree does not contain any more children
    // replacing Utils.gr(vals[m_Attribute], 0)&&
    if (kind != null && kind.equals("split_node") && att_name != null) {
        //Assign Classes for custom sets(visual splits).
        m_ClassAssignment.put("Inside", Utils.maxIndex(dists[m_Attribute][1]));
        m_ClassAssignment.put("Outside", (Utils.maxIndex(dists[m_Attribute][1]) == 1) ? 0 : 1);
        // Build subtrees
        m_SplitPoint = splits[m_Attribute];
        m_Prop = props[m_Attribute];
        Instances[] subsets = splitData(data);
        m_Successors = new ManualTree[distribution.length];

        // record quantity and quality measures for node
        int quantity = 0;
        for (int i = 0; i < distribution.length; i++) {
            quantity += subsets[i].numInstances();
        }
        evalresults.put("bin_size", quantity);
        evalresults.put("infogain", vals[m_Attribute]);
        evalresults.put("majClass", m_Info.classAttribute().value(Utils.maxIndex(m_ClassDistribution)));
        evalresults.put("split_point", m_SplitPoint);
        evalresults.put("orig_split_point", mp.get("orig_split_point"));

        if (Boolean.TRUE.equals(getSplitData)) {
            addDistributionData(data, m_Attribute, m_distributionData);
        }

        int maxIndex = 0;
        double maxCount = 0;
        double errors = 0;
        double[] classDist = new double[2];
        double pct_correct = 0;
        double bin_size = 0;

        for (int i = 0; i < distribution.length; i++) {
            m_Successors[i] = new ManualTree();
            m_Successors[i].setKValue(m_KValue);
            m_Successors[i].setMaxDepth(getMaxDepth());

            //To compute class distribution for split node.
            for (int j = 0; j < distribution[i].length; j++) {
                classDist[j] += distribution[i][j];
            }
            // test an instance to see which child node to send its subset
            // down.
            // after split, should hold for all in set
            String child_name = "";
            Instances subset = subsets[i];
            if (subset == null || subset.numInstances() == 0) {
                continue;
            }
            Instance inst = subset.instance(0);
            if (m_Attribute >= data.numAttributes()
                    && m_Attribute < data.numAttributes() + custom_classifiers.size()) {
                // Custom classifier split: the child is named after the
                // predicted class of the subset's first instance.
                double predictedClass = custom_classifiers.get(att_name.asText()).classifyInstance(inst);
                child_name = m_Info.classAttribute().value((int) predictedClass);

            } else if (m_Attribute >= data.numAttributes() + custom_classifiers.size() - 1) {
                // Custom set split: point-in-polygon test on two attributes.
                CustomSet cSet = getReqCustomSet(
                        m_Attribute - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
                JsonNode vertices = mapper.readTree(cSet.getConstraints());
                ArrayList<double[]> attrVertices = generateVerticesList(vertices);
                List<Attribute> aList = generateAttributeList(cSet, data, ds);
                double[] testPoint = new double[2];
                testPoint[0] = inst.value(aList.get(0));
                testPoint[1] = inst.value(aList.get(1));
                int check = checkPointInPolygon(attrVertices, testPoint);
                if (check == 0) {
                    child_name = "Outside";
                } else {
                    child_name = "Inside";
                }
            } else {
                // which nominal attribute is this split linked to?
                if (subset.attribute(m_Attribute).isNominal()) {
                    child_name = inst.attribute(m_Attribute).value((int) inst.value(m_Attribute));
                }
                // otherwise, if we have a numeric attribute, are we going
                // high or low?
                else if (data.attribute(m_Attribute).isNumeric()) {
                    if (inst.value(m_Attribute) < m_SplitPoint) {
                        child_name = "low";
                    } else {
                        child_name = "high";
                    }
                }
            }
            m_Successors[i].setM_ClassAssignment((HashMap<String, Integer>) m_ClassAssignment.clone());
            JsonNode son = sons.get(child_name);
            if (son != null) {
                m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, son,
                        attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList, ccService,
                        ds);
            } else {
                // if we are a split node with no input children, we need to
                // add them into the tree
                // JsonNode split_values = node.get("children");
                if (kind != null && kind.equals("split_node")) {
                    ArrayNode children = (ArrayNode) node.get("children");
                    if (children == null) {
                        children = mapper.createArrayNode();
                    }
                    ObjectNode child = mapper.createObjectNode();
                    child.put("name", child_name);
                    ObjectNode c_options = mapper.createObjectNode();
                    c_options.put("attribute_name", child_name);
                    c_options.put("kind", "split_value");
                    child.put("options", c_options);
                    children.add(child);
                    _node.put("children", children);
                    m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, child,
                            attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList,
                            ccService, ds);

                } else {
                    // for leaf nodes, calling again ends the cycle and
                    // fills up the bins appropriately
                    m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, node,
                            attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList,
                            ccService, ds);
                }
            }
        }

        // Compute pct_correct from distributions and send to split_node
        bin_size = Utils.sum(classDist);
        maxIndex = Utils.maxIndex(classDist);
        maxCount = classDist[maxIndex];
        String class_name = m_Info.classAttribute().value(maxIndex);
        _node.put("majClass", class_name);
        errors += bin_size - maxCount;

        // NOTE(review): if every subset were empty, quantity would be 0 and
        // this divides by zero (NaN) — confirm that cannot happen here.
        pct_correct = (quantity - errors) / quantity;
        evalresults.put("pct_correct", pct_correct);
        // If all successors are non-empty, we don't need to store the class
        // distribution
        boolean emptySuccessor = false;
        for (int i = 0; i < subsets.length; i++) {
            if (m_Successors[i].m_ClassDistribution == null) {
                emptySuccessor = true;
                break;
            }
        }
        if (!emptySuccessor) {
            m_ClassDistribution = null;
        }
    } else {
        m_Attribute = -1;
        if (kind != null && kind.equals("leaf_node")) {
            double bin_size = 0, maxCount = 0;
            int maxIndex = 0;
            double errors = 0;
            double pct_correct = 0;
            if (m_ClassDistribution != null) {
                bin_size = Utils.sum(m_ClassDistribution);
                // The leaf's class is the majority class of its distribution.
                maxIndex = Utils.maxIndex(m_ClassDistribution);
                maxCount = m_ClassDistribution[maxIndex];
                errors = bin_size - maxCount;
                pct_correct = (bin_size - errors) / bin_size;
            }
            if (node.get("pickInst") != null) {
                getInstanceData = node.get("pickInst").asBoolean();
            }
            if (Boolean.TRUE.equals(getInstanceData)) {
                // Export this leaf's instances to the caller-supplied set.
                requiredInstances.delete();
                for (int k = 0; k < data.numInstances(); k++) {
                    requiredInstances.add(data.instance(k));
                }
            }
            String class_name = m_Info.classAttribute().value(maxIndex);
            _node.put("majClass", class_name);
            if (node.get("setClass") != null) {
                // Client pinned the class via "Inside"/"Outside" assignment.
                String setClass = node.get("setClass").asText();
                class_name = m_Info.classAttribute().value(m_ClassAssignment.get(setClass));
            }
            _node.put("name", class_name);
            evalresults.put("attribute_name", class_name);
            evalresults.put("kind", "leaf_node");
            evalresults.put("bin_size", Utils.doubleToString(bin_size, 2));
            evalresults.put("errors", Utils.doubleToString(errors, 2));
            evalresults.put("pct_correct", Utils.doubleToString(pct_correct, 2));
            this.setJsonnode(_node);
        } else {
            // Make leaf

            // add the data to the json object
            double bin_size = 0, maxCount = 0;
            int maxIndex = 0;
            double errors = 0;
            double pct_correct = 0;
            if (m_ClassDistribution != null) {
                bin_size = Utils.sum(m_ClassDistribution);
                // The leaf's class is the majority class of its distribution.
                maxIndex = Utils.maxIndex(m_ClassDistribution);
                maxCount = m_ClassDistribution[maxIndex];
                errors = bin_size - maxCount;
                pct_correct = (bin_size - errors) / bin_size;
            }
            // No matching JSON child existed: synthesize a leaf child node.
            ArrayNode children = (ArrayNode) node.get("children");
            if (children == null) {
                children = mapper.createArrayNode();
            }
            ObjectNode child = mapper.createObjectNode();
            String class_name = m_Info.classAttribute().value(maxIndex);
            child.put("majClass", class_name);
            String nodeName = node.get("name").asText();
            if (nodeName.equals("Inside") || nodeName.equals("Outside")) {
                child.put("setClass", nodeName);
                class_name = m_Info.classAttribute().value(m_ClassAssignment.get(nodeName));
            }
            child.put("name", class_name);
            ObjectNode c_options = mapper.createObjectNode();
            c_options.put("attribute_name", class_name);
            c_options.put("kind", "leaf_node");
            c_options.put("bin_size", Utils.doubleToString(bin_size, 2));
            c_options.put("errors", Utils.doubleToString(errors, 2));
            c_options.put("pct_correct", Utils.doubleToString(pct_correct, 2));
            child.put("options", c_options);
            children.add(child);
            _node.put("children", children);
            this.setJsonnode(child);
        }
    }
}

From source file:org.ssase.debt.classification.OnlineMultilayerPerceptron.java

License:Open Source License

/**
 * Wraps a single instance in a one-row {@code Instances} set named
 * "CurrentTrain", copying the instance's attributes and using the last
 * attribute as the class.
 *
 * @param inst the instance to wrap
 * @return a dataset containing exactly {@code inst}
 */
public Instances getInstances(Instance inst) {
    final int numAtts = inst.numAttributes();
    FastVector attributes = new FastVector();
    for (int attIdx = 0; attIdx < numAtts; attIdx++) {
        attributes.addElement(inst.attribute(attIdx));
    }
    Instances singleton = new Instances("CurrentTrain", attributes, 0);
    singleton.add(inst);
    singleton.setClassIndex(numAtts - 1);
    return singleton;
}

From source file:org.stream_gpu.float_knn.float_search.LinearNNSearch_Float.java

License:Open Source License

/**
 * Returns the k nearest instances in the current neighbourhood to the
 * supplied instance.
 *
 * Identical instances (distance 0) are skipped when {@code m_SkipIdentical}
 * is set, except that the very last identical instance is kept so the heap
 * is not left short.
 *
 * @param target The instance to find the k nearest neighbours for.
 * @param kNN The number of nearest neighbours to find.
 * @return the k nearest neighbors
 * @throws Exception if the neighbours could not be found.
 */
public Instances kNearestNeighbours(Instance target, int kNN) throws Exception {

    // debug
    boolean print = false;

    if (m_Stats != null)
        m_Stats.searchStart();

    MyHeap heap = new MyHeap(kNN);
    float distance;
    int firstkNN = 0;
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        if (target == m_Instances.instance(i)) // for hold-one-out cross-validation
            continue;
        if (m_Stats != null)
            m_Stats.incrPointCount();
        if (firstkNN < kNN) {
            // Heap not yet full: accept everything (identical instances
            // excepted), pruning with an infinite cutoff.
            if (print)
                System.out.println("K(a): " + (heap.size() + heap.noOfKthNearest()));
            distance = m_float_distance.distance(target, m_Instances.instance(i), Float.POSITIVE_INFINITY,
                    m_Stats);
            // BUG FIX: the original dangling-else called heap.put() twice for
            // an identical LAST instance, inserting a duplicate neighbour.
            // Skip identical instances unless this is the last one (in which
            // case keep exactly one copy so the heap can still fill up).
            if (distance == 0.0F && m_SkipIdentical && i < m_Instances.numInstances() - 1)
                continue;
            heap.put(i, distance);
            firstkNN++;
        } else {
            // Heap full: prune with the current kth-nearest distance.
            MyHeapElement temp = heap.peek();
            if (print)
                System.out.println("K(b): " + (heap.size() + heap.noOfKthNearest()));
            distance = m_float_distance.distance(target, m_Instances.instance(i), (float) temp.distance,
                    m_Stats);
            if (distance == 0.0F && m_SkipIdentical)
                continue;
            if (distance < temp.distance) {
                heap.putBySubstitute(i, distance);
            } else if (distance == temp.distance) {
                // Tie with the current kth nearest: keep it as an extra.
                heap.putKthNearest(i, distance);
            }

        }
    }

    // Drain the heap back-to-front so the nearest neighbour ends up first.
    Instances neighbours = new Instances(m_Instances, (heap.size() + heap.noOfKthNearest()));
    m_Distances = new double[heap.size() + heap.noOfKthNearest()];
    int[] indices = new int[heap.size() + heap.noOfKthNearest()];
    int i = 1;
    MyHeapElement h;
    while (heap.noOfKthNearest() > 0) {
        h = heap.getKthNearest();
        indices[indices.length - i] = h.index;
        m_Distances[indices.length - i] = (float) h.distance;
        i++;
    }
    while (heap.size() > 0) {
        h = heap.get();
        indices[indices.length - i] = h.index;
        m_Distances[indices.length - i] = (float) h.distance;
        i++;
    }

    m_float_distance.postProcessDistances(m_Distances);

    for (int k = 0; k < indices.length; k++) {
        neighbours.add(m_Instances.instance(indices[k]));
    }

    if (m_Stats != null)
        m_Stats.searchFinish();

    return neighbours;
}

From source file:org.textmin.tubes.dtm.decorator.InstancesBuilder.java

/**
 * Builds a document-term dataset: one row per document, with the nominal
 * class attribute "__TheClass" at index 0 followed by one numeric attribute
 * per term.
 *
 * @param rel the relation name for the resulting dataset
 * @param classes the nominal class values, in order
 * @return the populated {@code Instances} with class index 0
 */
public Instances buildInstances(String rel, String[] classes) {
    ArrayList<String> classNominal = new ArrayList<>(Arrays.asList(classes));

    Attribute classAttribute = new Attribute("__TheClass", classNominal);
    ArrayList<Attribute> attributes = new ArrayList<>(termCount() + 1);
    attributes.add(classAttribute);
    termSet().forEach(term -> attributes.add(new Attribute(term)));

    Instances result = new Instances(rel, attributes, documentCount());
    result.setClassIndex(0);

    for (String doc : documentSet()) {
        DenseInstance row = new DenseInstance(attributes.size());
        row.setValue(classAttribute, getClass(doc));
        for (int attIdx = 1; attIdx < attributes.size(); attIdx++) {
            row.setValue(attIdx, getValue(doc, attributes.get(attIdx).name()));
        }
        result.add(row);
    }

    return result;
}

From source file:org.univ.montp2.master.gmin313.DataCrawler.java

/**
 * Scans {@code directoryPath} for .txt files and builds a weka dataset with
 * four attributes: filename, title, content (string attributes) and a
 * nominal class ("positif"/"negatif"/"neutre") decided by counting lexicon
 * hits from the positive/negative word lists.
 *
 * @param directoryPath directory containing the .txt files to crawl
 * @return the populated dataset
 * @throws Exception if the word lists cannot be loaded
 */
public static Instances createDataset(String directoryPath) throws Exception {

    FastVector atts = new FastVector(4);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("title", (FastVector) null));
    atts.addElement(new Attribute("content", (FastVector) null));
    FastVector classes = new FastVector(3);
    classes.addElement("positif");
    classes.addElement("negatif");
    classes.addElement("neutre");
    atts.addElement(new Attribute("class", classes));
    Instances data = new Instances("text_files_in_" + directoryPath, atts, 0);

    File dir = new File(directoryPath);
    List<String> stopWords = getListClassifier("stopw.txt");
    List<String> posWords = getListClassifier("GI_pos_sansNeutre.txt");
    List<String> negWords = getListClassifier("GI_neg_sansNeutre.txt");
    String[] files = dir.list();
    for (int i = 0; i < files.length; i++) {
        if (files[i].endsWith(".txt") && files[i].length() > 0) {
            try {
                double[] newInst = new double[4];
                newInst[0] = (double) data.attribute(0).addStringValue(files[i]);
                File txt = new File(directoryPath + File.separator + files[i]);
                int score_positif = 0;
                int score_negatif = 0;
                // try-with-resources: the original leaked the reader.
                try (BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(txt)))) {
                    // NOTE(review): strValue deliberately accumulates across
                    // both lines, so attribute 2 ("content") also contains the
                    // title tokens — confirm this carry-over is intended.
                    StringBuilder strValue = new StringBuilder();
                    // Line 1 feeds the "title" attribute, line 2 "content".
                    for (int j = 1; j < 3; j++) {
                        String strLine = br.readLine();
                        System.out.println(strLine);
                        StringTokenizer tokenizer = new StringTokenizer(strLine, "  ,;':%?!");
                        while (tokenizer.hasMoreTokens()) {
                            String token = tokenizer.nextToken().toLowerCase();
                            if (!stopWords.contains(token)) {
                                strValue.append(token).append(' ');
                            }
                            // Score the document against the polarity lexicons.
                            if (posWords.contains(token))
                                score_positif++;
                            // BUG FIX: a stray ';' after this condition made
                            // score_negatif increment for EVERY token, biasing
                            // every document toward "negatif".
                            if (negWords.contains(token))
                                score_negatif++;
                        }
                        newInst[j] = (double) data.attribute(j).addStringValue(strValue.toString());
                    }
                }
                // Majority of lexicon hits decides the class; ties are neutral.
                if (score_positif > score_negatif) {
                    newInst[3] = (double) data.attribute(3).indexOfValue("positif");
                } else if (score_positif < score_negatif) {
                    newInst[3] = (double) data.attribute(3).indexOfValue("negatif");
                } else {
                    newInst[3] = (double) data.attribute(3).indexOfValue("neutre");
                }
                data.add(new Instance(1.0, newInst));
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]);
            }
        }
    }
    return data;
}

From source file:org.univ.montp2.master.ncbi.api.ncbi.java

/**
 * Reads every .txt file under {@code directoryPath} into a two-attribute
 * weka dataset: a string "content" attribute (stop-words removed, lowercase)
 * and a nominal "class" attribute whose value is parsed from the file's last
 * line (text after the final '&gt;').
 *
 * @param directoryPath directory containing the .txt files
 * @return the populated dataset, or null if the stop-word list fails to load
 */
public static Instances getDataSet(String directoryPath) {
    Charset charset = Charset.defaultCharset();
    List<String> stringList;
    try {
        FastVector atts = new FastVector(2);
        atts.addElement(new Attribute("content", (FastVector) null));
        FastVector classes = new FastVector(3);
        classes.addElement("positive");
        classes.addElement("negative");
        classes.addElement("neutre");
        atts.addElement(new Attribute("class", classes));

        Instances data = new Instances("text_files_in_" + directoryPath, atts, 0);

        File dir = new File(directoryPath);
        List<String> stopWords = getListClassifier("stopwordsenglish.txt");
        String[] files = dir.list();
        for (String file : files) {
            if (file.endsWith(".txt") && file.length() > 0) {
                try {
                    stringList = Files.readAllLines(new File(directoryPath + "/" + file).toPath(), charset);
                    String[] stringArray = stringList.toArray(new String[] {});
                    // PERF FIX: StringBuilder replaces O(n^2) String
                    // concatenation over every token of the document.
                    StringBuilder strValue = new StringBuilder();
                    double[] newInst = new double[2];
                    // All lines except the last are content; the last line
                    // carries the label.
                    for (int j = 0; j < stringArray.length - 1; j++) {
                        String replaceAll = stringArray[j].toLowerCase().replaceAll(
                                "[^a-z]", " ");
                        StringTokenizer tokenizer = new StringTokenizer(replaceAll, " ");
                        String token;
                        while (tokenizer.hasMoreTokens()) {
                            token = tokenizer.nextToken();
                            if (!stopWords.contains(token.toLowerCase())) {
                                strValue.append(token.toLowerCase()).append(' ');
                            }

                        }
                    }
                    newInst[0] = (double) data.attribute(0).addStringValue(strValue.toString());
                    String opinion = "negative";
                    String strLine = stringArray[stringArray.length - 1];
                    if (strLine != null && !"".equals(strLine)) {
                        opinion = strLine.substring(strLine.indexOf('>') + 1);
                        System.out.println("strLine " + opinion);
                    }
                    // NOTE(review): indexOfValue returns -1 for an unknown
                    // label, which silently stores an invalid class value —
                    // confirm input files always carry a known label.
                    newInst[1] = (double) data.attribute(1).indexOfValue(opinion);
                    data.add(new Instance(1.0, newInst));
                } catch (Exception e) {
                    System.err.println("failed to convert file: " + directoryPath + File.separator + file
                            + " Exception " + e.getMessage());
                }
            }
        }
        return data;
    } catch (Exception ex) {
        Logger.getLogger(ncbi.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}

From source file:oxis.yologp.YOLogPDescriptor.java

License:Open Source License

/**
 * Predict the LogP./*ww w  .ja v  a  2s  .  c o  m*/
 *
 */
private void predict() throws Exception {

    Instances instances = buildDataset();

    Map<Object, Object> properties;
    for (DrugStruct drugStruct : listDrug) {

        if (drugStruct.drug.getProperty("flag")) {
            properties = drugStruct.drug.getProperties();
            Instance instance = new DenseInstance(instances.numAttributes()); //28 + 1024
            instance.setDataset(instances);
            for (Object propKey : properties.keySet()) {
                if (!(propKey.equals("hash") || propKey.equals("flag") || propKey.equals("smiles"))) {
                    try {
                        instance.setValue(instances.attribute(propKey.toString()),
                                Double.parseDouble(properties.get(propKey).toString()));
                    } catch (NullPointerException ex) {
                        Logger.getLogger(YOLogPDescriptor.class.getName()).log(Level.WARNING,
                                "Property not used: {0}", propKey.toString());
                    }
                }
            }

            double predicted = model.classifyInstance(instance);
            predicted = Math.round(predicted * 100) / 100.0d;
            instance.setClassValue(predicted);
            instances.add(instance);
            drugStruct.drug.setProperty("predicted", predicted);
        }
    }
}

From source file:oxis.yologp.YOLogPDescriptor.java

License:Open Source License

/**
 * Train a model, erase the other one/*from w w  w.j av  a  2  s  .  c o m*/
 *
 * @param String name of the model to save
 */
/**
 * Trains a fresh RandomForest model on all flagged drugs, replacing any
 * previously loaded model, and serializes it to {@code path + name}.
 *
 * @param name file name under which to save the serialized model
 * @throws Exception if descriptor computation, training, or serialization fails
 */
public void train(String name) throws Exception {

    compute();

    Instances instances = buildDataset();

    model = new RandomForest();

    for (DrugStruct drugStruct : listDrug) {

        if (drugStruct.drug.getProperty("flag")) {
            Map<Object, Object> properties = drugStruct.drug.getProperties();
            Instance instance = new DenseInstance(instances.numAttributes()); // 28 + 1024
            instance.setDataset(instances);
            for (Map.Entry<Object, Object> entry : properties.entrySet()) {
                String key = entry.getKey().toString();
                // Bookkeeping properties are not model features.
                if (key.equals("hash") || key.equals("flag") || key.equals("smiles")) {
                    continue;
                }
                Attribute attribute = instances.attribute(key);
                if (attribute == null) {
                    // Explicit null check instead of catching NullPointerException
                    // as control flow (the original swallowed an NPE here).
                    Logger.getLogger(YOLogPDescriptor.class.getName()).log(Level.WARNING,
                            "Property not used: {0}", key);
                    continue;
                }
                instance.setValue(attribute, Double.parseDouble(entry.getValue().toString()));
            }
            instance.setClassValue(drugStruct.getLogP());
            instances.add(instance);
        }
    }
    // Forest hyper-parameters; presumably tuned for this dataset — TODO confirm.
    model.setNumFeatures(200);
    model.setNumTrees(400);
    model.setMaxDepth(0); // 0 = unlimited depth in Weka's RandomForest
    model.buildClassifier(instances);

    weka.core.SerializationHelper.write(path + name, model);
}

From source file:PointAnalyser.Main.java

/**
 * Classifies a single four-feature point (ring distances to dragoon and
 * three zergs) with the pre-built C4.5 tree.
 *
 * @param inst four numeric strings: ring(dragoon), ring(zerg1), ring(zerg2), ring(zerg3)
 * @return the predicted class index from the shared {@code tree} classifier
 * @throws Exception if the filter or classifier fails
 */
public static int classifyC45Instance(ArrayList<String> inst) throws Exception {

    NumericToNominal nmf = new NumericToNominal();
    nmf.setInputFormat(data);

    Attribute a = new Attribute("ring(dragoon)");
    Attribute b = new Attribute("ring(zerg1)");
    Attribute c = new Attribute("ring(zerg2)");
    Attribute d = new Attribute("ring(zerg3)");

    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("safe");
    Attribute e = new Attribute("@@class@@", classVal);

    ArrayList<Attribute> attributeList = new ArrayList<Attribute>();
    attributeList.add(a);
    attributeList.add(b);
    attributeList.add(c);
    attributeList.add(d);
    attributeList.add(e);

    Instances dataSet = new Instances("TestInstances", attributeList, 0);
    dataSet = Filter.useFilter(dataSet, nmf);
    dataSet.setClassIndex(dataSet.numAttributes() - 1);

    DenseInstance ins = new DenseInstance(dataSet.numAttributes());
    ins.setDataset(dataSet);
    ins.setValue(a, Integer.parseInt(inst.get(0)));
    ins.setValue(b, Integer.parseInt(inst.get(1)));
    ins.setValue(c, Integer.parseInt(inst.get(2)));
    ins.setValue(d, Integer.parseInt(inst.get(3)));
    // Add AFTER the values are set: Instances.add appends a copy, so adding
    // first (as the original did) put an all-missing instance into the set.
    dataSet.add(ins);

    return (int) tree.classifyInstance(ins);
}

From source file:PointAnalyser.Main.java

/**
 * Classifies a single four-feature point (ring distances to dragoon and
 * three zergs) with the pre-built nearest-neighbour classifier.
 *
 * @param inst four numeric strings: ring(dragoon), ring(zerg1), ring(zerg2), ring(zerg3)
 * @return the predicted class index from the shared {@code nn} classifier
 * @throws Exception if the filter or classifier fails
 */
public static int classifyNNInstance(ArrayList<String> inst) throws Exception {

    NumericToNominal nmf = new NumericToNominal();
    nmf.setInputFormat(data);

    Attribute a = new Attribute("ring(dragoon)");
    Attribute b = new Attribute("ring(zerg1)");
    Attribute c = new Attribute("ring(zerg2)");
    Attribute d = new Attribute("ring(zerg3)");

    ArrayList<String> classVal = new ArrayList<String>();
    classVal.add("safe");
    Attribute e = new Attribute("@@class@@", classVal);

    ArrayList<Attribute> attributeList = new ArrayList<Attribute>();
    attributeList.add(a);
    attributeList.add(b);
    attributeList.add(c);
    attributeList.add(d);
    attributeList.add(e);

    Instances dataSet = new Instances("TestInstances", attributeList, 0);
    dataSet = Filter.useFilter(dataSet, nmf);
    dataSet.setClassIndex(dataSet.numAttributes() - 1);

    DenseInstance ins = new DenseInstance(dataSet.numAttributes());
    ins.setDataset(dataSet);
    ins.setValue(a, Integer.parseInt(inst.get(0)));
    ins.setValue(b, Integer.parseInt(inst.get(1)));
    ins.setValue(c, Integer.parseInt(inst.get(2)));
    ins.setValue(d, Integer.parseInt(inst.get(3)));
    // Add AFTER the values are set: Instances.add appends a copy, so adding
    // first (as the original did) put an all-missing instance into the set.
    dataSet.add(ins);

    return (int) nn.classifyInstance(ins);
}