Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usage of the weka.core Instances constructor.

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Source Link

Document

Creates an empty set of instances.

Usage

From source file:cezeri.utils.FactoryInstance.java

/**
 * Builds a Weka dataset from a CMatrix. Attributes are named "f1".."fN"
 * (one per column) and the last attribute is set as the class attribute.
 *
 * @param relationName name for the resulting relation
 * @param cm           source matrix; one instance is created per row
 * @return an Instances object mirroring the matrix contents
 */
public static Instances generateInstances(String relationName, CMatrix cm) {
    FastVector att = new FastVector();
    for (int i = 0; i < cm.getColumnNumber(); i++) {
        att.addElement(new Attribute("f" + (i + 1)));
    }
    Instances ret = new Instances(relationName, att, cm.getRowNumber());
    // Hoisted out of the loops: the original called cm.get2DArrayDouble()
    // once per cell, re-materializing the whole 2D array each time.
    double[][] data = cm.get2DArrayDouble();
    for (int i = 0; i < cm.getRowNumber(); i++) {
        Instance ins = new Instance(cm.getColumnNumber());
        for (int j = 0; j < cm.getColumnNumber(); j++) {
            ins.setValue(j, data[i][j]);
        }
        ret.add(ins);
    }
    // Convention in this factory: the last column is the class.
    ret.setClassIndex(ret.numAttributes() - 1);
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

/**
 * Builds a Weka dataset from a raw double matrix. Attributes are named
 * "f1".."fN" (one per column of the first row) and the last attribute is
 * set as the class attribute.
 *
 * @param relationName name for the resulting relation
 * @param m            row-major data matrix; must have at least one row
 * @return an Instances object containing one instance per row of {@code m}
 * @throws IllegalArgumentException if {@code m} is null or empty
 */
public static Instances generateInstances(String relationName, double[][] m) {
    if (m == null || m.length == 0) {
        // Fail fast with a clear message instead of the opaque
        // ArrayIndexOutOfBoundsException the original threw at m[0].
        throw new IllegalArgumentException("matrix must contain at least one row");
    }
    int cols = m[0].length; // loop-invariant column count, hoisted
    FastVector att = new FastVector();
    for (int i = 0; i < cols; i++) {
        att.addElement(new Attribute("f" + (i + 1)));
    }
    Instances ret = new Instances(relationName, att, m.length);
    for (double[] row : m) {
        Instance ins = new Instance(cols);
        for (int j = 0; j < cols; j++) {
            ins.setValue(j, row[j]);
        }
        ret.add(ins);
    }
    // Convention in this factory: the last column is the class.
    ret.setClassIndex(ret.numAttributes() - 1);
    return ret;
}

From source file:clasificacion.Clasificacion.java

/**
 * Classifies one record of the "nursery" dataset using the serialized
 * classifier bundled on the classpath as "nursery_model.model".
 *
 * @param testCases the 8 nominal input values, in dataset order:
 *        parents, has_nurs, form, children, housing, finance, social, health
 * @return the predicted class label ("not_recom", "recommend", "very_recom",
 *         "priority" or "spec_prior")
 * @throws Exception if the model cannot be deserialized or classification fails
 */
public String clasificar(String[] testCases) throws Exception {
    String ruta = "nursery_model.model";

    // NOTE(review): getResourceAsStream returns null when the resource is
    // missing, and the stream is not explicitly closed here — verify that
    // SerializationHelper.read closes it.
    InputStream classModelStream;
    classModelStream = getClass().getResourceAsStream(ruta);
    Classifier clasify = (Classifier) SerializationHelper.read(classModelStream);

    // Rebuild the nominal attributes exactly as declared in the training data.
    FastVector parents = new FastVector();
    parents.addElement("usual");
    parents.addElement("pretentious");
    parents.addElement("great_pret");
    Attribute _parent = new Attribute("parents", parents);

    FastVector nurs = new FastVector();
    nurs.addElement("proper");
    nurs.addElement("less_proper");
    nurs.addElement("improper");
    nurs.addElement("critical");
    nurs.addElement("very_crit");
    Attribute _has_nurs = new Attribute("has_nurs", nurs);

    FastVector form = new FastVector();
    form.addElement("complete");
    form.addElement("completed");
    form.addElement("incomplete");
    form.addElement("foster");
    Attribute _form = new Attribute("form", form);

    FastVector children = new FastVector();
    children.addElement("1");
    children.addElement("2");
    children.addElement("3");
    children.addElement("more");
    Attribute _children = new Attribute("children", children);

    FastVector housing = new FastVector();
    housing.addElement("convenient");
    housing.addElement("less_conv");
    housing.addElement("critical");
    Attribute _housing = new Attribute("housing", housing);

    FastVector finance = new FastVector();
    finance.addElement("convenient");
    finance.addElement("inconv");
    Attribute _finance = new Attribute("finance", finance);

    FastVector social = new FastVector();
    social.addElement("nonprob");
    social.addElement("slightly_prob");
    social.addElement("problematic");
    Attribute _social = new Attribute("social", social);

    FastVector health = new FastVector();
    health.addElement("recommended");
    health.addElement("priority");
    health.addElement("not_recom");
    Attribute _health = new Attribute("health", health);

    FastVector Class = new FastVector();
    Class.addElement("not_recom");
    Class.addElement("recommend");
    Class.addElement("very_recom");
    Class.addElement("priority");
    Class.addElement("spec_prior");
    Attribute _Class = new Attribute("class", Class);

    // NOTE(review): 'atributos' (FastVector) and 'atributs' (ArrayList) hold
    // the same nine attributes; the FastVector feeds the Instances constructor
    // and the ArrayList is only used for index lookups below.
    FastVector atributos = new FastVector(9);
    atributos.addElement(_parent);
    atributos.addElement(_has_nurs);
    atributos.addElement(_form);
    atributos.addElement(_children);
    atributos.addElement(_housing);
    atributos.addElement(_finance);
    atributos.addElement(_social);
    atributos.addElement(_health);
    atributos.addElement(_Class);

    ArrayList<Attribute> atributs = new ArrayList<>();
    atributs.add(_parent);
    atributs.add(_has_nurs);
    atributs.add(_form);
    atributs.add(_children);
    atributs.add(_housing);
    atributs.add(_finance);
    atributs.add(_social);
    atributs.add(_health);
    atributs.add(_Class);

    // Create the dataset that carries all the attributes of the model.
    Instances dataTest = new Instances("TestCases", atributos, 1);
    dataTest.setClassIndex(8);

    Instance setPrueba = new Instance(9);

    // Map each textual value to its nominal index within its attribute.
    int index = -1;
    for (int i = 0; i < 8; i++) {
        index = atributs.get(i).indexOfValue(testCases[i]);
        setPrueba.setValue(atributs.get(i), index);
    }

    // Add the record to be evaluated.
    dataTest.add(setPrueba);

    // Run the prediction; instance 0 is the only one in the dataset.
    double valorP = clasify.classifyInstance(dataTest.instance(0));
    // Resolve the numeric prediction back to the class label.
    String prediccion = dataTest.classAttribute().value((int) valorP);

    return prediccion;
}

From source file:clasificador.ClasificadorADN.java

/**
 * Scans a DNA sequence file for candidate transition sites of the requested
 * type, builds a Weka dataset from a window of bases around each candidate,
 * classifies the windows with the model at {@code RutaModelo}, and returns
 * the positions judged positive together with their distribution.
 *
 * @param datos              file whose first line holds the sequence
 * @param sitio              site type: 0 exon-intron (GT), 1 intron-exon (AG),
 *                           2 exon-intergenic, 3 intergenic-exon
 * @param RutaModelo         path of the serialized classifier model
 * @param seleccionAtributos whether to keep only the positions in vectorAtributos
 * @param vectorAtributos    base positions to keep when selecting attributes
 * @param limI               bases taken before the site (presumably; confirm)
 * @param limS               bases taken after the site (presumably; confirm)
 * @param umbral             classification threshold passed to clasificar(modelo, umbral)
 * @return list of two elements: positive positions and their distribution
 * @throws Exception if reading, model loading or classification fails
 */
public ArrayList<Object> clasificar(File datos, int modelo, int sitio, String RutaModelo,
        boolean seleccionAtributos, int[] vectorAtributos, int limI, int limS, double umbral) throws Exception {

    // Pick the 2-base marker and report label for the requested site type.
    String genstr = "", genstrclean = "";
    switch (sitio) {
    case 0:
        genstr = "g,t";
        genstrclean = "gt";
        TextoGen = "Exon-Intron-GT-";
        break;
    case 1:
        genstr = "a,g";
        genstrclean = "ag";
        TextoGen = "Intron-Exon-AG-";
        break;
    case 2:
        genstr = ",";
        TextoGen = "Exon-ZonaIntergenica-";
        break;
    case 3:
        genstr = ",";
        TextoGen = "ZonaIntergenica-Exon-";
        break;
    }
    LeerArchivo arcp = new LeerArchivo(datos.getPath());

    int sitiosTrans = arcp.CantidadOcurrencias(genstr);

    if (!seleccionAtributos) {
        inicializarVectorAtributos(sitio, (limI + limS));
    } else {
        crearAtributos(sitio, vectorAtributos.length + 1, vectorAtributos);
    }

    // TODO (from original author): sitiosTrans may allocate capacity for more
    // instances than are actually created; should be corrected.
    datapredict = new Instances(TextoGen, atts, sitiosTrans);

    posiciones = new Integer[sitiosTrans];

    int ConPos = 0;
    int contador = 0;
    int longLinea, limInf, limSup;
    String linea = arcp.LeerLinea();
    String captura;
    // Strip the annotation characters so indices refer to bare bases.
    linea = linea.replace("[", "");
    linea = linea.replace("]", "");
    linea = linea.replace(",", "");
    longLinea = linea.length();

    // For GT/AG sites iterate over marker occurrences; otherwise scan every position.
    int ocurrencias = sitio <= 1 ? sitiosTrans : longLinea, i = -1;
    System.out.println("Ocurrencias " + ocurrencias);

    for (int x = 0; x < ocurrencias; x++) {
        if (sitio <= 1) {
            i = linea.indexOf(genstrclean, i + 1);
        } else {
            i = x;
        }

        captura = "";
        try {
            // Window around the site; the +2 skips the 2-base marker itself.
            limInf = i - limI;
            limSup = i + limS + (sitio <= 1 ? 2 : 0);

            if (limInf > 0 && limSup < longLinea) {

                captura = linea.substring(limInf, i);
                captura = captura + linea.substring(i + (sitio <= 1 ? 2 : 0), limSup);
                contador++;

                // Encode bases numerically: a=0, c=1, g=2, t=3.
                captura = captura.replace("a", "0");
                captura = captura.replace("c", "1");
                captura = captura.replace("g", "2");
                captura = captura.replace("t", "3");

                String[] bases = captura.split("");
                int canAtrib = datapredict.numAttributes();
                double[] attValues = new double[canAtrib];

                for (int k = 0; k < canAtrib - 1; k++) {
                    if (seleccionAtributos) {
                        attValues[k] = Integer.parseInt(bases[vectorAtributos[k]]);
                    } else {
                        attValues[k] = Integer.parseInt(bases[k]);
                    }
                }

                // Original author's question: why is the instance created this
                // way, and is it the only way to do it in this case?
                datapredict.add(new Instance(1.0, attValues));
                posiciones[ConPos] = i;
                ConPos++;

            }
        } catch (StringIndexOutOfBoundsException e) {
            // Capture window falls outside the line; skip this candidate site.
        }

    }

    datapredict.setClassIndex(datapredict.numAttributes() - 1);
    cargarModelo(RutaModelo, modelo);

    clasificar(modelo, umbral);
    reportarResultados(true);

    if (sitio == 1) {

        // AG sites: shift reported positions by one (presumably to point at
        // the base after the marker — confirm against reporting code).
        for (int pos = 0; pos < positivos.size(); pos++) {

            positivos.set(pos, positivos.get(pos) + 1);

        }

    }

    ArrayList<Object> results = new ArrayList<>();

    results.add((Object) (positivos));
    results.add((Object) (distPos));

    return results;
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Determines the word dictionary and sets the filter's output format.
 *
 * <p>When {@code forcedAttributes} is null the dictionary is computed from
 * the input data: tokens are optionally lower-cased, stemmed and
 * stoplist-filtered, counted per class value (unless per-class operation is
 * disabled), periodically pruned of singletons, and finally cut at a
 * per-class frequency threshold chosen so roughly m_WordsToKeep words
 * survive. When {@code forcedAttributes} is non-null, that attribute list is
 * used verbatim (attributes from index 2 onward are treated as word
 * attributes) and only the word-to-index dictionary is rebuilt.
 */
private void determineDictionary() {
    if (forcedAttributes == null) {
        // initialize stopwords
        Stopwords stopwords = new Stopwords();
        if (getUseStoplist()) {
            try {
                if (getStopwords().exists() && !getStopwords().isDirectory())
                    stopwords.read(getStopwords());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        // Operate on a per-class basis if class attribute is set
        int classInd = getInputFormat().classIndex();
        int values = 1;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            values = getInputFormat().attribute(classInd).numValues();
        }

        // One word-count map per class value.
        TreeMap[] dictionaryArr = new TreeMap[values];
        for (int i = 0; i < values; i++) {
            dictionaryArr[i] = new TreeMap();
        }

        // Make sure we know which fields to convert
        determineSelectedRange();

        // Tokenize all training text into an orderedMap of "words".
        long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
            Instance instance = getInputFormat().instance(i);
            int vInd = 0;
            if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
                vInd = (int) instance.classValue();
            }

            // Iterate through all relevant string attributes of the current
            // instance
            Hashtable h = new Hashtable();
            for (int j = 0; j < instance.numAttributes(); j++) {
                if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                    // Get tokenizer
                    m_Tokenizer.tokenize(instance.stringValue(j));

                    // Iterate through tokens, perform stemming, and remove
                    // stopwords
                    // (if required)
                    while (m_Tokenizer.hasMoreElements()) {
                        String word = ((String) m_Tokenizer.nextElement()).intern();

                        if (this.m_lowerCaseTokens == true)
                            word = word.toLowerCase();

                        word = m_Stemmer.stem(word);

                        if (this.m_useStoplist == true)
                            if (stopwords.is(word))
                                continue;

                        // h tracks which words occurred in THIS document.
                        if (!(h.contains(word)))
                            h.put(word, new Integer(0));

                        Count count = (Count) dictionaryArr[vInd].get(word);
                        if (count == null) {
                            dictionaryArr[vInd].put(word, new Count(1));
                        } else {
                            count.count++;
                        }
                    }
                }
            }

            // updating the docCount for the words that have occurred in
            // this
            // instance(document).
            Enumeration e = h.keys();
            while (e.hasMoreElements()) {
                String word = (String) e.nextElement();
                Count c = (Count) dictionaryArr[vInd].get(word);
                if (c != null) {
                    c.docCount++;
                } else
                    System.err.println("Warning: A word should definitely be in the "
                            + "dictionary.Please check the code");
            }

            // Periodically drop words seen only once to bound memory use.
            if (pruneRate > 0) {
                if (i % pruneRate == 0 && i > 0) {
                    for (int z = 0; z < values; z++) {
                        Vector d = new Vector(1000);
                        Iterator it = dictionaryArr[z].keySet().iterator();
                        while (it.hasNext()) {
                            String word = (String) it.next();
                            Count count = (Count) dictionaryArr[z].get(word);
                            if (count.count <= 1) {
                                d.add(word);
                            }
                        }
                        Iterator iter = d.iterator();
                        while (iter.hasNext()) {
                            String word = (String) iter.next();
                            dictionaryArr[z].remove(word);
                        }
                    }
                }
            }
        }

        // Figure out the minimum required word frequency
        int totalsize = 0;
        int prune[] = new int[values];
        for (int z = 0; z < values; z++) {
            totalsize += dictionaryArr[z].size();

            int array[] = new int[dictionaryArr[z].size()];
            int pos = 0;
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                array[pos] = count.count;
                pos++;
            }

            // sort the array
            sortArray(array);
            if (array.length < m_WordsToKeep) {
                // if there aren't enough words, set the threshold to
                // minFreq
                prune[z] = m_minTermFreq;
            } else {
                // otherwise set it to be at least minFreq
                prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
            }
        }

        // Convert the dictionary into an attribute index
        // and create one attribute per word
        FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

        // Add the non-converted attributes
        int classIndex = -1;
        for (int i = 0; i < getInputFormat().numAttributes(); i++) {
            if (!m_SelectedRange.isInRange(i)) {
                if (getInputFormat().classIndex() == i) {
                    classIndex = attributes.size();
                }
                attributes.addElement(getInputFormat().attribute(i).copy());
            }
        }

        // Add the word vector attributes (eliminating duplicates
        // that occur in multiple classes)
        TreeMap newDictionary = new TreeMap();
        int index = attributes.size();
        for (int z = 0; z < values; z++) {
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                if (count.count >= prune[z]) {
                    if (newDictionary.get(word) == null) {
                        newDictionary.put(word, new Integer(index++));
                        attributes.addElement(new Attribute(m_Prefix + word));
                    }
                }
            }
        }

        // Compute document frequencies
        m_DocsCounts = new int[attributes.size()];
        Iterator it = newDictionary.keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            int idx = ((Integer) newDictionary.get(word)).intValue();
            int docsCount = 0;
            for (int j = 0; j < values; j++) {
                Count c = (Count) dictionaryArr[j].get(word);
                if (c != null)
                    docsCount += c.docCount;
            }
            m_DocsCounts[idx] = docsCount;
        }

        // Trim vector and set instance variables
        attributes.trimToSize();
        m_Dictionary = newDictionary;
        m_NumInstances = getInputFormat().numInstances();

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        setOutputFormat(outputFormat);
    } else {
        determineSelectedRange();
        m_NumInstances = getInputFormat().numInstances();

        // Forced mode: word attributes start at index 2 by convention here —
        // NOTE(review): confirm indices 0/1 are the non-word attributes.
        TreeMap newDictionary = new TreeMap();
        for (int i = 2; i < forcedAttributes.size(); i++) {
            newDictionary.put(((Attribute) forcedAttributes.get(i)).name(), new Integer(i));
        }
        m_Dictionary = newDictionary;

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), forcedAttributes, 0);
        outputFormat.setClassIndex(1);
        setOutputFormat(outputFormat);
    }
}

From source file:classifier.page.PageClassifier.java

License:Open Source License

/**
 * Loads a page classifier from a configuration directory containing
 * stoplist.txt, pageclassifier.model and pageclassifier.features.
 *
 * @param cfgDir directory holding the stoplist, model and feature files
 * @return a ready-to-use PageClassifier
 * @throws IOException            if any configuration file cannot be read
 * @throws ClassNotFoundException if the serialized classifier class is unknown
 */
public static PageClassifier loadClassifier(String cfgDir) throws IOException, ClassNotFoundException {
    String stoplistFile = cfgDir + "/stoplist.txt";
    String modelFile = cfgDir + "/pageclassifier.model";
    String featureFile = cfgDir + "/pageclassifier.features";

    StopList stoplist = new StopListArquivo(stoplistFile);

    // try-with-resources: the original never closed the model streams,
    // leaking a file handle on every load.
    Classifier classifier;
    try (ObjectInputStream objectInputStream = new ObjectInputStream(new FileInputStream(modelFile))) {
        classifier = (Classifier) objectInputStream.readObject();
    }

    ParameterFile featureConfig = new ParameterFile(featureFile);
    String[] attributes = featureConfig.getParam("ATTRIBUTES", " ");
    weka.core.FastVector vectorAtt = new weka.core.FastVector();
    for (int i = 0; i < attributes.length; i++) {
        vectorAtt.addElement(new weka.core.Attribute(attributes[i]));
    }
    String[] classValues = featureConfig.getParam("CLASS_VALUES", " ");
    weka.core.FastVector classAtt = new weka.core.FastVector();
    for (int i = 0; i < classValues.length; i++) {
        classAtt.addElement(classValues[i]);
    }
    // The nominal class attribute goes last, so its index is attributes.length.
    vectorAtt.addElement(new weka.core.Attribute("class", classAtt));
    Instances insts = new Instances("target_classification", vectorAtt, 1);
    insts.setClassIndex(attributes.length);
    return new PageClassifier(classifier, insts, attributes, stoplist);
}

From source file:clustering.Clustering.java

/**
 * Evaluates the clusterer on a percentage split: after shuffling the data
 * with a fixed seed (0), the first {@code percent}% trains the clusterer and
 * the remainder is evaluated; results are printed to standard output.
 *
 * @param percent portion of the data (0-100) used for training
 */
public void percentageSplit(double percent) {
    try {
        data.randomize(new java.util.Random(0));
        int total = data.numInstances();
        int trainCount = (int) Math.round((double) total * percent / 100f);
        int testCount = total - trainCount;

        Instances trainSet = new Instances(data, 0, trainCount);
        Instances testSet = new Instances(data, trainCount, testCount);

        buildClusterer(clusterer, trainSet);

        ClusterEvaluation evaluation = new ClusterEvaluation();
        evaluation.setClusterer(model);
        evaluation.evaluateClusterer(testSet);
        System.out.println(evaluation.clusterResultsToString());
    } catch (Exception ex) {
        // Best-effort reporting: evaluation failures are printed, not rethrown.
        System.out.println(ex);
    }
}

From source file:cn.edu.xjtu.dbmine.StringToWordVector.java

License:Open Source License

/**
 * Determines the word dictionary and sets the filter's output format.
 *
 * <p>Tokens are optionally lower-cased, stemmed and stoplist-filtered,
 * counted per class value (unless per-class operation is disabled),
 * periodically pruned of singletons, and finally cut at a per-class
 * frequency threshold chosen so roughly m_WordsToKeep words survive. One
 * attribute is created per surviving word, and per-word document counts are
 * stored in m_DocsCounts.
 */
private void determineDictionary() {
    // initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    // One word-count map per class value.
    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an orderedMap of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current
        // instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove
                // stopwords
                // (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();

                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();

                    word = m_Stemmer.stem(word);

                    if (this.m_useStoplist == true)
                        if (stopwords.is(word))
                            continue;

                    // h tracks which words occurred in THIS document.
                    if (!(h.contains(word)))
                        h.put(word, new Integer(0));

                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        // updating the docCount for the words that have occurred in this
        // instance(document).
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println(
                        "Warning: A word should definitely be in the " + "dictionary.Please check the code");
        }

        // Periodically drop words seen only once to bound memory use.
        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int array[] = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to
            // minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index
    // and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates
    // that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
            }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Reads the YAML configuration at the {@code yamlPath} field and wraps its
 * numeric properties in a single-instance dataset named "DefaultConfig".
 * Each attribute gets a "range" property of [val*0.5, val*1.5] (or [val, 1]
 * when the value is 0). Non-numeric entries are skipped silently.
 *
 * @param Path unused — the {@code yamlPath} field is read instead; kept for
 *             interface compatibility (NOTE(review): confirm intent)
 * @return dataset with one instance holding the parsed default values
 */
public Instances loadPropertiesAsInstancesPre(String Path) {
    HashMap<String, String> pmap = null;
    try {
        pmap = Yaml.loadType(new FileInputStream(yamlPath), HashMap.class);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }

    atts = new ArrayList<Attribute>();
    int pos = 0;
    double[] vals = new double[pmap.size()];
    for (Map.Entry<String, String> ent : pmap.entrySet()) {
        try {
            double val = Double.valueOf(String.valueOf(ent.getValue()));
            vals[pos] = val;

            Properties p1 = new Properties();
            double upper, lower;
            if (val != 0) {
                upper = val * (1. + 0.5);
                lower = val * (1. - 0.5);
            } else {
                lower = val;
                upper = 1;
            }

            p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
            ProtectedProperties prop1 = new ProtectedProperties(p1);

            atts.add(new Attribute(String.valueOf(ent.getKey()), prop1));
            pos++;
        } catch (Exception e) {
            // Non-numeric property values are intentionally skipped.
        }
    }

    // BUG FIX: the original built the DenseInstance before the attribute list
    // (sized pmap.size() instead of atts.size()) and set its values only AFTER
    // dfProp.add(dfIns). Weka's Instances.add stores a shallow copy, so those
    // values never reached the returned dataset. Populate first, then add —
    // the same order the sibling loadPropertiesAsInstances uses.
    Instances dfProp = new Instances("DefaultConfig", atts, 1);
    Instance dfIns = new DenseInstance(atts.size());
    for (int i = 0; i < pos; i++) {
        dfIns.setValue(atts.get(i), vals[i]);
    }
    dfProp.add(dfIns);
    dfIns.setDataset(dfProp);

    return dfProp;
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Reads the YAML configuration at the {@code yamlPath} field plus its
 * companion "&lt;yamlPath&gt;_range" file and wraps the numeric properties in a
 * single-instance dataset named "DefaultConfig". Keys present in the range
 * file use that range (which must contain '[' or '('); otherwise the range
 * defaults to [val*0.5, val*1.5] ([val, 1] when val is 0). Non-numeric or
 * invalid-range entries are skipped silently.
 *
 * @param Path unused — the {@code yamlPath} field is read instead;
 *             NOTE(review): confirm whether this parameter should be used
 * @return dataset with one instance holding the parsed default values
 */
public Instances loadPropertiesAsInstances(String Path) {
    HashMap<String, String> pmap = null;
    HashMap rangeMap = null;
    try {
        pmap = Yaml.loadType(new FileInputStream(yamlPath), HashMap.class);
        rangeMap = Yaml.loadType(new FileInputStream(yamlPath + "_range"), HashMap.class);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }

    atts = new ArrayList<Attribute>();
    int pos = 0;
    double[] vals = new double[pmap.size()];
    Object range = null;
    for (Map.Entry<String, String> ent : pmap.entrySet()) {
        try {
            double val = Double.valueOf(String.valueOf(ent.getValue()));
            vals[pos] = val;

            Properties p1 = new Properties();

            // Prefer an explicitly configured range for this key.
            range = rangeMap.get(ent.getKey());
            if (range != null) {
                String list = (String) range;
                if (list.indexOf('[') == -1 && list.indexOf('(') == -1)
                    throw new Exception("No Range for You" + ent.getKey());
                p1.setProperty("range", list.trim());
            } else {
                // Fall back to +/-50% around the default value.
                double upper, lower;
                if (val != 0) {
                    upper = val * (1. + 0.5);
                    lower = val * (1. - 0.5);
                } else {
                    lower = val;
                    upper = 1;
                }
                p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
            }

            ProtectedProperties prop1 = new ProtectedProperties(p1);

            atts.add(new Attribute(String.valueOf(ent.getKey()), prop1));
            pos++;
        } catch (Exception e) {
            // Skipped: non-numeric value or malformed range for this key.
        }
    }

    // Values are assigned before the instance is added, because
    // Instances.add stores a copy of the instance.
    Instances dfProp = new Instances("DefaultConfig", atts, 1);
    Instance dfIns = new DenseInstance(atts.size());
    for (int i = 0; i < pos; i++) {
        dfIns.setValue(atts.get(i), vals[i]);
    }
    dfProp.add(dfIns);
    dfIns.setDataset(dfProp);

    return dfProp;
}