Example usage for weka.core Instances deleteAttributeAt

Introduction

In this page you can find the example usage for weka.core Instances deleteAttributeAt.

Prototype



public void deleteAttributeAt(int position)

Source Link

Document

Deletes an attribute at the given position (0 to numAttributes() - 1).

Usage

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Converts the instances in the given dataset to binary, setting the specified labels to positive.
 * Note this method is destructive to data, directly modifying its contents.
 * @param data the multiclass dataset to be converted to binary.
 * @param positiveClassValue the class value to treat as positive.
 */// w  ww. j  ava  2  s.  c o m
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // ensure that data is nominal
    if (!data.classAttribute().isNominal())
        throw new IllegalArgumentException("Instances must have a nominal class.");

    // create the new class attribute
    FastVector newClasses = new FastVector(2);
    newClasses.addElement("Y");
    newClasses.addElement("N");
    Attribute newClassAttribute = new Attribute("class", newClasses);

    // alter the class attribute to be binary
    int newClassAttIdx = data.classIndex();
    data.insertAttributeAt(newClassAttribute, newClassAttIdx);
    int classAttIdx = data.classIndex();

    // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively)
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        Instance inst = data.instance(instIdx);
        if (inst.stringValue(classAttIdx).equals(positiveClassValue)) {
            inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y
        } else {
            inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be 0
        }
    }

    // switch the class index to the new class and delete the old class
    data.setClassIndex(newClassAttIdx);
    data.deleteAttributeAt(classAttIdx);

    // alter the dataset name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the train-atc functionalities. Train ATC using a double one vs. all classifier
 * (E and A) for E#A aspect categories//from w ww  .  ja  va2  s .com
 * @throws Exception 
 */
public final void trainATC2(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String testFile = parsedArguments.getString("testset");
    String paramFile2 = parsedArguments.getString("params2");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    //String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    boolean onlyTest = parsedArguments.getBoolean("testOnly");
    double threshold = 0.5;
    double threshold2 = 0.5;
    String modelsPath = "/home/inaki/elixa-atp/ovsaModels";

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");

    if (onlyTest) {
        if (FileUtilsElh.checkFile(testFile)) {
            System.err.println("read from test file");
            reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat,
                    nullSentenceOpinions, lang);
            atcTrain.setCorpus(reader);
            traindata = atcTrain.loadInstances(true, "atc");
        }
    }

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)

    //HashMap<String, Integer> opInst = atcTrain.getOpinInst();      
    //WekaWrapper classifyAtts;
    WekaWrapper onevsall;
    try {

        //classify.printMultilabelPredictions(classify.multiLabelPrediction());      */   

        //onevsall
        Instances entdata = new Instances(traindata);
        entdata.deleteAttributeAt(entdata.attribute("attCat").index());
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        entdata.setClassIndex(entdata.attribute("entCat").index());
        onevsall = new WekaWrapper(entdata, true);

        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "entCat");
            System.out.println("trainATC: one vs all models ready");
        }
        onevsall.setTestdata(entdata);
        HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath,
                paramFile + "entCat");
        System.out.println("trainATC: one vs all predictions ready");
        HashMap<Integer, String> instOps = new HashMap<Integer, String>();
        for (String oId : atcTrain.getOpinInst().keySet()) {
            instOps.put(atcTrain.getOpinInst().get(oId), oId);
        }

        atcTrain = new Features(reader, paramFile2, "3");
        entdata = atcTrain.loadInstances(true, "attTrain2_data");
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        //entdata.setClassIndex(entdata.attribute("entCat").index());

        Attribute insAtt = entdata.attribute("instanceId");
        double maxInstId = entdata.kthSmallestValue(insAtt, entdata.numDistinctValues(insAtt) - 1);
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins" + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold) {
                    //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));                  
                    // for the first one update the instances
                    if (oSubId >= 1) {
                        Instance newIns = new SparseInstance(currentInst);
                        newIns.setDataset(entdata);
                        entdata.add(newIns);
                        newIns.setValue(insAtt, maxInstId + oSubId);
                        newIns.setClassValue(cl);
                        instOps.put((int) maxInstId + oSubId, oId);

                    }
                    // if the are more create new instances
                    else {
                        currentInst.setClassValue(cl);
                        //create and add opinion to the structure
                        //   trgt, offsetFrom, offsetTo, polarity, cat, sId);
                        //Opinion op = new Opinion(instOps.get(i)+"_"+oSubId, "", 0, 0, "", cl, sId);
                        //reader.addOpinion(op);
                    }
                    oSubId++;
                }
            } //finished updating instances data                                    
        }

        entdata.setClass(entdata.attribute("attCat"));
        onevsall = new WekaWrapper(entdata, true);

        /**
         *  Bigarren sailkatzailea
         * 
         * */
        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "attCat");
            System.out.println("trainATC: one vs all attcat models ready");
        }

        ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat");

        insAtt = entdata.attribute("instanceId");
        maxInstId = entdata.kthSmallestValue(insAtt, insAtt.numValues());
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins: " + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold2) {
                    ///System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                    if (ovsaRes.get(i).get(cl) > threshold) {
                        //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));                  
                        // for the first one update the instances
                        if (oSubId >= 1) {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            //create and add opinion to the structure
                            //   trgt, offsetFrom, offsetTo, polarity, cat, sId);                     
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        // if the are more create new instances
                        else {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            //create and add opinion to the structure
                            //   trgt, offsetFrom, offsetTo, polarity, cat, sId);
                            reader.removeOpinion(oId);
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        oSubId++;
                    }
                } //finished updating instances data                                    
            }
        }
        reader.print2Semeval2015format(paramFile + "entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }

    //traindata.setClass(traindata.attribute("entAttCat"));
    System.err.println("DONE CLI train-atc2 (oneVsAll)");
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * train ATC using a single classifier (one vs. all) for E#A aspect categories.
 * //from   w  ww.  j  a  v  a2s  .c  o m
 * @param inputStream
 * @throws IOException
 */
public final void trainATCsingleCategory(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String testFile = parsedArguments.getString("testset");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    //String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    boolean onlyTest = parsedArguments.getBoolean("testOnly");
    double threshold = 0.5;

    String modelsPath = "/home/inaki/Proiektuak/BOM/SEMEVAL2015/ovsaModels";

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");

    if (onlyTest) {
        if (FileUtilsElh.checkFile(testFile)) {
            System.err.println("read from test file");
            reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat,
                    nullSentenceOpinions, lang);
            atcTrain.setCorpus(reader);
            traindata = atcTrain.loadInstances(true, "atc");
        }
    }

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)

    //HashMap<String, Integer> opInst = atcTrain.getOpinInst();
    //WekaWrapper classifyEnts;
    //WekaWrapper classifyAtts;
    WekaWrapper onevsall;
    try {

        //classify.printMultilabelPredictions(classify.multiLabelPrediction());      */   

        //onevsall
        //Instances entdata = new Instances(traindata);
        traindata.deleteAttributeAt(traindata.attribute("attCat").index());
        traindata.deleteAttributeAt(traindata.attribute("entCat").index());
        traindata.setClassIndex(traindata.attribute("entAttCat").index());
        onevsall = new WekaWrapper(traindata, true);

        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "entAttCat");
            System.out.println("trainATC: one vs all models ready");
        }
        onevsall.setTestdata(traindata);
        HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath,
                paramFile + "entAttCat");
        System.out.println("trainATC: one vs all predictions ready");
        HashMap<Integer, String> kk = new HashMap<Integer, String>();
        for (String oId : atcTrain.getOpinInst().keySet()) {
            kk.put(atcTrain.getOpinInst().get(oId), oId);
        }

        Object[] ll = ovsaRes.get(1).keySet().toArray();
        for (Object l : ll) {
            System.err.print((String) l + " - ");
        }
        System.err.print("\n");

        for (int i : ovsaRes.keySet()) {
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(kk.get(i)).getsId();
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold) {
                    //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                    oSubId++;
                    //create and add opinion to the structure
                    //trgt, offsetFrom, offsetTo, polarity, cat, sId);
                    Opinion op = new Opinion(kk.get(i) + "_" + oSubId, "", 0, 0, "", cl, sId);
                    reader.addOpinion(op);
                }
            }
        }
        reader.print2Semeval2015format(paramFile + "entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }

    //traindata.setClass(traindata.attribute("entAttCat"));
    System.err.println("DONE CLI train-atc2 (oneVsAll)");
}

From source file:elh.eus.absa.WekaWrapper.java

License:Open Source License

/**
 *      Train one vs all models over the given training data.
 *  /*from   w  w  w  .  j ava 2s .com*/
 * @param modelpath directory to store each model for the one vs. all method
 * @param prefix prefix the models should have (each model will have the name of its class appended
 * @throws Exception
 */
public void trainOneVsAll(String modelpath, String prefix) throws Exception {
    Instances orig = new Instances(traindata);
    Enumeration<Object> classValues = traindata.classAttribute().enumerateValues();
    String classAtt = traindata.classAttribute().name();
    while (classValues.hasMoreElements()) {
        String v = (String) classValues.nextElement();
        System.err.println("trainer onevsall for class " + v + " classifier");
        //needed because of weka's sparse data format problems THIS IS TROUBLE! ...
        if (v.equalsIgnoreCase("dummy")) {
            continue;
        }
        // copy instances and set the same class value
        Instances ovsa = new Instances(orig);
        //create a new class attribute         
        //   // Declare the class attribute along with its values
        ArrayList<String> classVal = new ArrayList<String>();
        classVal.add("dummy"); //needed because of weka's sparse data format problems...
        classVal.add(v);
        classVal.add("UNKNOWN");
        ovsa.insertAttributeAt(new Attribute(classAtt + "2", classVal), ovsa.numAttributes());
        //change all instance labels that have not the current class value to "other"
        for (int i = 0; i < ovsa.numInstances(); i++) {
            Instance inst = ovsa.instance(i);
            String instClass = inst.stringValue(ovsa.attribute(classAtt).index());
            if (instClass.equalsIgnoreCase(v)) {
                inst.setValue(ovsa.attribute(classAtt + "2").index(), v);
            } else {
                inst.setValue(ovsa.attribute(classAtt + "2").index(), "UNKNOWN");
            }
        }
        //delete the old class attribute and set the new.         
        ovsa.setClassIndex(ovsa.attribute(classAtt + "2").index());
        ovsa.deleteAttributeAt(ovsa.attribute(classAtt).index());
        ovsa.renameAttribute(ovsa.attribute(classAtt + "2").index(), classAtt);
        ovsa.setClassIndex(ovsa.attribute(classAtt).index());

        //build the classifier, crossvalidate and store the model
        setTraindata(ovsa);
        saveModel(modelpath + File.separator + prefix + "_" + v + ".model");
        setTestdata(ovsa);
        testModel(modelpath + File.separator + prefix + "_" + v + ".model");

        System.err.println("trained onevsall " + v + " classifier");
    }

    setTraindata(orig);
}

From source file:entities.ArffFile.java

/**
 * Dada una lista de parametros, se ejecuta el filtro de microagregacion.
 * Todos estos parametros son entrada del usuario.
 * @param df Puede ser Euclidian o Manhattan distance, se especifica en la entrada.
 * @param numCluster/*from  w ww.  j  a v  a 2 s.c om*/
 * @param seed
 * @param maxIterations
 * @param replaceMissingValues
 * @param preserveInstancesOrder
 * @param attributes lista de los atributos que se desean generalizar con cluster
 */
public void microAgregacion(DistanceFunction df, int numCluster, int seed, int maxIterations,
        boolean replaceMissingValues, boolean preserveInstancesOrder, List<Integer> attributes)
        throws Exception {
    //instancesFilter = new Instances(instances);
    SimpleKMeans kMeans;
    kMeans = new SimpleKMeans();
    Instances uniqueAttributes;
    uniqueAttributes = new Instances(instancesFilter);
    List<String> names = new ArrayList<>();
    int i = 0;
    for (Integer attribute : attributes) {
        String name = new String(instancesFilter.attribute(attribute).name());
        if (instancesFilter.attribute(attribute).isDate() || instancesFilter.attribute(attribute).isString())
            throw new Exception("No se puede hacer cluster con atributos de tipo DATE o STRING");
        names.add(name);
    }
    while (uniqueAttributes.numAttributes() != attributes.size()) {
        if (!names.contains(uniqueAttributes.attribute(i).name()))
            uniqueAttributes.deleteAttributeAt(i);
        else
            i++;
    }
    try {
        kMeans.setNumClusters(numCluster);
        kMeans.setMaxIterations(maxIterations);
        kMeans.setSeed(seed);
        kMeans.setDisplayStdDevs(false);
        kMeans.setDistanceFunction(df);
        kMeans.setDontReplaceMissingValues(replaceMissingValues);
        kMeans.setPreserveInstancesOrder(preserveInstancesOrder);
        kMeans.buildClusterer(uniqueAttributes);
        //System.out.println(kMeans);
        for (int j = 0; j < uniqueAttributes.numInstances(); j++) {
            int cluster = kMeans.clusterInstance(uniqueAttributes.instance(j));
            for (int k = 0; k < uniqueAttributes.numAttributes(); k++) {
                if (uniqueAttributes.attribute(k).isNumeric())
                    uniqueAttributes.instance(j).setValue(k,
                            Double.parseDouble(kMeans.getClusterCentroids().instance(cluster).toString(k)));
                else
                    uniqueAttributes.instance(j).setValue(k,
                            kMeans.getClusterCentroids().instance(cluster).toString(k));
            }
        }
        replaceValues(uniqueAttributes, attributes);
    } catch (Exception ex) {
        Logger.getLogger(ArffFile.class.getName()).log(Level.SEVERE, null, ex);
    }
    //saveToFile("4");
}

From source file:es.bsc.autonomic.powermodeller.tools.classifiers.WekaWrapper.java

License:Apache License

public static DataSet processDataSet(DataSet ds, VariableParser parser) {

    String independent = ds.getIndependent();

    if (independent == null)
        throw new WekaWrapperException("Independent variable is not set in dataset.");

    HashMap<String, String> expression_list = parser.getNewMetrics();
    Instances data = convertDataSetToInstances(ds);

    try {// w  w w.java2 s.  com
        // Apply filters for all the new variables
        for (Map.Entry<String, String> entry : expression_list.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            logger.debug("Generating new variable " + key + " as " + value);

            AddExpression add_filter = new AddExpression();
            add_filter.setName(key);
            add_filter.setExpression(value);
            add_filter.setInputFormat(data);

            data = useFilter(data, add_filter);

        }

    } catch (Exception e) {
        logger.error("Error while processing new variables", e);
        throw new WekaWrapperException("Error while processing new variables");
    }

    // Iterate over all the columns and keep only the ones contained in variables list
    List<String> variables = parser.getColumns();

    // Append independent variable to the list of variables to keep
    variables.add(independent);

    // Remove unneeded attributes
    try {

        // it's important to iterate from last to first, because when we remove
        // an instance, the rest shifts by one position.
        for (int i = data.numAttributes() - 1; i >= 0; i--) {
            String n = data.attribute(i).name();
            if (!variables.contains(data.attribute(i).name())) {
                logger.trace("Deleting unnecessary attribute " + data.attribute(i).name());
                data.deleteAttributeAt(i);
            }
        }

        data.toString();
    } catch (Exception e) {
        logger.error("Error while removing unneeded variables", e);
        throw new WekaWrapperException("Error while removing unneeded variables");
    }

    // Convert Instances in csv and return the new DataSet
    String new_path = CoreConfiguration.getNewCSVFileName();
    try {
        CSVSaver saver = new CSVSaver();
        saver.setInstances(data);
        saver.setFile(new File(new_path));
        saver.writeBatch();
    } catch (Exception e) {
        logger.error("Error while removing unneeded variables", e);
        throw new WekaWrapperException("Error while removing unneeded variables");
    }

    DataSet ret = new DataSet(new_path);
    ret.setIndependent(independent);
    return ret;
}

From source file:fantail.algorithms.RankingByPairwiseComparison.java

License:Open Source License

@Override
public void buildRanker(Instances data) throws Exception {
    m_Classifiers = new ArrayList<weka.classifiers.AbstractClassifier>();
    m_AlgoPairs = new ArrayList<String>();
    m_NumLabels = Tools.getNumberTargets(data);

    // build pb datasets
    for (int a = 0; a < m_NumLabels; a++) {
        for (int b = 0; b < m_NumLabels; b++) {

            String pairStr = a + "|" + b;
            if (!hasPair(m_AlgoPairs, pairStr) && a != b) {
                m_AlgoPairs.add(pairStr);

                Instances d = new Instances(data);
                d.setClassIndex(-1);//from   ww  w .  j  ava2  s  . co m
                d.deleteAttributeAt(d.numAttributes() - 1);

                weka.filters.unsupervised.attribute.Add add = new weka.filters.unsupervised.attribute.Add();
                add.setInputFormat(d);
                add.setOptions(weka.core.Utils
                        .splitOptions("-T NOM -N class -L " + ((int) a) + "," + ((int) b) + " -C last"));

                d = Filter.useFilter(d, add);
                d.setClassIndex(d.numAttributes() - 1);

                for (int i = 0; i < d.numInstances(); i++) {

                    Instance metaInst = (Instance) data.instance(i);
                    Instance inst = d.instance(i);

                    double[] rankVector = Tools.getTargetVector(metaInst);

                    double rank_a = rankVector[a];
                    double rank_b = rankVector[b];

                    if (rank_a < rank_b) {
                        inst.setClassValue(0.0);
                    } else {
                        inst.setClassValue(1.0);
                    }
                }

                //weka.classifiers.functions.SMO cls = new weka.classifiers.functions.SMO();
                //String ops = "weka.classifiers.functions.SMO -C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 0.01\"";
                //cls.setOptions(weka.core.Utils.splitOptions(ops));                   
                //cls.buildClassifier(d);
                //weka.classifiers.functions.Logistic cls = new weka.classifiers.functions.Logistic();
                //weka.classifiers.trees.J48 cls = new weka.classifiers.trees.J48();
                //weka.classifiers.rules.ZeroR cls = new weka.classifiers.rules.ZeroR();
                weka.classifiers.trees.DecisionStump cls = new weka.classifiers.trees.DecisionStump();
                cls.buildClassifier(d);
                m_Classifiers.add(cls);
                m_BaseClassifierName = cls.getClass().getSimpleName();
                m_Add = add;
            }
        }
    }
}

From source file:fantail.algorithms.RankingByPairwiseComparison.java

License:Open Source License

@Override
public double[] recommendRanking(Instance testInst) throws Exception {
    Instances tempData = new Instances(testInst.dataset(), 0);
    tempData.add((Instance) testInst.copy());
    // remove the relation att
    tempData.setClassIndex(-1);//from  w w w  .j  av a 2  s . c  o  m
    tempData.deleteAttributeAt(tempData.numAttributes() - 1);
    tempData = Filter.useFilter(tempData, m_Add);
    tempData.setClassIndex(tempData.numAttributes() - 1);
    double predRanking[] = new double[m_NumLabels];
    for (int i = 0; i < predRanking.length; i++) {
        predRanking[i] = m_NumLabels - 1;
    }
    for (int i = 0; i < m_Classifiers.size(); i++) {
        double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0));
        String algoPair = m_AlgoPairs.get(i);
        String[] parts = algoPair.split("\\|");
        int trueIndex = Integer.parseInt(parts[(int) predIndex]);
        predRanking[trueIndex] -= 1;
    }
    predRanking = Tools.doubleArrayToRanking(predRanking);
    return predRanking;
}

From source file:fantail.algorithms.RankingByPairwiseComparison.java

License:Open Source License

public double[] recommendRanking2(Instance testInst) throws Exception {
    Instances tempData = new Instances(testInst.dataset(), 0);
    tempData.add((Instance) testInst.copy());
    // remove the relation att
    tempData.setClassIndex(-1);/*from   w  w  w .  jav a  2  s  . co  m*/
    tempData.deleteAttributeAt(tempData.numAttributes() - 1);
    tempData = Filter.useFilter(tempData, m_Add);
    tempData.setClassIndex(tempData.numAttributes() - 1);
    double predRanking[] = new double[m_NumLabels];
    for (int i = 0; i < m_Classifiers.size(); i++) {
        double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0));
        double predProb = m_Classifiers.get(i).distributionForInstance(tempData.instance(0))[0];
        String algoPair = m_AlgoPairs.get(i);
        String[] parts = algoPair.split("\\|");
        int trueIndex = Integer.parseInt(parts[(int) predIndex]);
        predRanking[trueIndex] -= predProb;
    }
    return Tools.doubleArrayToRanking(predRanking);
}

From source file:lu.lippmann.cdb.datasetview.tabs.StatsTabView.java

License:Open Source License

public static Instances buildStatsForNumericalAttributes(final Instances dataset) throws Exception {
    final StringBuilder sb = new StringBuilder("@relation blabla\n");
    sb.append("@attribute 'name' string\n");
    sb.append("@attribute 'min' string\n");
    sb.append("@attribute 'max' string\n");
    sb.append("@attribute 'mean' string\n");
    sb.append("@attribute 'stdDev' string\n");
    sb.append("@attribute 'missing values count' string\n");
    sb.append("@attribute 'missing values %' string\n");
    sb.append("@attribute 'values repartition' string\n");
    sb.append("@data\n");

    for (int i = 0; i < dataset.numAttributes(); i++) {
        if (dataset.attribute(i).isNumeric() && !dataset.attribute(i).isDate()) {
            sb.append(dataset.attribute(i).name()).append(",")
                    .append(FormatterUtil.DECIMAL_FORMAT.format(dataset.attributeStats(i).numericStats.min))
                    .append(",")
                    .append(FormatterUtil.DECIMAL_FORMAT.format(dataset.attributeStats(i).numericStats.max))
                    .append(",")
                    .append(FormatterUtil.DECIMAL_FORMAT.format(dataset.attributeStats(i).numericStats.mean))
                    .append(",")
                    .append(FormatterUtil.DECIMAL_FORMAT.format(dataset.attributeStats(i).numericStats.stdDev))
                    .append(",").append(dataset.attributeStats(i).missingCount).append(",")
                    .append(FormatterUtil.DECIMAL_FORMAT
                            .format(100d * dataset.attributeStats(i).missingCount / dataset.numInstances()))
                    .append(",").append("''").append("\n");
        } else if (dataset.attribute(i).isNominal()) {
            sb.append(dataset.attribute(i).name()).append(",'','','','','','','");

            final Map<Object, String> nominalRep = WekaDataStatsUtil
                    .getNominalRepartitionForDescription(dataset, i);
            for (Map.Entry<Object, String> e : nominalRep.entrySet()) {
                sb.append(e.getKey()).append("=").append(e.getValue()).append(" ");
            }/*  w  ww .  j a  v a 2 s.  com*/

            sb.append("'\n");
        }
    }

    final Instances newds = WekaDataAccessUtil.loadInstancesFromARFFString(sb.toString(), false, false);

    if (WekaDataStatsUtil.getNominalAttributesIndexes(dataset).length == 0) {
        newds.deleteAttributeAt(newds.numAttributes() - 1);
    }
    return newds;
}