Example usage for weka.core Instances toString

Introduction

On this page you can find example usage for weka.core.Instances.toString().

Prototype

@Override
public String toString() 

Document

Returns the dataset as a string in ARFF format.
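
Before the examples below, a minimal, self-contained sketch of the call (class and attribute names here are illustrative, not taken from the sources that follow). toString() renders the @relation line, one @attribute line per attribute, and then one @data row per instance:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesToStringDemo {
    public static void main(String[] args) {
        // Build a two-attribute header with no rows yet.
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        attInfo.add(new Attribute("x"));
        attInfo.add(new Attribute("y"));
        Instances data = new Instances("demo", attInfo, 0);

        // Add one instance so the @data section is non-empty.
        data.add(new DenseInstance(1.0, new double[] { 1.5, 2.0 }));

        // Prints: @relation demo, the two @attribute lines, @data, then 1.5,2
        System.out.println(data.toString());
    }
}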

Usage

From source file:mulan.data.ConverterLibSVM.java

License:Open Source License

/**
 * Converts a multi-label dataset from LibSVM format to the format
 * that is compatible with Mulan. It constructs one ARFF and one XML file. 
 *
 * @param path the directory that contains the source file and will contain 
 * the target files
 * @param sourceFilename the name of the source file
 * @param relationName the relation name of the arff file that will be 
 * constructed
 * @param targetFilestem the filestem for the target files (.arff and .xml)
 */
public static void convertFromLibSVM(String path, String sourceFilename, String targetFilestem,
        String relationName) {
    BufferedReader aReader = null;
    BufferedWriter aWriter = null;

    int numLabels = 0;
    int numAttributes = 0;
    int numInstances = 0;
    double meanParsedAttributes = 0;

    // Calculate number of labels and attributes

    String line = null;
    try {
        aReader = new BufferedReader(new FileReader(path + sourceFilename));

        while ((line = aReader.readLine()) != null) {
            numInstances++;

            StringTokenizer strTok = new StringTokenizer(line, " ");
            while (strTok.hasMoreTokens()) {
                String token = strTok.nextToken();

                if (token.indexOf(":") == -1) {
                    // parse label info
                    StringTokenizer labelTok = new StringTokenizer(token, ",");
                    while (labelTok.hasMoreTokens()) {
                        String strLabel = labelTok.nextToken();
                        int intLabel = Integer.parseInt(strLabel);
                        if (intLabel > numLabels) {
                            numLabels = intLabel;
                        }
                    }
                } else {
                    // parse attribute info
                    meanParsedAttributes++;
                    StringTokenizer attrTok = new StringTokenizer(token, ":");
                    String strAttrIndex = attrTok.nextToken();
                    int intAttrIndex = Integer.parseInt(strAttrIndex);
                    if (intAttrIndex > numAttributes) {
                        numAttributes = intAttrIndex;
                    }
                }
            }
        }

        numLabels++;

        System.out.println("Number of attributes: " + numAttributes);
        System.out.println("Number of instances: " + numInstances);
        System.out.println("Number of classes: " + numLabels);

        System.out.println("Constructing XML file... ");
        LabelsMetaDataImpl meta = new LabelsMetaDataImpl();
        for (int label = 0; label < numLabels; label++) {
            meta.addRootNode(new LabelNodeImpl("Label" + (label + 1)));
        }

        String labelsFilePath = path + targetFilestem + ".xml";
        try {
            LabelsBuilder.dumpLabels(meta, labelsFilePath);
            System.out.println("Done!");
        } catch (LabelsBuilderException e) {
            File labelsFile = new File(labelsFilePath);
            if (labelsFile.exists()) {
                labelsFile.delete();
            }
            System.out.println("Construction of labels XML failed!");
        }

        meanParsedAttributes /= numInstances;
        boolean sparse = false;
        if (meanParsedAttributes < numAttributes) {
            sparse = true;
            System.out.println("Dataset is sparse.");
        }

        // Define Instances class to hold data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>(numAttributes + numLabels);
        Attribute[] att = new Attribute[numAttributes + numLabels];

        for (int i = 0; i < numAttributes; i++) {
            att[i] = new Attribute("Att" + (i + 1));
            attInfo.add(att[i]);
        }
        ArrayList<String> classValues = new ArrayList<String>(2);
        classValues.add("0");
        classValues.add("1");
        for (int i = 0; i < numLabels; i++) {
            att[numAttributes + i] = new Attribute("Label" + (i + 1), classValues);
            attInfo.add(att[numAttributes + i]);
        }

        // Re-read file and convert into multi-label arff
        int countInstances = 0;

        aWriter = new BufferedWriter(new FileWriter(path + targetFilestem + ".arff"));
        Instances data = new Instances(relationName, attInfo, 0);
        aWriter.write(data.toString());

        // close the first-pass reader before re-reading the source file
        aReader.close();
        aReader = new BufferedReader(new FileReader(path + sourceFilename));

        while ((line = aReader.readLine()) != null) {
            countInstances++;

            // all attribute and label values default to 0
            double[] attValues = new double[numAttributes + numLabels];
            Arrays.fill(attValues, 0);

            Instance tempInstance = new DenseInstance(1, attValues);
            tempInstance.setDataset(data);

            // separate class info from attribute info
            // ensure class info exists
            StringTokenizer strTok = new StringTokenizer(line, " ");

            while (strTok.hasMoreTokens()) {
                String token = strTok.nextToken();

                if (token.indexOf(":") == -1) {
                    // parse label info
                    StringTokenizer labelTok = new StringTokenizer(token, ",");
                    while (labelTok.hasMoreTokens()) {
                        String strLabel = labelTok.nextToken();
                        int intLabel = Integer.parseInt(strLabel);
                        tempInstance.setValue(numAttributes + intLabel, 1);
                    }
                } else {
                    // parse attribute info
                    StringTokenizer attrTok = new StringTokenizer(token, ":");
                    String strAttrIndex = attrTok.nextToken();
                    String strAttrValue = attrTok.nextToken();
                    tempInstance.setValue(Integer.parseInt(strAttrIndex) - 1, Double.parseDouble(strAttrValue));
                }
            }

            if (sparse) {
                SparseInstance tempSparseInstance = new SparseInstance(tempInstance);
                aWriter.write(tempSparseInstance.toString() + "\n");
            } else {
                aWriter.write(tempInstance.toString() + "\n");
            }

        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (aReader != null) {
                aReader.close();
            }
            if (aWriter != null) {
                aWriter.close();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
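
A note on the sparse/dense branch above: the two toString() renderings differ. As a minimal sketch (attribute names are illustrative), DenseInstance.toString() emits one comma-separated value per attribute, while SparseInstance.toString() emits only the non-zero entries as {index value} pairs, which is what keeps a sparse ARFF body small:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.SparseInstance;

public class SparseVsDenseToString {
    public static void main(String[] args) {
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int i = 0; i < 4; i++) {
            attInfo.add(new Attribute("Att" + (i + 1)));
        }
        Instances data = new Instances("demo", attInfo, 0);

        DenseInstance dense = new DenseInstance(1.0, new double[] { 0, 2.5, 0, 1 });
        dense.setDataset(data);

        SparseInstance sparse = new SparseInstance(dense);
        sparse.setDataset(data);

        System.out.println(dense.toString());  // 0,2.5,0,1
        System.out.println(sparse.toString()); // {1 2.5,3 1}
    }
}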

From source file:myclassifier.Main.java

public static void main(String[] args) throws Exception {
    System.out.println("C");
    File f = new File("weather.nominal.arff");
    if (f.exists() && !f.isDirectory()) {
        System.out.println("A");
    } else {
        System.out.println("B");
    }
    System.out.println("C");

    WekaAccessor access = new WekaAccessor();
    Instances train_data = access.loadArff("weather.nominal.arff");
    // toString() builds the ARFF text here; the return values are discarded.
    train_data.toString();
    train_data.firstInstance().toString();
    MyId3 id3 = new MyId3();
    id3.buildClassifier(train_data);
    access.tenFoldCrossValidation(id3, train_data);
}

From source file:myclassifier.MyClassifier.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {

    WekaAccessor access = new WekaAccessor();
    Instances train_data = access.loadArff("weather.nominal.arff");
    train_data.toString();
    train_data.firstInstance().toString();
    MyId3 tree1 = new MyId3();
    MyC45 tree2 = new MyC45();
    J48 tree3 = access.buildC45Classifier(train_data);
    Id3 tree4 = access.buildId3Classifier(train_data);
    tree1.buildClassifier(train_data);
    tree2.buildClassifier(train_data);
    System.out.println("=== My Id3 ===");
    access.tenFoldCrossValidation(tree1, train_data);
    System.out.println("=== My C45 ===");
    access.tenFoldCrossValidation(tree2, train_data);
    System.out.println("=== Weka C45 ===");
    access.tenFoldCrossValidation(tree3, train_data);
    System.out.println("=== Weka Id3 ===");
    access.tenFoldCrossValidation(tree4, train_data);
}

From source file:nl.detoren.ijc.neural.Voorspeller.java

License:Open Source License

public String voorspel(String bestandsnaam) throws FileNotFoundException, IOException, Exception {
    // Read instances
    BufferedReader reader = new BufferedReader(new FileReader(bestandsnaam));
    Instances datapredict = new Instances(reader);
    datapredict.setClassIndex(datapredict.numAttributes() - 1);
    Instances predicteddata = new Instances(datapredict);
    reader.close();
    // Predict instances
    for (int i = 0; i < datapredict.numInstances(); i++) {
        double clsLabel = mlp.classifyInstance(datapredict.instance(i));
        predicteddata.instance(i).setClassValue(clsLabel);
    }
    logger.log(Level.INFO, predicteddata.toString());
    // Save instances
    String outputBestand = bestandsnaam.substring(0, bestandsnaam.length() - 5) + "_solved.arff";
    BufferedWriter writer = new BufferedWriter(new FileWriter(outputBestand));
    writer.write(predicteddata.toString());
    writer.newLine();
    writer.flush();
    writer.close();

    return null;
}

From source file:OAT.trading.classification.Weka.java

License:Open Source License

@Override
public Prediction predict(InputSample input) {
    if (classifier == null) {
        log(Level.WARNING, "null classifier");
        return null;
    }

    Instances data = getInstances(input);

    if (data == null) {
        log(Level.WARNING, "null data");
        return null;
    }

    if (!isCrossValidating()) {
        if (isLoggable(Level.FINER)) {
            log(Level.FINER, data.toString());
        }
    }

    try {
        double output = new Evaluation(data).evaluateModelOnce(classifier, data.firstInstance());

        return Prediction.valueOf(output < 0.5 ? -1 : 1);
    } catch (Exception ex) {
        log(Level.SEVERE, null, ex);
    }

    return null;
}

From source file:OAT.trading.classification.Weka.java

License:Open Source License

@Override
public void train(List<TrainingSample> trainingSet) {
    initClassifier();

    if (classifier == null) {
        log(Level.WARNING, "null classifier");
        return;
    }

    Instances data = getInstances(trainingSet);

    if (data == null) {
        log(Level.WARNING, "null data");
        return;
    }

    if (!isCrossValidating()) {
        log(Level.FINE, "Training set size: {0}", data.numInstances());

        if (isLoggable(Level.FINER)) {
            log(Level.FINER, data.toString());
        }
    }

    try {
        classifier.buildClassifier(data);

    } catch (UnsupportedAttributeTypeException ex) {
        log(Level.WARNING, "{0}\nCapabilities: {1}",
                new Object[] { ex.getMessage(), classifier.getCapabilities() });

    } catch (Exception ex) {
        log(Level.SEVERE, null, ex);
    }
}

From source file:org.hypknowsys.wumprep.WUMprepWrapper.java

License:Open Source License

/**
 * Creates a dummy dataset from the input format, sends it to the script and
 * reads the script output's ARFF information that in turn is used to set
 * <code>this</code>' output format.
 *
 * This mechanism allows a WUMprep script to alter the recordset layout as
 * long as this change is documented by the output ARFF header. For example,
 * the <tt>dnsLookup.pl</tt> script changes the <code>host_ip</code> field
 * to <code>host_dns</code> when performing IP lookups.
 * 
 * @param instanceInfo
 *          The input format.
 * @return Object containing the output instance structure.
 */
public Instances getScriptOutputFormat(Instances instanceInfo) {
    Instances outputFormat = instanceInfo;
    Instances testData = new Instances(instanceInfo);
    // Note: this example uses the pre-3.7 Weka API, in which weka.core.Instance
    // was a concrete class (newer releases use DenseInstance instead).
    Instance testInstance = new Instance(testData.numAttributes());

    testData.delete();
    testInstance.setDataset(testData);

    // Initialize the testInstance's attribute values
    for (int i = 0; i < testInstance.numAttributes(); i++) {
        String aName = testInstance.attribute(i).name();
        if (aName.equals("host_ip"))
            testInstance.setValue(i, "127.0.0.1");
        else if (aName.equals("ts_day"))
            testInstance.setValue(i, "01");
        else if (aName.equals("ts_month"))
            testInstance.setValue(i, "Jan");
        else if (aName.equals("ts_year"))
            testInstance.setValue(i, "2005");
        else if (aName.equals("ts_hour"))
            testInstance.setValue(i, "11");
        else if (aName.equals("ts_minutes"))
            testInstance.setValue(i, "55");
        else if (aName.equals("ts_seconds"))
            testInstance.setValue(i, "00");
        else if (aName.equals("tz"))
            testInstance.setValue(i, "+0200");
        else
            testInstance.setValue(i, aName + "-dummy");
    }

    testData.add(testInstance);

    WUMprepWrapper testWrapper = new WUMprepWrapper(m_scriptName, m_args);
    testWrapper.start();
    testWrapper.push(testData.toString());
    testWrapper.push((Instance) null);

    class ErrorReader extends Thread implements Serializable {
        /**  */
        private static final long serialVersionUID = -488779846603045891L;
        PipedReader m_input = null;

        /**
         * Helper class for reading stderr output from the WUMprep script
         * 
         * @param input The script's wrapper's stderr pipe reader
         */
        ErrorReader(PipedReader input) {
            m_input = input;
            this.start();
        }

        public void run() {
            try {
                while (m_input.read() >= 0)
                    ;
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }

    // drain the script's stderr in a background thread
    new ErrorReader(testWrapper.getErrorPipe());

    try {
        // read the script's ARFF output and use it as the output format
        outputFormat = new org.hypknowsys.wumprep4weka.core.Instances(testWrapper.getOutputPipe());

    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    return outputFormat;
}

From source file:org.montp2.m1decol.ter.gramms.filters.FilterTokenizerBoolean.java

License:Open Source License

public void indexingToTokenizer(String inPath, String outPath) throws Exception {
    WordTokenizer wordTokenizer = new WordTokenizer();
    wordTokenizer.setDelimiters("\r \t.,;:'\"()?!");

    Instances inputInstances = WekaUtils.loadARFF(inPath);
    StringToWordVector filter = new StringToWordVector();
    filter.setInputFormat(inputInstances);
    filter.setDoNotOperateOnPerClassBasis(false);
    filter.setInvertSelection(false);
    filter.setLowerCaseTokens(true);
    filter.setOutputWordCounts(false);
    filter.setTokenizer(wordTokenizer);
    filter.setUseStoplist(true);
    filter.setWordsToKeep(wordsTokeep);

    Instances outputInstances = Filter.useFilter(inputInstances, filter);

    OutputStreamUtils.writeSimple(outputInstances.toString(), outPath);
}

From source file:org.montp2.m1decol.ter.gramms.filters.FilterTokenizerIDFT.java

License:Open Source License

public void indexingToTokenizer(String inPath, String outPath) throws Exception {

    WordTokenizer wordTokenizer = new WordTokenizer();
    wordTokenizer.setDelimiters("\r \t.,;:'\"()?!");

    Instances inputInstances = WekaUtils.loadARFF(inPath);
    StringToWordVector filter = new StringToWordVector();
    filter.setInputFormat(inputInstances);
    filter.setIDFTransform(true);
    filter.setTFTransform(true);
    filter.setDoNotOperateOnPerClassBasis(false);
    filter.setInvertSelection(false);
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(3);
    filter.setOutputWordCounts(true);
    filter.setTokenizer(wordTokenizer);
    filter.setUseStoplist(true);
    filter.setWordsToKeep(200);

    Instances outputInstances = Filter.useFilter(inputInstances, filter);

    OutputStreamUtils.writeSimple(outputInstances.toString(), outPath);
}

From source file:org.montp2.m1decol.ter.gramms.filters.FilterTokenizerVector.java

License:Open Source License

public void indexingToTokenizer(String inPath, String outPath) throws Exception {
    WordTokenizer wordTokenizer = new WordTokenizer();
    wordTokenizer.setDelimiters("\r \t.,;:'\"()?!");

    Instances inputInstances = WekaUtils.loadARFF(inPath);
    StringToWordVector filter = new StringToWordVector();
    filter.setInputFormat(inputInstances);
    filter.setDoNotOperateOnPerClassBasis(false);
    filter.setInvertSelection(false);
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(3);
    filter.setOutputWordCounts(true);
    filter.setTokenizer(wordTokenizer);
    filter.setUseStoplist(true);
    filter.setWordsToKeep(200);

    Instances outputInstances = Filter.useFilter(inputInstances, filter);

    OutputStreamUtils.writeSimple(outputInstances.toString(), outPath);
}