Example usage for weka.core Instances toSummaryString

Introduction

On this page you can find usage examples for the weka.core.Instances method toSummaryString.

Prototype

public String toSummaryString() 

Document

Generates a string summarizing the set of instances.
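
A minimal, self-contained sketch of the call (the file name is a placeholder, not taken from the examples below): toSummaryString() returns the relation name, the instance and attribute counts, and a per-attribute summary table.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SummaryDemo {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a placeholder; any ARFF or CSV file works here.
        Instances data = new DataSource("iris.arff").getDataSet();
        // Print the relation name, instance/attribute counts and the
        // per-attribute summary table.
        System.out.println(data.toSummaryString());
    }
}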

Usage

From source file: lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java

License: Open Source License

/**
 * Main method.
 * @param args command line arguments
 */
public static void main(final String[] args) {
    try {
        HydroRunner.init(false);

        Instances newkdb = new Instances(GapFillingKnowledgeDB.getKnowledgeDB());

        System.out.println("Considered fictive gaps -> " + getCountOfFictiveGaps(newkdb));

        System.out.println(newkdb.toSummaryString());

        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useDownstream").index(), "false");
        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useUpstream").index(), "false");
        //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useNearest").index(),"false");
        //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useMostSimilar").index(),"false");

        //System.out.println(newkdb.toSummaryString());

        Instances withGoodNashSutcliffe = new Instances(newkdb, 0);
        for (int i = 0; i < newkdb.numInstances(); i++) {
            if (newkdb.instance(i).value(newkdb.attribute("NashSutcliffe").index()) > 0.5d) {
                withGoodNashSutcliffe.add(new DenseInstance(1d, newkdb.instance(i).toDoubleArray()));
            }
        }

        System.out.println(withGoodNashSutcliffe.numInstances() + " / " + newkdb.numInstances());

        // Multiply by 100 so the printed value matches the "%" suffix below.
        final double perc = 100d * getCountOfFictiveGaps(withGoodNashSutcliffe)
                / getCountOfFictiveGaps(newkdb);
        System.out.println("Fictive gaps that are infilled with a good Nash-Sutcliffe -> "
                + getCountOfFictiveGaps(withGoodNashSutcliffe) + " (" + perc + "%)");

        WekaDataAccessUtil.saveInstancesIntoARFFFile(withGoodNashSutcliffe,
                new File("./withGoodNashSutcliffe.arff"));
    } catch (final Exception e) {
        e.printStackTrace();
    }
}

From source file: lu.lippmann.cdb.ext.hydviga.util.TransformTimeSeries.java

License: Open Source License

/**
 * Main method.
 * @param args command line arguments
 */
public static final void main(final String[] args) {
    try {
        final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File("."
                + File.separatorChar + "data_fake" + File.separatorChar + "all_valid_q_series_complete2.arff"));
        System.out.println(dataSet.toSummaryString());

        final int numAttributes = dataSet.numAttributes();
        final int numInstances = dataSet.numInstances();
        for (int i = 0; i < numAttributes; i++) {
            final int i_bis = (int) (Math.random() * (double) (numAttributes - 3));
            final int i_tri = (int) (Math.random() * (double) (numAttributes - 3));

            for (int j = 0; j < numInstances; j++) {
                final Instance instance_j = dataSet.instance(j);

                if (instance_j.isMissing(i))
                    continue;
                if (instance_j.isMissing(i_bis))
                    continue;
                if (instance_j.isMissing(i_tri))
                    continue;

                final double iValue = instance_j.value(i);
                final double iBisValue = instance_j.value(i_bis);
                final double iTriValue = instance_j.value(i_tri);

                instance_j.setValue(i, (iValue + iBisValue + iTriValue));
            }
        }

        WekaDataAccessUtil.saveInstancesIntoARFFFile(dataSet, new File("." + File.separatorChar + "data_fake"
                + File.separatorChar + "all_valid_q_series_complete2_fake.arff"));
    } catch (final Exception e) {
        e.printStackTrace();
    }
}

From source file: mao.datamining.DataSetPair.java

private void doItOnce4All() {
    if (didIt)
        return;
    didIt = true;
    try {
        //step 0: remove all columns that have more than 50% missing values
        Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName);
        orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1);
        Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1);
        MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString());

        //step 0-1: remove all columns with more than half of their values missing
        Instances newData = orangeDataSet;
        RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues();
        removeMissingValuesColumns.setM_maxMissingPercentage(50);
        removeMissingValuesColumns.setManualDeleteColumns(columns2Delete);
        removeMissingValuesColumns.setInputFormat(newData);
        newData = Filter.useFilter(newData, removeMissingValuesColumns);
        Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n"
                + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) {
            writer.write(newData.toString());
        }

        //step 0-2: delete instances with more than half of their values missing
        //(note: this scans the raw ARFF text, so a '?' inside a string value also counts)
        BufferedReader reader70 = new BufferedReader(new InputStreamReader(
                new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")));
        BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff")));
        int columnNum = newData.numAttributes();
        int totalInstanceNum = newData.numInstances(), deleteM1Num = 0, delete1Num = 0;
        String line = null;
        int missingColumnNum = 0;
        while ((line = reader70.readLine()) != null) {
            missingColumnNum = 0;
            for (int i = 0; i < line.length(); i++) {
                if (line.charAt(i) == '?')
                    missingColumnNum++;
            }
            if (missingColumnNum * 100 / columnNum < 50) {
                writerAfterDeleteRows.write(line);
                writerAfterDeleteRows.newLine();
            } else {
                System.out.println("Delete Row: [" + line + "]");
                if (line.endsWith("-1")) {
                    deleteM1Num++;
                } else {
                    delete1Num++;
                }
            }
        }
        System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num
                + ", delete class 1:  " + delete1Num);
        reader70.close();
        writerAfterDeleteRows.close();

        //create sample files:
        createSampleDataSets();

    } catch (Exception e) {
        Main.logging(null, e);
    }
}
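
A note on the row-deletion step above: counting '?' characters in the raw ARFF text is a quick heuristic, but it also matches question marks inside string values. A minimal alternative sketch using the Instances API (dropSparseRows is a hypothetical helper; the 50% threshold mirrors the code above):

// Hypothetical helper: keep only instances with fewer than 50% missing
// values, using isMissing() instead of scanning the ARFF text for '?'.
static Instances dropSparseRows(Instances data) {
    Instances kept = new Instances(data, 0); // copy the header, no rows
    int numAttrs = data.numAttributes();
    for (int i = 0; i < data.numInstances(); i++) {
        int missing = 0;
        for (int j = 0; j < numAttrs; j++) {
            if (data.instance(i).isMissing(j)) {
                missing++;
            }
        }
        if (missing * 100 / numAttrs < 50) {
            kept.add(data.instance(i));
        }
    }
    return kept;
}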

From source file: mao.datamining.DataSetPair.java

/**
 * Pre-Process the training data set with:
 * RemoveUselessColumnsByMissingValues filter
 * SpreadSubsample filter to shrink the majority class instances 
 * AttributeSelection filter with CfsSubsetEval and LinearForwardSelection
 */
private void processTrainRawData() {
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    finalTrainAttrList.clear();
    try {
        doItOnce4All();
        String sampleFilePath = null;
        //step 2, either over sample, or under sample
        //weka.filters.supervised.instance.SpreadSubsample
        if (this.resampleMethod.equalsIgnoreCase(resampleUnder)) {
            System.out.println("Under Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterUnderSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleOver)) {
            System.out.println("Over Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterOverSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleNone)) {
            //do nothing,
            System.out.println("None Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleMatrix)) {
            //do nothing
            System.out.println("Matrix Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else {
            doNotSupport();
        }
        Instances newData = ConverterUtils.DataSource.read(sampleFilePath);
        newData.setClassIndex(newData.numAttributes() - 1);
        //            Main.logging("== New Data After Resampling class instances: ===\n" + newData.toSummaryString());

        //Step 3, select features
        AttributeSelection attrSelectionFilter = new AttributeSelection();
        ASEvaluation eval = null;
        ASSearch search = null;

        //ranker
        if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionA)) {
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            eval = new weka.attributeSelection.InfoGainAttributeEval();
            //weka.attributeSelection.Ranker -T 0.02 -N -1
            search = new Ranker();
            String rankerOptions[] = { "-T", "0.01", "-N", "-1" };
            if (resampleMethod.equalsIgnoreCase(resampleOver)) {
                rankerOptions[1] = "0.1";
            }
            ((Ranker) search).setOptions(rankerOptions);
            Main.logging("== Start to Select Features with InfoGainAttributeEval and Ranker");
        }
        //weka.attributeSelection.LinearForwardSelection -D 0 -N 5 -I -K 50 -T 0
        else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionB)) {
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            eval = new CfsSubsetEval();
            search = new LinearForwardSelection();
            String linearOptions[] = { "-D", "0", "-N", "5", "-I", "-K", "50", "-T", "0" };
            ((LinearForwardSelection) search).setOptions(linearOptions);
            Main.logging("== Start to Select Features with CfsSubsetEval and LinearForwardSelection");
        } else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionNo)) {
            System.out.println("None Selection ssssssssssssssssssssssssssssssssssssss");
            Main.logging("No Feature Selection Method");
        } else {
            doNotSupport();
        }

        if (eval != null) {
            attrSelectionFilter.setEvaluator(eval);
            attrSelectionFilter.setSearch(search);
            attrSelectionFilter.setInputFormat(newData);
            newData = Filter.useFilter(newData, attrSelectionFilter);
        }

        Main.logging("== New Data After Selecting Features: ===\n" + newData.toSummaryString());

        //finally, write the final dataset to file system

        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(this.trainFileName)))) {
            writer.write(newData.toString());
        }

        int numAttributes = newData.numAttributes();
        for (int i = 0; i < numAttributes; i++) {
            String attrName = newData.attribute(i).name();
            finalTrainAttrList.add(attrName);
        }
        Main.logging(finalTrainAttrList.toString());
        //            //set the final train dataset
        finalTrainDataSet = newData;
        finalTrainDataSet.setClassIndex(finalTrainDataSet.numAttributes() - 1);

        Main.logging("train dataset class attr: " + finalTrainDataSet.classAttribute().toString());
    } catch (Exception ex) {
        Main.logging(null, ex);
    }

}

From source file: mao.datamining.ModelProcess.java

/**
 * To create and verify the classifier models.
 *
 * @param classifierClazz
 * @param options
 */
private void runTestCase(DataSetPair ds, String classifierClazz, String reportFilePath, String[] options,
        String caseNum, boolean runDummy, String testCaseDetailFile) {

    String optionsCopy[] = new String[options.length];
    System.arraycopy(options, 0, optionsCopy, 0, options.length);
    //write the summary
    FileOutputStream testCaseSummaryOut = null;
    try {
        testCaseSummaryOut = new FileOutputStream(testCaseDetailFile);
        testCaseSummaryOut.write(("Train File: " + ds.getTrainFileName()).getBytes());
        testCaseSummaryOut.write("\n\n".getBytes());
    } catch (Exception ex) {
        Logger.getLogger(ModelProcess.class.getName()).log(Level.SEVERE, null, ex);
    }

    //Prepare the TestResult
    TestResult result = new TestResult();
    result.setCaseNum(caseNum);
    result.setMissingValueMode(ds.getMissingProcessMode());
    result.setResampleMode(ds.getResampleMethod());
    result.setFeatureSelectMode(ds.getFeatureSelectionMode());
    result.setClassifier(classifierClazz);
    StringBuilder optionsStr = new StringBuilder();
    for (String s : optionsCopy) {
        if (s == null) {
            break;
        }
        optionsStr.append(s).append(" ");
    }
    result.setClassifierOptions(optionsStr.toString());

    //get the training and test data sets
    Instances finalTrainDataSet = ds.getFinalTrainDataSet();
    Instances finalTestDataSet = ds.getFinalTestDataSet();
    //start building and testing
    ModelProcess.logging(
            "\n\n\n=========================================START=================================================\n"
                    + "======= " + classifierClazz + "," + optionsStr.toString() + "========\n"
                    + "=========================================START=================================================");
    if (runDummy) {
        ModelProcess.logging("Dummy Run the process");
    } else {

        Classifier classifier = null;
        String tmpClazz = null;
        String tmpOption[] = null;
        //if using a cost matrix, wrap the base classifier, e.g.:
        //weka.classifiers.meta.CostSensitiveClassifier -cost-matrix "[0.0 2.0; 5.0 0.0]" -S 1 -W weka.classifiers.trees.J48 -- -C 0.25 -M 2
        if (this.useCostMatrix) {
            List<String> costOptionsList = new ArrayList<>();
            costOptionsList.add("-cost-matrix");
            costOptionsList.add("\"[0.0 3.0; 30.0 0.0]\"");
            costOptionsList.add("-S");
            costOptionsList.add("1");
            costOptionsList.add("-W");
            costOptionsList.add(classifierClazz);
            costOptionsList.add("--");
            for (String s : optionsCopy) {
                costOptionsList.add(s);
            }
            String newOptions[] = new String[costOptionsList.size()];
            for (int i = 0; i < newOptions.length; i++) {
                newOptions[i] = costOptionsList.get(i);
            }
            tmpClazz = "weka.classifiers.meta.CostSensitiveClassifier";
            tmpOption = newOptions;
        } else {
            tmpClazz = classifierClazz;
            tmpOption = optionsCopy;
        } //end building classifier name and options

        try {

            try {
                //finalTrainDataSet
                testCaseSummaryOut.write(
                        ("Data Set Summary: " + finalTrainDataSet.toSummaryString() + "\n\n").getBytes());
                testCaseSummaryOut.write(("classifier: " + tmpClazz + "\n").getBytes());
                testCaseSummaryOut.write(("options: " + Arrays.toString(tmpOption) + "\n\n").getBytes());
            } catch (FileNotFoundException ex) {
                Logger.getLogger(ModelProcess.class.getName()).log(Level.SEVERE, null, ex);
            }

            classifier = (Classifier) Utils.forName(Classifier.class, tmpClazz, tmpOption);
        } catch (Exception ex) {
            Logger.getLogger(ModelProcess.class.getName()).log(Level.SEVERE, null, ex);
        } //end create the classifier

        //            this.testCV(classifier, finalTrainDataSet, testCaseSummaryOut, result);
        this.testWithExtraDS(classifier, finalTrainDataSet, finalTestDataSet, testCaseSummaryOut, result);

        try {
            testCaseSummaryOut.close();
        } catch (IOException ex) {
            Logger.getLogger(ModelProcess.class.getName()).log(Level.SEVERE, null, ex);
        }

        ModelProcess.logging(
                "========================================END==================================================\n"
                        + result.toString()
                        + "========================================END==================================================");

        this.lockFile(reportFilePath);
        try (FileOutputStream fout = new FileOutputStream(reportFilePath, true)) {
            fout.write(result.toString().getBytes());
            fout.write("\n".getBytes());
            fout.flush();
            fout.close();
        } catch (Exception ex) {
            Main.logging(null, ex);
        }
        this.unLockFile(reportFilePath);
    }
}
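
For reference, the cost-sensitive wrapper that the options list above assembles for Utils.forName can also be built programmatically. This is a sketch, not the author's code: J48 stands in for the configurable classifierClazz, and the matrix literal mirrors the "-cost-matrix" string used above.

// Sketch only: J48 is a stand-in for classifierClazz.
CostSensitiveClassifier csc = new CostSensitiveClassifier();
csc.setClassifier(new J48());
csc.setCostMatrix(CostMatrix.parseMatlab("[0.0 3.0; 30.0 0.0]"));
csc.buildClassifier(finalTrainDataSet);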

From source file: mlpoc.MLPOC.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        BufferedReader br;
        br = new BufferedReader(
                new FileReader("D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff"));
        Instances training_data = new Instances(br);
        br.close();
        training_data.setClassIndex(training_data.numAttributes() - 1);
        br = new BufferedReader(new FileReader("D:/Extra/B.E Project/agrodeploy/webapp/Data/TestFinal.arff"));
        Instances testing_data = new Instances(br);
        br.close();
        testing_data.setClassIndex(testing_data.numAttributes() - 1);
        String summary = training_data.toSummaryString();
        int number_samples = training_data.numInstances();
        int number_attributes_per_sample = training_data.numAttributes();
        System.out.println("Number of attributes in model = " + number_attributes_per_sample);
        System.out.println("Number of samples = " + number_samples);
        System.out.println("Summary: " + summary);
        System.out.println();

        J48 j48 = new J48();
        FilteredClassifier fc = new FilteredClassifier();
        fc.setClassifier(j48);
        fc.buildClassifier(training_data);
        System.out.println("Testing instances: " + testing_data.numInstances());
        for (int i = 0; i < testing_data.numInstances(); i++) {
            double pred = fc.classifyInstance(testing_data.instance(i));
            String s1 = testing_data.classAttribute().value((int) pred);
            System.out.println(testing_data.instance(i) + " Predicted value: " + s1);
        }
        Evaluation crossValidate = crossValidate(
                "D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff");

        DataSource source = new DataSource(
                "D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff");
        Instances data = source.getDataSet();
        System.out.println(data.numInstances());
        data.setClassIndex(data.numAttributes() - 1);

        // 1. meta-classifier
        useClassifier(data);

        // 2. filter
        useFilter(data);
    } catch (Exception ex) {
        Logger.getLogger(MLPOC.class.getName()).log(Level.SEVERE, null, ex);
    }
}
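
The crossValidate helper invoked above is not included in the snippet. A plausible reconstruction with Weka's Evaluation class follows; J48, 10 folds and the fixed seed are assumptions, and note that Evaluation provides its own toSummaryString() for the evaluation results.

// Hypothetical reconstruction of the crossValidate helper used above.
static Evaluation crossValidate(String arffPath) throws Exception {
    Instances data = new DataSource(arffPath).getDataSet();
    data.setClassIndex(data.numAttributes() - 1);
    Evaluation eval = new Evaluation(data);
    // 10-fold cross-validation with a fixed seed (assumed values).
    eval.crossValidateModel(new J48(), data, 10, new java.util.Random(1));
    System.out.println(eval.toSummaryString());
    return eval;
}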

From source file: sentinets.Prediction.java

License: Open Source License

public void writeStats(Instances tweetInstances) {
    //TweetCorpusStatistics stats = new TweetCorpusStatistics();
    System.out.println("Stats Instances: \n" + tweetInstances.toSummaryString());
    for (int i = 0; i < tweetInstances.size(); i++) {
        String user = tweetInstances.get(i).stringValue(11 - 1);
        String mentions = tweetInstances.get(i).stringValue(3 - 1);
        String hashtags = tweetInstances.get(i).stringValue(14 - 1);
        String epClass = tweetInstances.get(i).stringValue(15 - 1);
        String snsClass = tweetInstances.get(i).stringValue(16 - 1);
        System.out.println("Tweet Details:\t" + user + "\t" + mentions + "\t" + hashtags + "\t"
                + printDist(classDist.get(i)));
        //stats.updateStatistics(user, mentions, hashtags, epClass+","+snsClass, classDist.get(i));
    }
}

From source file: soccer.core.MyMatchLoader.java

public void test() throws IOException, Exception {
    Instances instances = loader.getDataSet();
    NumericToNominal nm = new NumericToNominal();
    nm.setOptions(new String[] { "-R", "last" });
    nm.setInputFormat(instances);
    instances = Filter.useFilter(instances, nm);
    instances.setClassIndex(instances.numAttributes() - 1);
    System.out.println(instances.toSummaryString());
}

From source file: soccer.core.MyPlayerLoader.java

public void test() throws IOException, Exception {
    Instances instances = getInstances();

    System.out.println(instances.toSummaryString());
}