Example usage for weka.core Instances toString

List of usage examples for weka.core Instances toString

Introduction

On this page you can find example usage for weka.core Instances toString.

Prototype

@Override
public String toString() 

Document

Returns the dataset as a string in ARFF format.
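
The returned string is a complete ARFF document: the @relation line, the @attribute declarations, and the @data section. A minimal, self-contained sketch of the pattern used throughout the examples below (the file names are placeholders):

import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;

import weka.core.Instances;

public class ToStringExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset from an ARFF file; "input.arff" is a placeholder path.
        Instances data = new Instances(new FileReader("input.arff"));
        // toString() renders the entire dataset as ARFF text.
        try (BufferedWriter out = new BufferedWriter(new FileWriter("copy.arff"))) {
            out.write(data.toString());
        }
    }
}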

Usage

From source file: org.montp2.m1decol.ter.utils.WekaUtils.java

License: Open Source License

public static void createARFF(String inPath, String outPath, List<String> excludeFiles) throws IOException {

    // A single string attribute (the null FastVector marks it as a string type)
    // holds the raw text of each input file.
    FastVector atts = new FastVector(1);
    atts.addElement(new Attribute("data", (FastVector) null));
    Instances data = new Instances("CategorizeUserForum", atts, 0);

    for (File file : FileUtils.ls(inPath)) {
        if (!excludeFiles.contains(file.getName())) {
            double[] newInstance = new double[1];
            newInstance[0] = (double) data.attribute(0)
                    .addStringValue(InputStreamUtils.readInputStream(file.getAbsolutePath()));
            data.add(new Instance(1.0, newInstance));
        }
    }

    // Instances.toString() yields the dataset in ARFF format.
    OutputStreamUtils.writeSimple(data.toString(), outPath);
}

From source file: org.univ.montp2.master.gmin313.DataCrawler.java

public static void main(String[] args) {
    try {
        //crawlTwitter();
        File crawlDir = new File(crawlResultDir);
        delete(crawlDir);
        crawlDir.mkdir();
        MyCrawler crawler = new MyCrawler();
        crawler.crawlWebSites();
        Instances dataset = createDataset(crawlResultDir);
        java.io.File theFile = new java.io.File(workingDir + "/output/weka.arff");
        System.out.println("Directory : " + theFile.getAbsolutePath());
        FileWriter fw = new FileWriter(theFile.getAbsolutePath());
        try (BufferedWriter out = new BufferedWriter(fw)) {
            out.write(dataset.toString());
        }
    } catch (Exception ex) {
        java.util.logging.Logger.getLogger(DataCrawler.class.getName()).log(Level.SEVERE, null, ex);
        System.err.println(ex.getMessage());
        ex.printStackTrace();
    }
}

From source file: org.univ.montp2.master.ncbi.api.ncbi.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    //PropertyConfigurator.configure(ncbi.class.getResource("log4j.properties"));
    try {
        // TODO code application logic here
        getDataFromNcbi();
        Instances dataset = getDataSet(workingDir + "output/tagged/ncbi/");
        java.io.File theFile = new java.io.File(workingDir + "/output/taggedArticleNcbi.arff");
        System.out.println("Directory : " + theFile.getAbsolutePath());
        if (dataset != null) {
            FileWriter fw = new FileWriter(theFile.getAbsolutePath());
            try (BufferedWriter out = new BufferedWriter(fw)) {
                out.write(dataset.toString());
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(ncbi.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file: parkinsonpredictor.ParkinsonPredictor.java

/**
 * Do principal component analysis.
 */
public static void doPCA() {
    PrincipalComponents pc = new PrincipalComponents();
    DataSource source;
    try {
        source = new DataSource(".\\parkinsonDataTruncate.libsvm");
        Instances data = source.getDataSet();
        pc.setInputFormat(data);
        pc.setMaximumAttributes(100);
        Instances newData = Filter.useFilter(data, pc);

        Path file = Paths.get("parkinsonDataPCA.txt");
        List<String> lines = Arrays.asList(newData.toString());
        Files.write(file, lines, Charset.forName("UTF-8"));
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: personality_prediction.Classifier.java

/**
 * @param args the command line arguments
 */
void run_classifier() {
    // TODO code application logic here
    try {
        //csv2arff();
        System.out.println("Enter the class for which you want to classify");
        System.out.println("1..Openness");
        System.out.println("2..Neuroticism");
        System.out.println("3..Agreeableness");
        System.out.println("4..Conscientiousness");
        System.out.println("5..Extraversion");
        System.out.println();
        Scanner sc = new Scanner(System.in);
        int choice = sc.nextInt();
        String filename = "";
        if (choice == 1) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Training dataset\\Training_data_open.arff";
        } else if (choice == 2) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Training dataset\\Training_data_neur.arff";
        } else if (choice == 3) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Training dataset\\Training_data_agr.arff";
        } else if (choice == 4) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Training dataset\\Training_data_con.arff";
        } else if (choice == 5) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Training dataset\\Training_data_extr.arff";
        }
        BufferedReader reader = new BufferedReader(new FileReader(filename));
        Instances data = new Instances(reader);
        reader.close();
        // set the class attribute
        data.setClassIndex(data.numAttributes() - 1);

        // build an unpruned J48 decision tree on the training data
        String[] options = new String[1];
        options[0] = "-U"; // unpruned tree
        J48 tree = new J48(); // new instance of tree
        tree.setOptions(options); // set the options
        tree.buildClassifier(data); // build classifier

        if (choice == 1) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_open.arff";
        } else if (choice == 2) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_neur.arff";
        } else if (choice == 3) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_agr.arff";
        } else if (choice == 4) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_con.arff";
        } else if (choice == 5) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_extr.arff";
        }
        FileReader fr = new FileReader(filename);
        BufferedReader br = new BufferedReader(fr);
        Instances unlabeled = new Instances(br);
        // set class attribute
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);
        // create copy
        Instances labeled = new Instances(unlabeled);
        // label instances
        for (int i = 0; i < unlabeled.numInstances(); i++) {
            double clsLabel = tree.classifyInstance(unlabeled.instance(i));
            labeled.instance(i).setClassValue(clsLabel);
        }
        // save labeled data

        if (choice == 1) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_open_labelled.arff";
        } else if (choice == 2) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_neur_labelled.arff";
        } else if (choice == 3) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_agr_labelled.arff";
        } else if (choice == 4) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_con_labelled.arff";
        } else if (choice == 5) {
            filename = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Labelling\\Testing_data_extr_labelled.arff";
        }
        FileWriter fr1 = new FileWriter(filename);
        BufferedWriter writer = new BufferedWriter(fr1);
        writer.write(labeled.toString());
        writer.newLine();
        writer.flush();
        writer.close();
    } catch (Exception e) {
        System.out.println(e.getLocalizedMessage());
    }
}

From source file: sentinets.Prediction.java

License: Open Source License

public int writePredictions(Instances ins, String filePrefix) {
    try {
        System.out.println("Trying to create the following files:");
        System.out.println(outputDir + "/" + filePrefix + ".arff");
        System.out.println(outputDir + "/" + filePrefix + ".tsv");
        BufferedWriter writer = new BufferedWriter(new FileWriter(outputDir + "/" + filePrefix + ".arff"));
        writer.write(ins.toString());
        writer.newLine();
        writer.flush();
        writer.close();
        CSVSaver s = new CSVSaver();

        s.setFile(new File(outputDir + "/" + filePrefix + ".tsv"));
        s.setInstances(ins);
        s.setFieldSeparator("\t");
        s.writeBatch();

    } catch (IOException e) {
        e.printStackTrace();
        return 1;
    }
    return 0;
}

From source file: sentinets.SentiNets.java

License: Open Source License

public void writePredictions(Instances ins, String filePrefix) {
    try {
        BufferedWriter writer = new BufferedWriter(new FileWriter(outputDir + "/" + filePrefix + ".arff"));
        writer.write(ins.toString());
        writer.newLine();
        writer.flush();
        writer.close();
        CSVSaver s = new CSVSaver();

        s.setFile(new File(outputDir + "/" + filePrefix + ".tsv"));
        s.setInstances(ins);
        s.setFieldSeparator("\t");
        s.writeBatch();

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file: tclass.ToArff.java

License: Open Source License

public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ToArff thisExp = new ToArff();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.inFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    // And we might as well extract the events. 

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString()); 

    // Now we want the clustering algorithms only to cluster
    // instances of each class. Make an array of clusterers, 
    // one per class. 

    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    //And now load it up. 
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // But wait! There's more! There is always more. 
    // The first thing was only useful for clustering. 
    // Now attribution. We want to attribute all the data. So we are going 
    // to have one dataset for each learner. 
    // First set up the attributors. 

    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every
    // one. 

    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);

    trainStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;

    System.gc();

    // So now we have the raw data in the correct form for each
    // attributor. 
    // And now, we can construct a learner for each case. 
    // Well, for now, I'm going to do something completely crazy. 
    // Let's run each classifier nonetheless over the whole data
    // ... and see what the hell happens. Maybe some voting scheme 
    // is possible!! This is a strange form of ensemble
    // classifier. 
    // Each naive bayes algorithm only gets one 

    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        // Now extract the features. 
        System.err.print("Selected features: ");
        String featureString = "";
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += ("last");
        System.err.println(featureString);
        // Now apply the filter. 
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);

    }
    try {
        FileWriter fw = new FileWriter(thisExp.outFile);
        fw.write(data.toString());
        fw.close();
    } catch (Exception e) {
        throw new Exception("Could not write to output file. ");
    }
}

From source file: trainableSegmentation.Trainable_Segmentation.java

License: GNU General Public License

/**
 * Write current instances into an ARFF file
 * @param data set of instances
 * @param filename ARFF file name
 */
public void writeDataToARFF(Instances data, String filename) {
    try {
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename)));
        try {
            out.write(data.toString());
            out.close();
        } catch (IOException e) {
            IJ.showMessage("IOException");
        }
    } catch (FileNotFoundException e) {
        IJ.showMessage("File not found!");
    }

}

From source file: trainableSegmentation.WekaSegmentation.java

License: GNU General Public License

/**
 * Write current instances into an ARFF file
 * @param data set of instances
 * @param filename ARFF file name
 * @return false if an error occurred, true otherwise
 */
public boolean writeDataToARFF(Instances data, String filename) {
    BufferedWriter out = null;
    try {
        out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename)));

        final Instances header = new Instances(data, 0);
        out.write(header.toString());

        for (int i = 0; i < data.numInstances(); i++) {
            out.write(data.get(i).toString() + "\n");
        }
    } catch (Exception e) {
        IJ.log("Error: couldn't write instances into .ARFF file.");
        IJ.showMessage("Exception while saving data as ARFF file");
        e.printStackTrace();
        return false;
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    return true;

}
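
Note the design choice in this last example: the ARFF header and the individual instances are written separately, which avoids materializing the whole dataset as one large string the way a single data.toString() call does. For large datasets, weka.core.converters.ArffSaver (the same Saver family as the CSVSaver used in the sentinets examples above) writes the file incrementally; a minimal sketch, with placeholder file names:

import java.io.File;

import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.core.converters.ConverterUtils.DataSource;

public class ArffSaverExample {
    public static void main(String[] args) throws Exception {
        // "input.arff" is a placeholder path.
        Instances data = DataSource.read("input.arff");
        // ArffSaver writes the dataset to an ARFF file directly,
        // without first rendering it via Instances.toString().
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File("output.arff"));
        saver.writeBatch();
    }
}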