Example usage for weka.core Instances setRelationName

List of usage examples for weka.core Instances setRelationName

Introduction

On this page you can find example usage for weka.core Instances setRelationName.

Prototype

public void setRelationName(String newName) 

Document

Sets the relation's name.
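
A minimal sketch of the call, assuming a Weka 3.7+ classpath (the attribute and relation names here are made up):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.Instances;

public class RelationNameDemo {
    public static void main(String[] args) {
        // build a tiny header-only dataset
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("width"));
        atts.add(new Attribute("height"));
        Instances data = new Instances("boxes", atts, 0);

        data.setRelationName("boxes-renamed");
        System.out.println(data.relationName()); // prints: boxes-renamed
        // the name is also what ends up on the @relation line of an ARFF file
    }
}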

Usage

From source file: adams.opt.optimise.genetic.AbstractGeneticAlgorithm.java

License: Open Source License

/**
 * Creates a new dataset, with the setup as the new relation name.
 *
 * @param data   the dataset whose relation name gets replaced with the setup
 * @return      the updated dataset
 */
protected Instances updateHeader(Instances data) {
    Properties props;

    props = storeSetup(data);
    data.setRelationName(props.toString());

    return data;
}
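
The relation name is a free-form string, so it can carry arbitrary metadata. A minimal sketch of the same idea outside ADAMS, with a Properties object standing in for whatever storeSetup produces:

import java.util.Properties;

import weka.core.Instances;

final class SetupHeader {
    // hypothetical stand-in for storeSetup(data): any key=value pairs work
    static Instances updateHeader(Instances data) {
        Properties props = new Properties();
        props.setProperty("seed", "42");
        props.setProperty("generations", "100");
        data.setRelationName(props.toString()); // e.g. "{generations=100, seed=42}"
        return data;
    }
}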

From source file: adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java

License: Open Source License

/**
 * Callback for best measure so far
 */
@Override
public void newBest(double val, OptData opd) {
    int cnt = 0;
    int[] weights = getWeights(opd);
    Instances newInstances = new Instances(getInstances());
    for (int i = 0; i < getInstances().numInstances(); i++) {
        Instance in = newInstances.instance(i);
        cnt = 0;
        for (int a = 0; a < getInstances().numAttributes(); a++) {
            if (a == getInstances().classIndex())
                continue;
            if (weights[cnt++] == 0) {
                in.setValue(a, 0); // attribute not selected: zero it out
            } else {
                in.setValue(a, in.value(a)); // attribute selected: value is kept as-is
            }
        }
    }
    try {
        File file = new File(getOutputDirectory().getAbsolutePath() + File.separator
                + Double.toString(getMeasure().adjust(val)) + ".arff");
        file.createNewFile();
        Writer writer = new BufferedWriter(new FileWriter(file));
        Instances header = new Instances(newInstances, 0);

        // remove filter setup
        Remove remove = new Remove();
        remove.setAttributeIndices(getRemoveAsString(weights));
        remove.setInvertSelection(true);

        header.setRelationName(OptionUtils.getCommandLine(remove));

        writer.write(header.toString());
        writer.write("\n");
        for (int i = 0; i < newInstances.numInstances(); i++) {
            writer.write(newInstances.instance(i).toString());
            writer.write("\n");
        }
        writer.flush();
        writer.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
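
The trick above is that Instances.toString() writes the relation name on the @relation line, so the saved ARFF records the Remove setup in its header. A reduced sketch (the attribute indices are made up, and Weka's Utils.joinOptions stands in for ADAMS' OptionUtils.getCommandLine):

import weka.core.Instances;
import weka.core.Utils;
import weka.filters.unsupervised.attribute.Remove;

final class StampedHeader {
    // sketch: record a filter setup in the ARFF header via the relation name
    static String arffWithSetup(Instances data) {
        Instances header = new Instances(data, 0); // attributes only, no rows
        Remove remove = new Remove();
        remove.setAttributeIndices("1,3"); // hypothetical selection
        remove.setInvertSelection(true);
        // roughly what OptionUtils.getCommandLine(remove) returns in ADAMS
        header.setRelationName(remove.getClass().getName() + " " + Utils.joinOptions(remove.getOptions()));
        return header.toString(); // first line: @relation '...Remove -V -R 1,3'
    }
}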

From source file: de.ugoe.cs.cpdp.execution.ClassifierCreationExperiment.java

License: Apache License

/**
 * Executes the experiment with the steps as described in the class comment.
 *
 * @see Runnable#run()
 */
@Override
public void run() {
    final List<SoftwareVersion> versions = new LinkedList<>();

    boolean writeHeader = true;

    for (IVersionLoader loader : config.getLoaders()) {
        versions.addAll(loader.load());
    }

    File resultsDir = new File(config.getResultsPath());
    if (!resultsDir.exists()) {
        resultsDir.mkdir();
    }

    int versionCount = 1;
    for (SoftwareVersion testVersion : versions) {

        // At first: traindata == testdata
        Instances testdata = testVersion.getInstances();
        Instances traindata = new Instances(testdata);
        List<Double> efforts = testVersion.getEfforts();

        // Give the dataset a new name
        testdata.setRelationName(testVersion.getProject());

        for (IProcessesingStrategy processor : config.getPreProcessors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying preprocessor %s", config.getExperimentName(),
                            versionCount, versions.size(), testVersion.getProject(),
                            processor.getClass().getName()));
            processor.apply(testdata, traindata);
        }

        for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
                            config.getExperimentName(), versionCount, versions.size(), testVersion.getProject(),
                            dataselector.getClass().getName()));
            traindata = dataselector.apply(testdata, traindata);
        }

        for (IProcessesingStrategy processor : config.getPostProcessors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
                            config.getExperimentName(), versionCount, versions.size(), testVersion.getProject(),
                            processor.getClass().getName()));
            processor.apply(testdata, traindata);
        }

        // Trainerlist for evaluation later on
        List<ITrainer> allTrainers = new LinkedList<>();

        for (ITrainingStrategy trainer : config.getTrainers()) {

            // Add trainer to list for evaluation
            allTrainers.add(trainer);

            // Train classifier
            trainer.apply(traindata);

            if (config.getSaveClassifier()) {
                // if the classifier should be saved, serialize it to disk
                // be careful with the typecast here!
                IWekaCompatibleTrainer trainerToSave = (IWekaCompatibleTrainer) trainer;
                // Console.println(trainerToSave.getClassifier().toString());
                try {
                    weka.core.SerializationHelper.write(resultsDir.getAbsolutePath() + "/" + trainer.getName()
                            + "-" + testVersion.getProject(), trainerToSave.getClassifier());
                } catch (Exception e) {
                    e.printStackTrace();
                }

            }
        }

        for (IEvaluationStrategy evaluator : config.getEvaluators()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying evaluator %s", config.getExperimentName(),
                            versionCount, versions.size(), testVersion.getProject(),
                            evaluator.getClass().getName()));

            if (writeHeader) {
                evaluator.setParameter(config.getResultsPath() + "/" + config.getExperimentName() + ".csv");
            }
            evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader, config.getResultStorages());
            writeHeader = false;
        }

        Console.traceln(Level.INFO, String.format("[%s] [%02d/%02d] %s: finished", config.getExperimentName(),
                versionCount, versions.size(), testVersion.getProject()));

        versionCount++;

    }

}

From source file: de.ugoe.cs.cpdp.loader.NetgeneLoader.java

License: Apache License

@Override
public Instances load(File fileMetricsFile) {
    // first determine all files
    String path = fileMetricsFile.getParentFile().getAbsolutePath();
    String project = fileMetricsFile.getName().split("_")[0];
    File bugsFile = new File(path + "/" + project + "_bugs_per_file.csv");
    File networkMetrics = new File(path + "/" + project + "_network_metrics.csv");
    Instances metricsData = null;

    try {
        CSVLoader wekaCsvLoader = new CSVLoader();
        wekaCsvLoader.setSource(fileMetricsFile);
        metricsData = wekaCsvLoader.getDataSet();
        wekaCsvLoader.setSource(bugsFile);
        Instances bugsData = wekaCsvLoader.getDataSet();
        wekaCsvLoader.setSource(networkMetrics);
        Instances networkData = wekaCsvLoader.getDataSet();

        metricsData.setRelationName(project);

        // fix nominal attributes (i.e., NA values)
        for (int j = 2; j < networkData.numAttributes(); j++) {
            if (networkData.attribute(j).isNominal()) {
                String attributeName = networkData.attribute(j).name();
                double[] tmpVals = new double[networkData.size()];
                // get temporary values
                for (int i = 0; i < networkData.size(); i++) {
                    Instance inst = networkData.instance(i);
                    if (!inst.isMissing(j)) {
                        String val = networkData.instance(i).stringValue(j);
                        try {
                            tmpVals[i] = Double.parseDouble(val);
                        } catch (NumberFormatException e) {
                            // not a number, using 0.0;
                            tmpVals[i] = 0.0;
                        }
                    } else {
                        tmpVals[i] = 0.0;
                    }
                }
                // replace attribute
                networkData.deleteAttributeAt(j);
                networkData.insertAttributeAt(new Attribute(attributeName), j);
                for (int i = 0; i < networkData.size(); i++) {
                    networkData.instance(i).setValue(j, tmpVals[i]);
                }
            }
        }
        // fix string attributes
        for (int j = 2; j < networkData.numAttributes(); j++) {
            if (networkData.attribute(j).isString()) {
                String attributeName = networkData.attribute(j).name();
                double[] tmpVals = new double[networkData.size()];
                // get temporary values
                for (int i = 0; i < networkData.size(); i++) {
                    Instance inst = networkData.instance(i);
                    if (!inst.isMissing(j)) {
                        String val = networkData.instance(i).stringValue(j);
                        try {
                            tmpVals[i] = Double.parseDouble(val);
                        } catch (NumberFormatException e) {
                            // not a number, using 0.0;
                            tmpVals[i] = 0.0;
                        }
                    } else {
                        tmpVals[i] = 0.0;
                    }
                }
                // replace attribute
                networkData.deleteAttributeAt(j);
                networkData.insertAttributeAt(new Attribute(attributeName), j);
                for (int i = 0; i < networkData.size(); i++) {
                    networkData.instance(i).setValue(j, tmpVals[i]);
                }
            }
        }

        Map<String, Integer> filenames = new HashMap<>();
        for (int j = 0; j < metricsData.size(); j++) {
            filenames.put(metricsData.instance(j).stringValue(0), j);
        }
        // merge with network data
        int attributeIndex;
        for (int j = 2; j < networkData.numAttributes(); j++) {
            attributeIndex = metricsData.numAttributes();
            metricsData.insertAttributeAt(networkData.attribute(j), attributeIndex);
            for (int i = 0; i < networkData.size(); i++) {
                Integer instanceIndex = filenames.get(networkData.instance(i).stringValue(1));
                if (instanceIndex != null) {
                    metricsData.instance(instanceIndex).setValue(attributeIndex,
                            networkData.instance(i).value(j));
                }
            }
        }

        // add bug information
        attributeIndex = metricsData.numAttributes();
        final ArrayList<String> classAttVals = new ArrayList<String>();
        classAttVals.add("0");
        classAttVals.add("1");
        final Attribute classAtt = new Attribute("bug", classAttVals);
        metricsData.insertAttributeAt(classAtt, attributeIndex);
        for (int i = 0; i < bugsData.size(); i++) {
            if (bugsData.instance(i).value(2) > 0.0d) {
                Integer instanceIndex = filenames.get(bugsData.instance(i).stringValue(1));
                if (instanceIndex != null) {
                    metricsData.instance(instanceIndex).setValue(attributeIndex, 1.0);
                }
            }
        }

        // remove filenames
        metricsData.deleteAttributeAt(0);
        Attribute eigenvector = metricsData.attribute("eigenvector");
        if (eigenvector != null) {
            for (int j = 0; j < metricsData.numAttributes(); j++) {
                if (metricsData.attribute(j) == eigenvector) {
                    metricsData.deleteAttributeAt(j);
                }
            }
        }

        metricsData.setClassIndex(metricsData.numAttributes() - 1);

        // set all missing values to 0
        for (int i = 0; i < metricsData.size(); i++) {
            for (int j = 0; j < metricsData.numAttributes(); j++) {
                if (metricsData.instance(i).isMissing(j)) {
                    metricsData.instance(i).setValue(j, 0.0d);
                }
            }
        }
    } catch (IOException e) {
        Console.traceln(Level.SEVERE, "failure reading file: " + e.getMessage());
        metricsData = null;
    }
    return metricsData;
}
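
CSVLoader defaults the relation name to the source file name, so the setRelationName(project) call above swaps that for the project id. A reduced sketch of just that step:

import java.io.File;

import weka.core.Instances;
import weka.core.converters.CSVLoader;

final class CsvRename {
    static Instances loadRenamed(File csv, String project) throws Exception {
        CSVLoader loader = new CSVLoader();
        loader.setSource(csv);
        Instances data = loader.getDataSet(); // relation name defaults to the file name
        data.setRelationName(project);        // replace it with the project id
        return data;
    }
}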

From source file: edu.umbc.cs.maple.utils.WekaUtils.java

License: Open Source License

/** Converts the instances in the given dataset to binary, setting the specified labels to positive.
 * Note this method is destructive to data, directly modifying its contents.
 * @param data the multiclass dataset to be converted to binary.
 * @param positiveClassValue the class value to treat as positive.
 */
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // ensure that data is nominal
    if (!data.classAttribute().isNominal())
        throw new IllegalArgumentException("Instances must have a nominal class.");

    // create the new class attribute
    FastVector newClasses = new FastVector(2);
    newClasses.addElement("Y");
    newClasses.addElement("N");
    Attribute newClassAttribute = new Attribute("class", newClasses);

    // alter the class attribute to be binary
    int newClassAttIdx = data.classIndex();
    data.insertAttributeAt(newClassAttribute, newClassAttIdx);
    int classAttIdx = data.classIndex();

    // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively)
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        Instance inst = data.instance(instIdx);
        if (inst.stringValue(classAttIdx).equals(positiveClassValue)) {
            inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y
        } else {
            inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be N
        }
    }

    // switch the class index to the new class and delete the old class
    data.setClassIndex(newClassAttIdx);
    data.deleteAttributeAt(classAttIdx);

    // alter the dataset name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}
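
A hedged usage example (the iris.arff path and the class value are assumptions; any nominal-class dataset works):

import edu.umbc.cs.maple.utils.WekaUtils;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

final class BinaryDemo {
    public static void main(String[] args) throws Exception {
        Instances iris = DataSource.read("iris.arff"); // hypothetical path
        iris.setClassIndex(iris.numAttributes() - 1);
        WekaUtils.convertMulticlassToBinary(iris, "Iris-versicolor");
        // the relation name now records the positive class
        System.out.println(iris.relationName()); // e.g. iris-Iris-versicolor
    }
}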

From source file: jjj.asap.sas.parser.job.ImportParserData.java

License: Open Source License

private void process(final String parent, int essaySet, Map<Double, List<String>> tags,
        Map<Double, List<String>> parseTrees, Map<Double, List<String>> depends) {

    // check if output exists
    boolean any = false;

    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-extra-stats.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-pos-tags.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-parse-tree.arff"))
        any = true;
    for (int j = 0; j < 7; j++) {
        if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends" + j + ".arff"))
            any = true;
    }

    if (!any) {
        Job.log("NOTE", "work/datasets/" + parent + "/" + essaySet
                + "-*.arff returns all required datasets - nothing to do");
        return;
    }

    // Load an existing dataset to use as a template.
    Instances dataset = Dataset.load("work/datasets/" + parent + "/" + essaySet + "-spell-checked.arff");

    // create the output datasets here. except for the extra statistics, 
    // the format is the same as 'dataset'.

    Instances tagsData = new Instances(dataset, 0);
    tagsData.setRelationName(essaySet + "-pos-tags.arff");
    Instances treeData = new Instances(dataset, 0);
    treeData.setRelationName(essaySet + "-parse-tree.arff");

    Instances dependsData[] = new Instances[7];
    for (int j = 0; j < 7; j++) {
        dependsData[j] = new Instances(dataset, 0);
        dependsData[j].setRelationName(essaySet + "-depends" + j + ".arff");
    }

    // extra stats
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    if (Contest.isMultiChoice(essaySet)) {
        builder.addNominalVariable("color", Contest.COLORS);
    }
    builder.addVariable("x_sent");
    builder.addVariable("x_para");
    builder.addVariable("x_length");
    builder.addVariable("x_words");
    builder.addVariable("x_unique_words");
    builder.addNominalVariable("score", Contest.getRubrics(essaySet));

    Instances extraStats = builder.getDataset(essaySet + "-extra-stats.arff");

    // now add rows for each instance

    for (int i = 0; i < dataset.numInstances(); i++) {

        // common variables
        Instance ob = dataset.instance(i);
        double id = ob.value(0);
        String y = ob.isMissing(dataset.numAttributes() - 1) ? null
                : ob.stringValue(dataset.numAttributes() - 1);
        String color = Contest.isMultiChoice(essaySet) ? ob.stringValue(dataset.attribute("color")) : null;
        String str = ob.stringValue(dataset.attribute("text"));

        //
        // Extra stats
        //

        int nSent = tags.containsKey(id) ? tags.get(id).size() : 0;
        int nPara = 0;
        for (int a = 0; a < str.length(); a++) {
            if (str.charAt(a) == '^')
                nPara++;
        }
        int nLength = str.length();
        int nWords = 0;
        int nUniqueWords = 0;
        String[] words = str.toLowerCase().split(" ");
        nWords = words.length;
        Set<String> u = new HashSet<String>();
        for (String w : words) {
            u.add(w);
        }
        nUniqueWords = u.size();

        extraStats.add(new DenseInstance(extraStats.numAttributes()));
        Instance extra = extraStats.lastInstance();
        extra.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            extra.setValue(1, color);
        }

        extra.setValue(extraStats.attribute("x_sent"), nSent);
        extra.setValue(extraStats.attribute("x_para"), nPara);
        extra.setValue(extraStats.attribute("x_length"), nLength);
        extra.setValue(extraStats.attribute("x_words"), nWords);
        extra.setValue(extraStats.attribute("x_unique_words"), nUniqueWords);

        if (y == null)
            extra.setValue(extraStats.numAttributes() - 1, Utils.missingValue());
        else
            extra.setValue(extraStats.numAttributes() - 1, y);

        //
        // POS tags
        //

        String tagsText = "";
        List<String> tagsList = tags.get(id);
        if (tagsList == null || tagsList.isEmpty()) {
            Job.log("WARNING", "no tags for " + id);
            tagsText = "x";
        } else {
            for (String tagsItem : tagsList) {
                tagsText += tagsItem;
            }
        }

        tagsData.add(new DenseInstance(ob.numAttributes()));
        Instance tagsOb = tagsData.lastInstance();
        tagsOb.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            tagsOb.setValue(1, color);
            tagsOb.setValue(2, tagsText.trim());
            if (y == null) {
                tagsOb.setValue(3, Utils.missingValue());
            } else {
                tagsOb.setValue(3, y);
            }
        } else {
            tagsOb.setValue(1, tagsText.trim());
            if (y == null) {
                tagsOb.setValue(2, Utils.missingValue());
            } else {
                tagsOb.setValue(2, y);
            }
        }

        //
        // Parse Tree
        //

        String treeText = "";
        List<String> treeList = parseTrees.get(id);
        if (treeList == null || treeList.isEmpty()) {
            Job.log("WARNING", "no parse tree for " + id);
            treeText = "x";
        } else {
            for (String treeItem : treeList) {
                treeText += treeItem;
            }
        }

        treeData.add(new DenseInstance(ob.numAttributes()));
        Instance treeOb = treeData.lastInstance();
        treeOb.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            treeOb.setValue(1, color);
            treeOb.setValue(2, treeText.trim());
            if (y == null) {
                treeOb.setValue(3, Utils.missingValue());
            } else {
                treeOb.setValue(3, y);
            }
        } else {
            treeOb.setValue(1, treeText.trim());
            if (y == null) {
                treeOb.setValue(2, Utils.missingValue());
            } else {
                treeOb.setValue(2, y);
            }
        }

        //
        // Depends data
        //

        for (int j = 0; j < 7; j++) {

            String text = "";
            List<String> list = depends.get(id);
            if (list == null || list.isEmpty()) {
                Job.log("WARNING", "no depends for " + id);
                text = "x";
            } else {
                for (String item : list) {
                    String[] term = StringUtils.safeSplit(item, "/", 3);
                    switch (j) {
                    case 0:
                        text += item;
                        break;
                    case 1:
                        text += term[1] + "/" + term[2];
                        break;
                    case 2:
                        text += term[0] + "/" + term[2];
                        break;
                    case 3:
                        text += term[0] + "/" + term[1];
                        break;
                    case 4:
                        text += term[0];
                        break;
                    case 5:
                        text += term[1];
                        break;
                    case 6:
                        text += term[2];
                        break;
                    }
                    text += " ";
                }
            }

            dependsData[j].add(new DenseInstance(ob.numAttributes()));
            Instance dependsOb = dependsData[j].lastInstance();
            dependsOb.setValue(0, id);
            if (Contest.isMultiChoice(essaySet)) {
                dependsOb.setValue(1, color);
                dependsOb.setValue(2, text.trim());
                if (y == null) {
                    dependsOb.setValue(3, Utils.missingValue());
                } else {
                    dependsOb.setValue(3, y);
                }
            } else {
                dependsOb.setValue(1, text.trim());
                if (y == null) {
                    dependsOb.setValue(2, Utils.missingValue());
                } else {
                    dependsOb.setValue(2, y);
                }
            }

        } // j
    } // dataset

    // Now save the new datasets

    Dataset.save("work/datasets/" + parent + "/" + tagsData.relationName(), tagsData);
    Dataset.save("work/datasets/" + parent + "/" + treeData.relationName(), treeData);
    for (int j = 0; j < 7; j++) {
        Dataset.save("work/datasets/" + parent + "/" + dependsData[j].relationName(), dependsData[j]);
    }
    Dataset.save("work/datasets/" + parent + "/" + extraStats.relationName(), extraStats);

}
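
A pattern worth noting above: each header copy is named after its target file, so relationName() doubles as the output file name at save time. A minimal sketch:

import weka.core.Instances;

final class NamedCopy {
    // sketch: an empty copy keeps the template's attributes; its relation
    // name can then double as the output file name when saving
    static Instances emptyCopyNamed(Instances template, String fileName) {
        Instances copy = new Instances(template, 0);
        copy.setRelationName(fileName);
        return copy;
    }
}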

From source file: jwebminer2.FeatureValueFileSaver.java

/**
 * Save the given text to the given location in the given format or
 * save the stored feature values, depending on the chosen_file_extension.
 * A progress bar is displayed (although not incremented).
 *
 * @param chosen_file_extension The file extension (corresponding to one
 *                              of the extensions published by the
 *                              getFileFormatExtension method) to use when
 *                              saving data_to_save, and the corresponding
 *                              file format.
 * @param data_to_save          The HTML code displayed on-screen. May be
 *                              null for non-HTML saving.
 * @param save_location         The file to save data_to_save to.
 * @throws Exception            Throws an Exception if the file cannot be
 *                              saved.
 */
public void saveContents(String chosen_file_extension, String data_to_save, File save_location)
        throws Exception {
    // Prepare the progress bar
    SimpleProgressBarDialog progress_bar = new SimpleProgressBarDialog(1, results_panel);

    // Write the whole contents of data_to_save verbatim as an HTML file
    // if an HTML file is to be saved
    if (chosen_file_extension.equals("HTML")) {
        DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                .getDataOutputStream(save_location);
        writer.writeBytes(data_to_save);
        writer.close();
    }

    // Only save the table of final feature values itself if a non-HTML
    // file format is to be saved
    else {
        // Access information to store
        double[][] feature_table = results_panel.feature_values;

        String[] column_labels = results_panel.column_labels;
        String[] row_labels = results_panel.row_labels;
        String[] orig_column_labels = column_labels;

        if (AnalysisProcessor.lastfm_enabled && AnalysisProcessor.is_cross_tabulation
                && (AnalysisProcessor.yahoo_application_id != null
                        || AnalysisProcessor.google_license_key != null)) {
            String[] column_labels_lastfm_websearch = new String[2 * column_labels.length];
            for (int i = 0; i < column_labels.length; i++) {
                column_labels_lastfm_websearch[i] = column_labels[i] + "_WS";
                column_labels_lastfm_websearch[i + column_labels.length] = column_labels[i] + "_LastFM";
            }
            column_labels = column_labels_lastfm_websearch;
        } else {
            column_labels = orig_column_labels;
        }

        // Save as tab delimited text file
        if (chosen_file_extension.equals("TXT")) {
            // Calculate the table to save
            String[][] results_table = new String[row_labels.length + 1][column_labels.length + 1];
            results_table[0][0] = "";
            for (int i = 0; i < results_table.length; i++) {
                for (int j = 0; j < results_table[i].length; j++) {
                    if (i == 0) {
                        if (j != 0)
                            results_table[i][j] = column_labels[j - 1];
                    } else {
                        if (j == 0)
                            results_table[i][j] = row_labels[i - 1];
                        else
                            results_table[i][j] = String.valueOf(feature_table[i - 1][j - 1]);
                    }
                }
            }

            // Save the table
            DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                    .getDataOutputStream(save_location);
            for (int i = 0; i < results_table.length; i++) {
                for (int j = 0; j < results_table[i].length; j++) {
                    // Write the table entry
                    writer.writeBytes(results_table[i][j]);

                    // Add a tab or a line break
                    if (j == results_table[i].length - 1)
                        writer.writeBytes("\n");
                    else
                        writer.writeBytes("\t");
                }
            }

            // Close the writing stream
            writer.close();
        }

        // Save as ACE XML file
        else if (chosen_file_extension.equals("ACE XML")) {
            // Set the name of the dataset to the name of the file
            // that is to be saved
            String data_set_name = mckay.utilities.staticlibraries.StringMethods
                    .removeExtension(save_location.getName());

            // Prepare feature definitions and store feature names to
            // put in DataSets
            FeatureDefinition[] feature_definitions = new FeatureDefinition[column_labels.length];
            String[] feature_names = new String[column_labels.length];
            for (int feat = 0; feat < feature_definitions.length; feat++) {
                feature_definitions[feat] = new FeatureDefinition(column_labels[feat], "", false, 1);
                feature_names[feat] = column_labels[feat];
            }

            // Prepare the DataSets to write
            DataSet[] data_sets = new DataSet[row_labels.length];
            for (int instance = 0; instance < data_sets.length; instance++) {
                // Instantiate the DataSet
                data_sets[instance] = new DataSet();

                // Store the instance names
                data_sets[instance].identifier = row_labels[instance];

                // Store the names of the features
                data_sets[instance].feature_names = feature_names;

                // Store the features for this DataSet as well as the
                // feature names
                double[][] these_feature_values = new double[feature_table[instance].length][1];
                for (int feat = 0; feat < these_feature_values.length; feat++)
                    these_feature_values[feat][0] = feature_table[instance][feat];
                data_sets[instance].feature_values = these_feature_values;

                // Validate, order and compact the DataSet
                data_sets[instance].orderAndCompactFeatures(feature_definitions, true);
            }

            // Save the feature values
            DataSet.saveDataSets(data_sets, feature_definitions, save_location,
                    "Features extracted with jWebMiner 2.0");
        }

        // Save as Weka ARFF file
        else if (chosen_file_extension.equals("Weka ARFF")) {
            // Set the name of the dataset to the name of the file
            // that is to be saved
            String data_set_name = mckay.utilities.staticlibraries.StringMethods
                    .removeExtension(save_location.getName());

            // Set the Attributes (feature names and class names)
            FastVector attributes_vector = new FastVector(column_labels.length + 1); // extra 1 is for class name
            for (int feat = 0; feat < column_labels.length; feat++)
                attributes_vector.addElement(new Attribute(column_labels[feat]));
            FastVector class_names_vector = new FastVector(column_labels.length);
            for (int cat = 0; cat < orig_column_labels.length; cat++)
                class_names_vector.addElement(orig_column_labels[cat]);
            attributes_vector.addElement(new Attribute("Class", class_names_vector));

            // Store attributes in an Instances object
            Instances instances = new Instances(data_set_name, attributes_vector, row_labels.length);
            instances.setClassIndex(instances.numAttributes() - 1);

            // Store the feature values and model classifications
            for (int inst = 0; inst < row_labels.length; inst++) {
                // Initialize an instance
                Instance this_instance = new Instance(instances.numAttributes());
                this_instance.setDataset(instances);
                int current_attribute = 0;

                // Set feature values for the instance
                for (int feat = 0; feat < column_labels.length; feat++)
                    this_instance.setValue(feat, feature_table[inst][feat]);

                // Set the class value for the instance
                // this_instance.setClassValue("a");

                // name the relation; this is loop-invariant and could be set once outside the loop
                instances.setRelationName("jWebMiner2");

                // Add this instance to instances
                instances.add(this_instance);
            }

            // Prepare the buffer to save to and add comments indicating
            // the names of the rows
            DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                    .getDataOutputStream(save_location);
            writer.writeBytes("% INSTANCES (DATA ROWS) BELOW CORRESPOND TO:\n%\n");
            for (int inst = 0; inst < row_labels.length; inst++)
                writer.writeBytes("%    " + (inst + 1) + ") " + row_labels[inst] + "\n");
            writer.writeBytes("%\n");

            // Save the ARFF file
            ArffSaver arff_saver = new ArffSaver();
            arff_saver.setInstances(instances);
            arff_saver.setFile(save_location);
            arff_saver.setDestination(writer);
            try {
                arff_saver.writeBatch();
            } catch (Exception e) {
                throw new Exception(
                        "File only partially saved.\n\nTry resaving the file with a .arff extension.");
            }

            // Close the writer
            writer.close();
        }
    }

    // Terminate the progress bar
    progress_bar.done();
}
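
Whatever name is set via setRelationName ends up verbatim on the @relation line that ArffSaver writes. A reduced sketch of just that interaction:

import java.io.File;

import weka.core.Instances;
import weka.core.converters.ArffSaver;

final class ArffName {
    static void save(Instances data, File target) throws Exception {
        data.setRelationName("jWebMiner2"); // written as: @relation jWebMiner2
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(target);
        saver.writeBatch();
    }
}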

From source file: lu.lippmann.cdb.common.gui.dataset.InstancesLoaderDialogFactory.java

License: Open Source License

private static Instances showDialog(final Component parent, final boolean setClass) throws Exception {
    final Preferences prefs = Preferences.userRoot().node("CadralDecisionBuild");
    final String path = prefs.get(REG_KEY, WekaDataAccessUtil.DEFAULT_SAMPLE_DIR);

    final JFileChooser fc = new JFileChooser();
    fc.setCurrentDirectory(new File(path));
    final int returnVal = fc.showOpenDialog(parent);
    if (returnVal == JFileChooser.APPROVE_OPTION) {
        final File file = fc.getSelectedFile();
        if (file != null) {
            prefs.put(REG_KEY, file.getPath());
            final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(file);
            final Attribute defaultClassAttr = ds.classIndex() >= 0 ? ds.classAttribute() : ds.attribute(0);
            ds.setClassIndex(-1);
            ds.setRelationName(file.getPath());
            final List<String> attributesNames = new ArrayList<String>();
            final Enumeration<?> e = ds.enumerateAttributes();
            while (e.hasMoreElements()) {
                final Attribute attr = (Attribute) e.nextElement();
                attributesNames.add(attr.name());
            }

            if (setClass) {
                final String s = (String) JOptionPane.showInputDialog(parent,
                        "Select the class attribute for '" + file.getName() + "' (default:'"
                                + defaultClassAttr.name() + "'): ",
                        "Class selection", JOptionPane.QUESTION_MESSAGE, null, // icon
                        attributesNames.toArray(), attributesNames.get(attributesNames.size() - 1));
                if (s != null) {
                    ds.setClass(ds.attribute(s));
                } else {
                    // no class chosen: cache the default class attribute anyway,
                    // otherwise no class index would be defined after a cancel + retry
                    ds.setClass(defaultClassAttr);
                    return null;
                }
            } else {
                ds.setClass(defaultClassAttr);
            }
            return ds;
        } else
            throw new Exception("no file selected");
    } else
        return null;
}

From source file: mao.datamining.RemoveUselessColumnsByMissingValues.java

License: Open Source License

/**
 * Signify that this batch of input to the filter is finished.
 *
 * @return true if there are instances pending output
 * @throws Exception if no input format defined
 */
public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
        throw new IllegalStateException("No input instance format defined");
    }
    if (m_removeFilter == null) {

        // establish attributes to remove from first batch

        Instances toFilter = getInputFormat();
        int[] attsToDelete = new int[toFilter.numAttributes()];
        int numToDelete = 0;
        for (int i = 0; i < toFilter.numAttributes(); i++) {
            if (i == toFilter.classIndex())
                continue; // skip class
            AttributeStats stats = toFilter.attributeStats(i);

            //remove those attributes who has high ratio of missing values
            if ((stats.missingCount * 100) / stats.totalCount > m_maxMissingPercentage) {
                attsToDelete[numToDelete++] = i;
            }
            //remove those columns defined in the list by manual check
            if (this.column2DeleteSet.contains(toFilter.attribute(i).name())) {
                attsToDelete[numToDelete++] = i;
            }
        }

        int[] finalAttsToDelete = new int[numToDelete];
        System.arraycopy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete);

        m_removeFilter = new Remove();
        m_removeFilter.setAttributeIndicesArray(finalAttsToDelete);
        m_removeFilter.setInvertSelection(false);
        m_removeFilter.setInputFormat(toFilter);

        for (int i = 0; i < toFilter.numInstances(); i++) {
            m_removeFilter.input(toFilter.instance(i));
        }
        m_removeFilter.batchFinished();

        Instance processed;
        Instances outputDataset = m_removeFilter.getOutputFormat();

        // restore old relation name to hide attribute filter stamp
        outputDataset.setRelationName(toFilter.relationName());

        setOutputFormat(outputDataset);
        while ((processed = m_removeFilter.output()) != null) {
            processed.setDataset(outputDataset);
            push(processed);
        }
    }
    flushInput();

    m_NewBatch = true;
    return (numPendingOutput() != 0);
}
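
Restoring the old relation name matters because Weka filters stamp their setup onto it. A reduced sketch of the same idiom using Filter.useFilter (the attribute indices are made up):

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

final class QuietFilter {
    static Instances removeQuietly(Instances data) throws Exception {
        Remove remove = new Remove();
        remove.setAttributeIndices("1,2"); // hypothetical columns to drop
        remove.setInputFormat(data);
        Instances filtered = Filter.useFilter(data, remove);
        // undo the "...unsupervised.attribute.Remove-R1,2"-style filter stamp
        filtered.setRelationName(data.relationName());
        return filtered;
    }
}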

From source file: meka.core.MLUtils.java

License: Open Source License

/**
 * Fixes the relation name by adding the "-C" attribute to it if necessary.
 *
 * @param data the dataset to fix
 * @param numClassAtts the number of class attributes (0 for none, &gt;0 for attributes at start, &lt;0 for attributes at end)
 */
public static void fixRelationName(Instances data, int numClassAtts) {
    if (data.relationName().indexOf(":") == -1)
        data.setRelationName(data.relationName() + ": -C " + numClassAtts);
}
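
A hedged usage example (the relation name "Music" is made up; Meka encodes the class count as "-C" inside the relation name):

import meka.core.MLUtils;
import weka.core.Instances;

final class FixDemo {
    static void demo(Instances data) {
        // suppose data.relationName() is "Music" with 6 class attributes at the start
        MLUtils.fixRelationName(data, 6);
        System.out.println(data.relationName()); // Music: -C 6
    }
}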