Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usages of weka.core.Instance.setDataset.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:sirius.predictor.main.PredictorFrame.java

License:Open Source License

private void runType3Classifier(ClassifierData classifierData) {
    /*/* w  ww. ja v  a2 s .  co m*/
     * This is for type3 classifier
     * Note that all position and motif list only does not apply to this classifier as
     * it will only give one score for each sequence
     */
    if (sequenceNameTableModel.getRowCount() < 1) {
        JOptionPane.showMessageDialog(this, "Please load File first!", "No Sequence",
                JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (loadFastaFileMenuItem.getState() == false) {
        JOptionPane.showMessageDialog(this, "Please load Fasta File! Currently, you have score file!",
                "Wrong File Format", JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (onAllPositionsMenuItem.getState() == false) {
        JOptionPane.showMessageDialog(this, "For type 3 classifier, it make only one prediction a sequence",
                "Information", JOptionPane.INFORMATION_MESSAGE);
    }
    try {
        BufferedWriter output = new BufferedWriter(new FileWriter(
                outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_"
                        + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"));
        Classifier classifierOne = classifierData.getClassifierOne();
        //Reading and Storing the featureList
        Instances inst = classifierData.getInstances();
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            //-1 because class attribute must be ignored
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }
        //Going through each and every sequence
        for (int x = 0; x < sequenceNameTableModel.getRowCount(); x++) {
            if (stopClassifier == true) {
                statusPane.setText("Running of Classifier Stopped!");
                stopClassifier = false;
                output.close();
                return;
            }
            //if(x%100 == 0)
            statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierOne @ " + x
                    + " / " + sequenceNameTableModel.getRowCount());
            //Header              
            output.write(sequenceNameTableModel.getHeader(x));
            output.newLine();
            output.write(sequenceNameTableModel.getSequence(x));
            output.newLine();
            //Sequence Score -> index-score, index-score
            String sequence = sequenceNameTableModel.getSequence(x);
            Instance tempInst;
            tempInst = new Instance(inst.numAttributes());
            tempInst.setDataset(inst);
            for (int z = 0; z < inst.numAttributes() - 1; z++) {
                //-1 because class attribute can be ignored
                //Give the sequence and the featureList to get the feature freqs on the sequence
                Object obj = GenerateArff.getMatchCount("+1_Index(-1)", sequence, featureDataArrayList.get(z),
                        classifierData.getScoringMatrixIndex(), classifierData.getCountingStyleIndex(),
                        classifierData.getScoringMatrix());
                if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                    tempInst.setValue(z, (Integer) obj);
                else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                    tempInst.setValue(z, (Double) obj);
                else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                    tempInst.setValue(z, (String) obj);
                else {
                    output.close();
                    throw new Error("Unknown: " + obj.getClass().getName());
                }
            }
            //note that pos or neg does not matter as this is not used
            tempInst.setValue(inst.numAttributes() - 1, "pos");
            try {
                double[] results = classifierOne.distributionForInstance(tempInst);
                output.write("0=" + results[0]);
            } catch (Exception e) {
                //this is to ensure that the run will continue              
                output.write("0=-0.0");
                //change throw error to screen output if i want the run to continue
                System.err
                        .println("Exception has Occurred for classifierOne.distributionForInstance(tempInst);");
            }
            output.newLine();
            output.flush();
        }
        output.flush();
        output.close();

        statusPane.setText("ClassifierOne finished running...");
        loadScoreFile(outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName()
                + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores");
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null, "Exception Occured", "Error", JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}

From source file:sirius.predictor.main.PredictorFrame.java

License:Open Source License

/**
 * Runs a type 1 classifier (all positions or motif list) or a type 2 classifier (all
 * positions) over every loaded sequence and loads the resulting score file.
 *
 * <p>Stage one slides a window over each sequence, scores each window with classifier one and
 * writes, per sequence, the header, the sequence and a comma separated list of
 * {@code location=score} entries to the "classifierone_" score file.
 *
 * <p>If the classifier type is 2, stage two reads that file back, slides a window over the
 * classifier one scores, scores each window with classifier two and writes the
 * "classifiertwo_" score file in the same layout. A sequence whose classifier one score line
 * is empty (no window fitted) produces an empty classifier two score line.
 *
 * <p>All readers and writers are closed in {@code finally} blocks so they are released on
 * every exit path (normal completion, user stop, or failure).
 *
 * @param classifierData supplies the classifiers, dataset headers and window settings
 * @param allPositions when {@code false}, only positions accepted by
 *        {@code motifListTableModel.gotMotifMatch} are scored in stage one
 * @throws Error if {@code GenerateArff.getMatchCount} returns a value that is not an
 *         Integer, Double or String
 */
private void runClassifier(ClassifierData classifierData, boolean allPositions) {
    //this method is for type 1 classifier with all positions and motif list
    //and type 2 classifier with all positions
    if (sequenceNameTableModel.getRowCount() < 1) {
        JOptionPane.showMessageDialog(this, "Please load File first!", "No Sequence",
                JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (!loadFastaFileMenuItem.getState()) {
        JOptionPane.showMessageDialog(this, "Please load Fasta File! Currently, you have score file!",
                "Wrong File Format", JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (!onAllPositionsMenuItem.getState() && motifListTableModel.getSize() == 0) {
        JOptionPane.showMessageDialog(this, "There are no Motifs chosen in Motif List!", "No Motifs",
                JOptionPane.INFORMATION_MESSAGE);
        MotifListDialog dialog = new MotifListDialog(motifListTableModel);
        dialog.setLocationRelativeTo(this);
        dialog.setVisible(true);
        return;
    }
    //keeps prompting until an output directory has been chosen
    while (outputDirectory == null) {
        JOptionPane.showMessageDialog(this, "Please set output directory first!", "Output Directory not set",
                JOptionPane.INFORMATION_MESSAGE);
        setOutputDirectory();
    }
    try {
        //Built once so that every stage reads/writes exactly the same files
        final String scoreFileSuffix = classifierData.getClassifierName() + "_"
                + classifierData.getClassifierType() + "_" + fastaFilename + ".scores";
        final String classifierOneScoreFile = outputDirectory + File.separator + "classifierone_"
                + scoreFileSuffix;
        final String classifierTwoScoreFile = outputDirectory + File.separator + "classifiertwo_"
                + scoreFileSuffix;

        BufferedWriter output = new BufferedWriter(new FileWriter(classifierOneScoreFile));
        try {
            Classifier classifierOne = classifierData.getClassifierOne();
            int leftMostPosition = classifierData.getLeftMostPosition();
            int rightMostPosition = classifierData.getRightMostPosition();
            //Reading and Storing the featureList
            Instances inst = classifierData.getInstances();
            //-1 because class attribute must be ignored
            final int numFeatures = inst.numAttributes() - 1;
            ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
            for (int x = 0; x < numFeatures; x++) {
                featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
            }
            //The window geometry depends only on the classifier settings, not on the sequence
            final int targetLocationIndex = leftMostPosition * -1;
            final int minSequenceLengthRequired;
            if (leftMostPosition < 0 && rightMostPosition > 0) {// -ve and +ve
                minSequenceLengthRequired = (leftMostPosition * -1) + rightMostPosition;
            } else {//-ve and -ve, or +ve and +ve
                minSequenceLengthRequired = rightMostPosition - leftMostPosition + 1;
            }

            for (int x = 0; x < sequenceNameTableModel.getRowCount(); x++) {
                if (stopClassifier) {
                    statusPane.setText("Running of Classifier Stopped!");
                    stopClassifier = false;
                    return; //writer is closed by the finally block
                }
                statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierOne @ " + x
                        + " / " + sequenceNameTableModel.getRowCount());
                String sequence = sequenceNameTableModel.getSequence(x);
                //Header
                output.write(sequenceNameTableModel.getHeader(x));
                output.newLine();
                output.write(sequence);
                output.newLine();
                //Sequence Score -> index-score, index-score
                boolean firstEntryForClassifierOne = true;
                for (int y = 0; y + (minSequenceLengthRequired - 1) < sequence.length(); y++) {
                    //Check if targetLocation match any motif in motif List
                    if (!allPositions
                            && !motifListTableModel.gotMotifMatch(sequence.substring(y, y + targetLocationIndex)))
                        continue;
                    String line2 = sequence.substring(y, y + minSequenceLengthRequired);
                    Instance tempInst = new Instance(inst.numAttributes());
                    tempInst.setDataset(inst);
                    for (int z = 0; z < numFeatures; z++) {
                        //Give the sequence and the featureList to get the feature freqs on the sequence
                        Object obj = GenerateArff.getMatchCount("+1_Index(" + targetLocationIndex + ")", line2,
                                featureDataArrayList.get(z), classifierData.getScoringMatrixIndex(),
                                classifierData.getCountingStyleIndex(), classifierData.getScoringMatrix());
                        if (obj instanceof Integer)
                            tempInst.setValue(z, (Integer) obj);
                        else if (obj instanceof Double)
                            tempInst.setValue(z, (Double) obj);
                        else if (obj instanceof String)
                            tempInst.setValue(z, (String) obj);
                        else
                            throw new Error("Unknown: " + obj.getClass().getName());
                    }
                    //note that pos or neg does not matter as this is not used
                    tempInst.setValue(numFeatures, "neg");
                    double[] results = classifierOne.distributionForInstance(tempInst);
                    if (firstEntryForClassifierOne)
                        firstEntryForClassifierOne = false;
                    else
                        output.write(",");
                    output.write(y + targetLocationIndex + "=" + results[0]);
                }
                output.newLine();
                output.flush();
            }
            output.flush();
        } finally {
            output.close();
        }

        statusPane.setText("ClassifierOne finished running...");

        //Run classifier Two if it is type 2
        if (classifierData.getClassifierType() == 2) {
            BufferedWriter output2 = new BufferedWriter(new FileWriter(classifierTwoScoreFile));
            try {
                BufferedReader input2 = new BufferedReader(new FileReader(classifierOneScoreFile));
                try {
                    Classifier classifierTwo = classifierData.getClassifierTwo();
                    Instances inst2 = classifierData.getInstances2();
                    int setUpstream = classifierData.getSetUpstream();
                    int setDownstream = classifierData.getSetDownstream();
                    //Validated once up front: these settings do not change between sequences
                    if (setUpstream == 0 || setDownstream == 0) {
                        throw new Exception("setUpstream == 0 || setDownstream == 0");
                    }
                    int minScoreWindowRequired;
                    if (setUpstream < 0 && setDownstream > 0) {//-ve and +ve
                        minScoreWindowRequired = (setUpstream * -1) + setDownstream;
                    } else {//-ve and -ve, or +ve and +ve
                        minScoreWindowRequired = setDownstream - setUpstream + 1;
                    }
                    String lineHeader;
                    String lineSequence;
                    int lineCounter2 = 0;
                    while ((lineHeader = input2.readLine()) != null) {
                        if (stopClassifier) {
                            statusPane.setText("Running of Classifier Stopped!");
                            stopClassifier = false;
                            return; //reader and writer are closed by the finally blocks
                        }
                        statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierTwo @ "
                                + lineCounter2 + " / " + sequenceNameTableModel.getRowCount());
                        lineSequence = input2.readLine();
                        output2.write(lineHeader);
                        output2.newLine();
                        output2.write(lineSequence);
                        output2.newLine();
                        //Third line of each record: location=score,location=score,...
                        StringTokenizer locationScore = new StringTokenizer(input2.readLine(), ",");
                        int totalTokens = locationScore.countTokens();
                        String[][] scores = new String[totalTokens][2];
                        int scoreIndex = 0;
                        while (locationScore.hasMoreTokens()) {
                            StringTokenizer locationScoreToken = new StringTokenizer(locationScore.nextToken(), "=");
                            scores[scoreIndex][0] = locationScoreToken.nextToken();//location
                            scores[scoreIndex][1] = locationScoreToken.nextToken();//score
                            scoreIndex++;
                        }
                        //Stage one writes an empty score line when no window fitted the sequence;
                        //only read the first location when there is one, the loop below is then skipped
                        int targetLocationIndex2 = 0;
                        if (totalTokens > 0) {
                            int firstLocation = Integer.parseInt(scores[0][0]);
                            if (setUpstream < 0) {
                                targetLocationIndex2 = firstLocation + (-setUpstream);
                            } else {//setUpstream > 0
                                targetLocationIndex2 = firstLocation; //first location
                            }
                        }
                        for (int w = 0; w + minScoreWindowRequired - 1 < totalTokens; w++) {
                            if (w != 0)
                                output2.write(",");
                            //+1 is for the class index
                            Instance tempInst2 = new Instance(minScoreWindowRequired + 1);
                            tempInst2.setDataset(inst2);
                            for (int y = 0; y < minScoreWindowRequired; y++) {
                                tempInst2.setValue(y, Double.parseDouble(scores[w + y][1]));
                            }
                            tempInst2.setValue(tempInst2.numAttributes() - 1, "pos");
                            double[] results = classifierTwo.distributionForInstance(tempInst2);
                            output2.write(targetLocationIndex2 + "=" + results[0]);
                            targetLocationIndex2++;
                        }
                        lineCounter2++;
                        output2.newLine();
                    }
                } finally {
                    input2.close();
                }
            } finally {
                output2.close();
            }
            statusPane.setText("ClassifierTwo finished running...");
        }
        if (classifierData.getClassifierType() == 1)
            loadScoreFile(classifierOneScoreFile);
        else
            loadScoreFile(classifierTwoScoreFile);
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null, "Exception Occured", "Error", JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}

From source file:sirius.predictor.main.PredictorFrame.java

License:Open Source License

/**
 * Runs a type 2 classifier over every loaded sequence, writing classifier one scores to the
 * "classifierone_" score file and classifier two scores to the "classifiertwo_" score file,
 * then loads the classifier two file via {@code loadScoreFile}.
 *
 * <p>For each window position of a sequence, the classifier one scores needed for the
 * classifier two window are computed (scores already computed for the sequence are reused
 * from a per-sequence table) and then fed to classifier two. When
 * {@code onMotifsOnlyMenuItem} is selected, only positions accepted by
 * {@code motifListTableModel.gotMotifMatch} whose upstream/downstream window lies inside the
 * sequence are processed.
 *
 * <p>Both writers are closed in {@code finally} blocks so they are released on every exit
 * path (normal completion, user stop, or failure).
 *
 * @param classifierData supplies the classifiers, dataset headers and window settings
 * @throws Error if {@code GenerateArff.getMatchCount} returns a value that is not an
 *         Integer, Double or String
 */
private void runType2ClassifierWithMotifList(ClassifierData classifierData) {
    //Checking..
    if (sequenceNameTableModel.getRowCount() < 1) {
        JOptionPane.showMessageDialog(this, "Please load File first!", "No Sequence",
                JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (!loadFastaFileMenuItem.getState()) {
        JOptionPane.showMessageDialog(this, "Please load Fasta File! Currently, you have score file!",
                "Wrong File Format", JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (motifListTableModel.getSize() == 0) {
        JOptionPane.showMessageDialog(this, "There are no Motifs chosen in Motif List!", "No Motifs",
                JOptionPane.INFORMATION_MESSAGE);
        MotifListDialog dialog = new MotifListDialog(motifListTableModel);
        dialog.setLocationRelativeTo(this);
        dialog.setVisible(true);
        return;
    }
    //Proper running start
    try {
        //Built once so that the files written and the file loaded afterwards are the same
        final String scoreFileSuffix = classifierData.getClassifierName() + "_"
                + classifierData.getClassifierType() + "_" + fastaFilename + ".scores";
        final String classifierOneScoreFile = outputDirectory + File.separator + "classifierone_"
                + scoreFileSuffix;
        final String classifierTwoScoreFile = outputDirectory + File.separator + "classifiertwo_"
                + scoreFileSuffix;

        //classifierOne score output
        BufferedWriter output = new BufferedWriter(new FileWriter(classifierOneScoreFile));
        try {
            Classifier classifierOne = classifierData.getClassifierOne();
            int leftMostPosition = classifierData.getLeftMostPosition();
            int rightMostPosition = classifierData.getRightMostPosition();
            //Reading and Storing the featureList
            Instances inst = classifierData.getInstances();
            //-1 because class attribute must be ignored
            final int numFeatures = inst.numAttributes() - 1;
            ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
            for (int x = 0; x < numFeatures; x++) {
                featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
            }
            //initialization for type 2 classifier
            BufferedWriter output2 = new BufferedWriter(new FileWriter(classifierTwoScoreFile));
            try {
                int setUpstream = classifierData.getSetUpstream();
                int setDownstream = classifierData.getSetDownstream();
                int minScoreWindowRequired;
                if (setUpstream < 0 && setDownstream > 0) {//-ve and +ve
                    minScoreWindowRequired = (setUpstream * -1) + setDownstream;
                } else {//-ve and -ve, or +ve and +ve
                    minScoreWindowRequired = setDownstream - setUpstream + 1;
                }
                Classifier classifierTwo = classifierData.getClassifierTwo();
                Instances inst2 = classifierData.getInstances2();
                if (setUpstream == 0 || setDownstream == 0) {
                    throw new Exception("setUpstream == 0 || setDownstream == 0");
                }
                //The classifier one window geometry depends only on the classifier settings
                final int targetLocationIndex = leftMostPosition * -1;
                final int minSequenceLengthRequired;
                if (leftMostPosition < 0 && rightMostPosition > 0) {// -ve and +ve
                    minSequenceLengthRequired = (leftMostPosition * -1) + rightMostPosition;
                } else {//-ve and -ve, or +ve and +ve
                    minSequenceLengthRequired = rightMostPosition - leftMostPosition + 1;
                }
                //for each sequence
                for (int x = 0; x < sequenceNameTableModel.getRowCount(); x++) {
                    if (stopClassifier) {
                        statusPane.setText("Running of Classifier Stopped!");
                        stopClassifier = false;
                        return; //both writers are closed by the finally blocks
                    }
                    statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierOne @ " + x
                            + " / " + sequenceNameTableModel.getRowCount());
                    String sequence = sequenceNameTableModel.getSequence(x);
                    //Header
                    output.write(sequenceNameTableModel.getHeader(x));
                    output.newLine();
                    output.write(sequence);
                    output.newLine();
                    output2.write(sequenceNameTableModel.getHeader(x));
                    output2.newLine();
                    output2.write(sequence);
                    output2.newLine();
                    //Sequence Score -> index-score, index-score
                    //This hashtable is used to ensure that on positions where predictions are already made,
                    //we just skip. This will happen only if it is a type 2 classifier
                    Hashtable<Integer, Double> scoreTable = new Hashtable<Integer, Double>();
                    boolean firstEntryForClassifierOne = true;
                    boolean firstEntryForClassifierTwo = true;
                    for (int y = 0; y + (minSequenceLengthRequired - 1) < sequence.length(); y++) {
                        int endPoint = y;//endPoint should be the exact position
                        int currentY = y;
                        int startPoint = y;
                        //run only on Motifs?
                        if (onMotifsOnlyMenuItem.getState()) {
                            //Check if targetLocation match any motif in motif List
                            if (!motifListTableModel.gotMotifMatch(sequence.substring(y, y + targetLocationIndex))) {
                                continue; //position not found in motif list
                            }
                            //rollback to upstream and make prediction all the way till downstream
                            //needed for type 2 classifier
                            currentY += setUpstream;
                            if (setUpstream > 0) {
                                currentY--;
                            }
                            startPoint = currentY;
                            //note that y starts from 0 so y is surely >= 0
                            endPoint += setDownstream;
                            if (setDownstream > 0) {
                                endPoint--;
                            }
                            //check still within bound of the sequence
                            if (startPoint < 0 || endPoint >= sequence.length() - (minSequenceLengthRequired - 1)) {
                                continue;//out of bounds
                            }
                        }
                        while (currentY <= endPoint) {
                            if (scoreTable.get(currentY + targetLocationIndex) != null) {
                                //already scored for this sequence
                                currentY++;
                                continue;
                            }
                            String line2 = sequence.substring(currentY, currentY + minSequenceLengthRequired);
                            Instance tempInst = new Instance(inst.numAttributes());
                            tempInst.setDataset(inst);
                            for (int z = 0; z < numFeatures; z++) {
                                //Give the sequence and the featureList to get the feature freqs on the sequence
                                Object obj = GenerateArff.getMatchCount("+1_Index(" + targetLocationIndex + ")", line2,
                                        featureDataArrayList.get(z), classifierData.getScoringMatrixIndex(),
                                        classifierData.getCountingStyleIndex(), classifierData.getScoringMatrix());
                                if (obj instanceof Integer)
                                    tempInst.setValue(z, (Integer) obj);
                                else if (obj instanceof Double)
                                    tempInst.setValue(z, (Double) obj);
                                else if (obj instanceof String)
                                    tempInst.setValue(z, (String) obj);
                                else
                                    throw new Error("Unknown: " + obj.getClass().getName());
                            }
                            //note that pos or neg does not matter as this is not used
                            tempInst.setValue(numFeatures, "neg");
                            double[] results = classifierOne.distributionForInstance(tempInst);
                            if (firstEntryForClassifierOne)
                                firstEntryForClassifierOne = false;
                            else
                                output.write(",");
                            output.write(currentY + targetLocationIndex + "=" + results[0]);
                            scoreTable.put(currentY + targetLocationIndex, results[0]);
                            currentY++;
                        }
                        Instance tempInst2 = new Instance(minScoreWindowRequired + 1);//+1 for class attribute
                        tempInst2.setDataset(inst2);
                        int indexForClassifier2Inst = 0;
                        for (int z = startPoint; z <= endPoint; z++) {
                            tempInst2.setValue(indexForClassifier2Inst, scoreTable.get(targetLocationIndex + z));
                            indexForClassifier2Inst++;
                        }
                        //note that pos or neg does not matter as this is not used
                        tempInst2.setValue(tempInst2.numAttributes() - 1, "pos");
                        double[] results = classifierTwo.distributionForInstance(tempInst2);
                        if (firstEntryForClassifierTwo)
                            firstEntryForClassifierTwo = false;
                        else
                            output2.write(",");
                        output2.write(y + targetLocationIndex + "=" + results[0]);
                    } //end of for loop
                    output2.newLine();
                    output2.flush();
                    output.newLine();
                    output.flush();
                }
            } finally {
                output2.close();
            }
        } finally {
            output.close();
        }

        statusPane.setText("Classifier Finished running...");
        loadScoreFile(classifierTwoScoreFile);
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null, "Exception Occured", "Error", JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}

From source file:sirius.trainer.features.ClassifierFeature.java

License:Open Source License

/**
 * Scores a single sequence with classifier one of the wrapped type 3 classifier
 * (one prediction per sequence).
 *
 * <p>One feature is derived from the name of every non-class attribute of the classifier's
 * dataset header, each feature is evaluated on the sequence via
 * {@code GenerateArff.getMatchCount}, and the resulting instance is passed to
 * {@code distributionForInstance}.
 *
 * @param fastaFormat the entry whose sequence is scored
 * @return the first element of the distribution returned by classifier one
 * @throws Exception if the underlying classifier fails
 * @throws Error if a feature value is not an Integer, Double or String
 */
public double compute(FastaFormat fastaFormat) throws Exception {
    final Instances header = classifier.getInstances();
    //the last attribute is the class attribute and carries no feature
    final int classIndex = header.numAttributes() - 1;

    //Rebuild the feature definitions from the attribute names
    final ArrayList<Feature> features = new ArrayList<Feature>();
    for (int i = 0; i < classIndex; i++) {
        String attributeName = header.attribute(i).name();
        features.add(Feature.levelOneClassifierPane(attributeName));
    }

    final String sequence = fastaFormat.getSequence();
    final Instance candidate = new Instance(header.numAttributes());
    candidate.setDataset(header);

    //Fill in one feature value per non-class attribute
    for (int i = 0; i < classIndex; i++) {
        Object value = GenerateArff.getMatchCount("+1_Index(-1)", sequence, features.get(i),
                this.classifier.getScoringMatrixIndex(), this.classifier.getCountingStyleIndex(),
                this.classifier.getScoringMatrix());
        if (value instanceof Integer) {
            candidate.setValue(i, (Integer) value);
        } else if (value instanceof Double) {
            candidate.setValue(i, (Double) value);
        } else if (value instanceof String) {
            candidate.setValue(i, (String) value);
        } else {
            throw new Error("Unknown: " + value.getClass().getName());
        }
    }

    //note that pos or neg does not matter as this is not used
    candidate.setValue(classIndex, "pos");
    //result is discarded; the call is retained from the original implementation
    this.classifier.getClassifierOne().toString();
    double[] distribution = this.classifier.getClassifierOne().distributionForInstance(candidate);
    return distribution[0];
}

From source file:sirius.trainer.step4.DatasetGenerator.java

License:Open Source License

/**
 * Writes {@code Dataset2.arff} into the working directory.
 *
 * <p>The file has one numeric attribute per non-zero position in
 * {@code [classifierTwoUpstream, classifierTwoDownstream]} plus a nominal class attribute. Each
 * data row belongs to one sequence: for every string returned by
 * {@code SequenceManipulation.nextShift()} the features are evaluated, classifier one is asked for
 * its distribution, and element 0 of that distribution is written; the row ends with the class
 * label. The label switches from "pos" to "neg" once the number of positive sequences has been read.
 *
 * <p>Side effect: string attributes are removed from the instances object returned by
 * {@code applicationData.getDataset1Instances()}.
 *
 * @param parent parent component for the error dialog
 * @param applicationData source of ranges, scoring settings, working directory and status pane
 * @param classifierTwoUpstream first position of the classifier-two window
 * @param classifierTwoDownstream last position of the classifier-two window
 * @param classifierOne trained classifier used to score each shifted sequence
 * @return {@code true} when the file was written completely; {@code false} when the run was
 *         interrupted via {@code applicationData.terminateThread} or an exception occurred
 */
public static boolean generateDataset2(JInternalFrame parent, ApplicationData applicationData,
        int classifierTwoUpstream, int classifierTwoDownstream, Classifier classifierOne) {
    // Declared outside the try so the finally block can release the file on every exit path.
    BufferedWriter dataset2OutputFile = null;
    try {
        StatusPane statusPane = applicationData.getStatusPane();

        int positiveDataset2FromInt = applicationData.getPositiveDataset2FromField();
        int positiveDataset2ToInt = applicationData.getPositiveDataset2ToField();
        int negativeDataset2FromInt = applicationData.getNegativeDataset2FromField();
        int negativeDataset2ToInt = applicationData.getNegativeDataset2ToField();

        int totalDataset2PositiveInstances = positiveDataset2ToInt - positiveDataset2FromInt + 1;
        int totalDataset2NegativeInstances = negativeDataset2ToInt - negativeDataset2FromInt + 1;
        int totalDataset2Instances = totalDataset2PositiveInstances + totalDataset2NegativeInstances;

        int scoringMatrixIndex = applicationData.getScoringMatrixIndex();
        int countingStyleIndex = applicationData.getCountingStyleIndex();

        // ARFF header: relation, one numeric attribute per position, class attribute, @data marker.
        dataset2OutputFile = new BufferedWriter(
                new FileWriter(applicationData.getWorkingDirectory() + File.separator + "Dataset2.arff"));
        dataset2OutputFile.write("@relation 'Dataset2.arff' ");
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        for (int x = classifierTwoUpstream; x <= classifierTwoDownstream; x++) {
            // Sequence positions run ..., -1, +1, ...; there is no position 0.
            if (x != 0) {
                dataset2OutputFile.write("@attribute (" + x + ") numeric");
                dataset2OutputFile.newLine();
            }
        }
        // NOTE(review): when both "from" fields are 0 no class attribute is written at all.
        if (positiveDataset2FromInt > 0 && negativeDataset2FromInt > 0) {
            dataset2OutputFile.write("@attribute Class {pos,neg}");
        } else if (positiveDataset2FromInt > 0 && negativeDataset2FromInt == 0) {
            dataset2OutputFile.write("@attribute Class {pos}");
        } else if (positiveDataset2FromInt == 0 && negativeDataset2FromInt > 0) {
            dataset2OutputFile.write("@attribute Class {neg}");
        }
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        dataset2OutputFile.write("@data");
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        dataset2OutputFile.flush();

        // Dataset1 supplies the attribute layout for the temporary instances scored by classifier one.
        Instances inst = applicationData.getDataset1Instances();
        inst.deleteAttributeType(Attribute.STRING);
        int classIndex = inst.numAttributes() - 1;
        FastaFileManipulation fastaFile = new FastaFileManipulation(
                applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                positiveDataset2FromInt, positiveDataset2ToInt, negativeDataset2FromInt, negativeDataset2ToInt,
                applicationData.getWorkingDirectory());

        // One feature per attribute; the class attribute (last) is not a feature.
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < classIndex; x++) {
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }

        int lineCounter = 0;
        String _class = "pos";
        FastaFormat fastaFormat;
        while ((fastaFormat = fastaFile.nextSequence(_class)) != null) {
            if (applicationData.terminateThread) {
                statusPane.setText("Interrupted - Classifier Two Training Not Complete");
                return false;
            }
            // Incremented first so that lineCounter == x means "currently at sequence x".
            lineCounter++;
            dataset2OutputFile.flush();
            statusPane.setText("Generating Dataset2.arff.. @ " + lineCounter + " / " + totalDataset2Instances
                    + " Sequences");

            // Score every shift of the sequence with classifier one; these scores are the
            // attribute values classifier two is later trained on.
            SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                    classifierTwoUpstream, classifierTwoDownstream);
            String line2;
            while ((line2 = seq.nextShift()) != null) {
                Instance tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int x = 0; x < classIndex; x++) {
                    Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                            featureDataArrayList.get(x), scoringMatrixIndex, countingStyleIndex,
                            applicationData.getScoringMatrix());
                    if (obj instanceof Integer) {
                        tempInst.setValue(x, (Integer) obj);
                    } else if (obj instanceof Double) {
                        tempInst.setValue(x, (Double) obj);
                    } else if (obj instanceof String) {
                        tempInst.setValue(x, (String) obj);
                    } else {
                        // Not an Exception, so it bypasses the catch below; finally still closes the file.
                        throw new Error("Unknown: " + obj.getClass().getName());
                    }
                }
                tempInst.setValue(classIndex, _class);
                double[] results = classifierOne.distributionForInstance(tempInst);
                dataset2OutputFile.write(results[0] + ",");
            }
            dataset2OutputFile.write(_class);
            dataset2OutputFile.newLine();
            if (lineCounter == totalDataset2PositiveInstances) {
                _class = "neg";
            }
        }
        // Explicit close on the success path so that a failing final flush is reported below.
        dataset2OutputFile.close();
        fastaFile.cleanUp();
    } catch (Exception e) {
        e.printStackTrace();
        JOptionPane.showMessageDialog(parent, e.getMessage(), "ERROR", JOptionPane.ERROR_MESSAGE);
        applicationData.getStatusPane().setText("Error - Classifier Two Training Not Complete");
        return false;
    } finally {
        // Safety net for the interrupt and failure paths; closing an already closed writer is a no-op.
        if (dataset2OutputFile != null) {
            try {
                dataset2OutputFile.close();
            } catch (java.io.IOException ignored) {
                // Best effort: the outcome of the run has already been decided above.
            }
        }
    }
    return true;
}

From source file:sirius.trainer.step4.RunClassifier.java

License:Open Source License

/**
 * Trains classifier one on Dataset1 and, when {@code test} is set, evaluates it on Dataset 3.
 *
 * <p>Training uses a copy of {@code applicationData.getDataset1Instances()} with string attributes
 * removed; the class index is set on both the copy and the shared instances. During evaluation
 * every Dataset 3 sequence is written to {@code ClassifierOne.scores} (header line, sequence line,
 * then the class label followed by {@code ,position=score} for each string returned by
 * {@code SequenceManipulation.nextShift()}). The resulting file is summarised with
 * {@code PredictionStats} and pushed to the display, the application data and the graph.
 *
 * @param parent parent component for the error dialog
 * @param applicationData source of datasets, ranges, scoring settings and status pane
 * @param classifierOneDisplayTextArea text area updated with the evaluation statistics
 * @param m_ClassifierEditor editor whose current value is the classifier to train
 * @param myGraph graph that receives the evaluation statistics
 * @param test {@code false} to stop after training
 * @param classifierResults result lists that receive classifier name, training data and timings
 * @param range passed through to {@code PredictionStats}
 * @param threshold passed through to {@code PredictionStats}
 * @return the trained classifier (also when interrupted after training), or {@code null} if an
 *         exception occurred
 */
public static Classifier startClassifierOne(JInternalFrame parent, ApplicationData applicationData,
        JTextArea classifierOneDisplayTextArea, GenericObjectEditor m_ClassifierEditor, GraphPane myGraph,
        boolean test, ClassifierResults classifierResults, int range, double threshold) {
    // Declared outside the try so the finally block can release the file on every exit path.
    BufferedWriter dataset3OutputFile = null;
    try {
        StatusPane statusPane = applicationData.getStatusPane();

        long totalTimeStart = System.currentTimeMillis();
        statusPane.setText("Setting up...");
        // Train on a copy; the class index is also set on the shared Dataset1 instances.
        Instances inst = new Instances(applicationData.getDataset1Instances());
        inst.setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
        applicationData.getDataset1Instances()
                .setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);

        Classifier classifierOne = (Classifier) m_ClassifierEditor.getValue();
        statusPane.setText("Training Classifier One... May take a while... Please wait...");
        long trainTimeStart = System.currentTimeMillis();
        inst.deleteAttributeType(Attribute.STRING);
        classifierOne.buildClassifier(inst);
        long trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;

        String classifierName = m_ClassifierEditor.getValue().getClass().getName();
        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ", classifierName);
        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                applicationData.getWorkingDirectory() + File.separator + "Dataset1.arff");
        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");

        if (!test) {
            statusPane.setText("Classifier One Training Completed...Done...");
            return classifierOne;
        }
        if (applicationData.terminateThread) {
            statusPane.setText("Interrupted - Classifier One Training Completed");
            return classifierOne;
        }

        // Evaluation of classifier one on Dataset 3.
        statusPane.setText("Running ClassifierOne on Dataset 3..");
        int positiveDataset3FromInt = applicationData.getPositiveDataset3FromField();
        int positiveDataset3ToInt = applicationData.getPositiveDataset3ToField();
        int negativeDataset3FromInt = applicationData.getNegativeDataset3FromField();
        int negativeDataset3ToInt = applicationData.getNegativeDataset3ToField();

        dataset3OutputFile = new BufferedWriter(new FileWriter(
                applicationData.getWorkingDirectory() + File.separator + "ClassifierOne.scores"));

        FastaFileManipulation fastaFile = new FastaFileManipulation(
                applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                positiveDataset3FromInt, positiveDataset3ToInt, negativeDataset3FromInt, negativeDataset3ToInt,
                applicationData.getWorkingDirectory());

        // One feature per attribute; the class attribute (last) is not a feature.
        int classIndex = inst.numAttributes() - 1;
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < classIndex; x++) {
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }

        int lineCounter = 0;
        String _class = "pos";
        int totalDataset3PositiveInstances = positiveDataset3ToInt - positiveDataset3FromInt + 1;
        FastaFormat fastaFormat;
        while ((fastaFormat = fastaFile.nextSequence(_class)) != null) {
            if (applicationData.terminateThread) {
                statusPane.setText("Interrupted - Classifier One Training Completed");
                return classifierOne;
            }
            // Incremented first so that lineCounter == x means "currently at sequence x".
            lineCounter++;
            dataset3OutputFile.write(fastaFormat.getHeader());
            dataset3OutputFile.newLine();
            dataset3OutputFile.write(fastaFormat.getSequence());
            dataset3OutputFile.newLine();
            statusPane.setText("Running Classifier One on Dataset 3.. @ " + lineCounter + " / "
                    + applicationData.getTotalSequences(3) + " Sequences");

            // An index location of -1 means one prediction for the whole sequence; that case is
            // not expected in this method.
            if (fastaFormat.getIndexLocation() == -1) {
                throw new IllegalStateException("SHOULD NOT HAVE REACHED HERE!!");
            }

            // Predict on every position in the range, i.e. on every shift of the +1 location.
            int[] predictPosition = fastaFormat.getPredictPositionForClassifierOne(
                    applicationData.getLeftMostPosition(), applicationData.getRightMostPosition());
            SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                    predictPosition[0], predictPosition[1]);
            String line2;
            int currentPosition = predictPosition[0];
            dataset3OutputFile.write(_class);
            while ((line2 = seq.nextShift()) != null) {
                Instance tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int x = 0; x < classIndex; x++) {
                    Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                            featureDataArrayList.get(x), applicationData.getScoringMatrixIndex(),
                            applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix());
                    if (obj instanceof Integer) {
                        tempInst.setValue(x, (Integer) obj);
                    } else if (obj instanceof Double) {
                        tempInst.setValue(x, (Double) obj);
                    } else if (obj instanceof String) {
                        tempInst.setValue(x, (String) obj);
                    } else {
                        // Not an Exception, so it bypasses the catch below; finally still closes the file.
                        throw new Error("Unknown: " + obj.getClass().getName());
                    }
                }
                tempInst.setValue(classIndex, _class);
                double[] results = classifierOne.distributionForInstance(tempInst);
                dataset3OutputFile.write("," + currentPosition + "=" + results[0]);
                // Positions skip 0: -1 is followed directly by +1.
                currentPosition++;
                if (currentPosition == 0) {
                    currentPosition++;
                }
            }
            dataset3OutputFile.newLine();
            dataset3OutputFile.flush();
            if (lineCounter == totalDataset3PositiveInstances) {
                _class = "neg";
            }
        }
        // Explicit close before the file is read back by PredictionStats.
        dataset3OutputFile.close();
        PredictionStats classifierOneStatsOnBlindTest = new PredictionStats(
                applicationData.getWorkingDirectory() + File.separator + "ClassifierOne.scores", range,
                threshold);
        long totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        // Integer division yields whole minutes; the remainder is reported as seconds.
        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        classifierOneStatsOnBlindTest.updateDisplay(classifierResults, classifierOneDisplayTextArea, true);
        applicationData.setClassifierOneStats(classifierOneStatsOnBlindTest);
        myGraph.setMyStats(classifierOneStatsOnBlindTest);
        statusPane.setText("Done!");
        fastaFile.cleanUp();
        return classifierOne;
    } catch (Exception ex) {
        ex.printStackTrace();
        JOptionPane.showMessageDialog(parent, ex.getMessage() + "Classifier One on Blind Test Set",
                "Evaluate classifier", JOptionPane.ERROR_MESSAGE);
        return null;
    } finally {
        // Safety net for the interrupt and failure paths; closing an already closed writer is a no-op.
        if (dataset3OutputFile != null) {
            try {
                dataset3OutputFile.close();
            } catch (java.io.IOException ignored) {
                // Best effort: the outcome of the run has already been decided above.
            }
        }
    }
}

From source file:sirius.trainer.step4.RunClassifier.java

License:Open Source License

/**
 * Trains classifier two on Dataset2 and, when {@code test} is set, evaluates it on Dataset 3.
 *
 * <p>{@code DatasetGenerator.generateDataset2} is called first to produce {@code Dataset2.arff},
 * which is then loaded, stored in the application data and used for training. During evaluation
 * classifier one scores every string returned by {@code SequenceManipulation.nextShift()} for a
 * sequence; classifier two is then run over each window of {@code classifierTwoWindowSize}
 * consecutive scores. Results are written to {@code ClassifierTwo.scores} (header line, sequence
 * line, then the class label followed by {@code ,position=score} entries), summarised with
 * {@code PredictionStats} and pushed to the display, the application data and the graph.
 *
 * @param parent parent component for the error dialog
 * @param applicationData source of datasets, ranges, scoring settings and status pane
 * @param classifierTwoDisplayTextArea text area updated with the evaluation statistics
 * @param m_ClassifierEditor2 editor whose current value is the classifier to train
 * @param classifierOne trained classifier one, used to produce the scores classifier two consumes
 * @param myGraph graph that receives the evaluation statistics
 * @param test {@code false} to stop after training
 * @param classifierResults result lists that receive classifier name, training data and timings
 * @param range passed through to {@code PredictionStats}
 * @param threshold passed through to {@code PredictionStats}
 * @return the trained classifier (also when interrupted after training), or {@code null} if
 *         Dataset2 generation failed or an exception occurred
 */
public static Classifier startClassifierTwo(JInternalFrame parent, ApplicationData applicationData,
        JTextArea classifierTwoDisplayTextArea, GenericObjectEditor m_ClassifierEditor2,
        Classifier classifierOne, GraphPane myGraph, boolean test, ClassifierResults classifierResults,
        int range, double threshold) {
    // Kept outside the try so the catch block can print them as diagnostics.
    int arraySize = 0;
    int lineCount = 0;
    // Declared outside the try so the finally block can release the file on every exit path.
    BufferedWriter classifierTwoOutput = null;
    try {
        StatusPane statusPane = applicationData.getStatusPane();
        long totalTimeStart = System.currentTimeMillis();
        Step1TableModel positiveStep1TableModel = applicationData.getPositiveStep1TableModel();
        Step1TableModel negativeStep1TableModel = applicationData.getNegativeStep1TableModel();
        int positiveDataset3FromInt = applicationData.getPositiveDataset3FromField();
        int positiveDataset3ToInt = applicationData.getPositiveDataset3ToField();
        int negativeDataset3FromInt = applicationData.getNegativeDataset3FromField();
        int negativeDataset3ToInt = applicationData.getNegativeDataset3ToField();

        // Generate Dataset2.arff; false means the step was interrupted or failed.
        statusPane.setText("Preparing Dataset2.arff...");
        if (!DatasetGenerator.generateDataset2(parent, applicationData, applicationData.getSetUpstream(),
                applicationData.getSetDownstream(), classifierOne)) {
            return null;
        }

        // Load Dataset2.arff and train classifier two on it.
        statusPane.setText("Training Classifier Two... May take a while... Please wait...");
        BufferedReader dataset2Reader = new BufferedReader(
                new FileReader(applicationData.getWorkingDirectory() + File.separator + "Dataset2.arff"));
        Instances inst2;
        try {
            inst2 = new Instances(dataset2Reader);
        } finally {
            // The reader is only needed while the instances are being parsed.
            dataset2Reader.close();
        }
        inst2.setClassIndex(inst2.numAttributes() - 1);

        Classifier classifierTwo = (Classifier) m_ClassifierEditor2.getValue();
        long trainTimeStart = System.currentTimeMillis();
        applicationData.setDataset2Instances(inst2);
        classifierTwo.buildClassifier(inst2);
        long trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;

        String classifierName = m_ClassifierEditor2.getValue().getClass().getName();
        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ", classifierName);
        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                applicationData.getWorkingDirectory() + File.separator + "Dataset2.arff");
        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");

        if (!test) {
            statusPane.setText("Classifier Two Trained...Done...");
            return classifierTwo;
        }
        if (applicationData.terminateThread) {
            // Classifier two has been trained at this point, same wording as the in-loop interrupt.
            statusPane.setText("Interrupted - Classifier Two Trained");
            return classifierTwo;
        }
        statusPane.setText("Running Classifier Two on Dataset 3...");

        classifierTwoOutput = new BufferedWriter(new FileWriter(
                applicationData.getWorkingDirectory() + File.separator + "ClassifierTwo.scores"));

        // Number of positions in [upstream, downstream]; position 0 does not exist, so a window
        // that spans both signs is one shorter than the plain difference + 1.
        int setClassifierTwoUpstreamInt = applicationData.getSetUpstream();
        int setClassifierTwoDownstreamInt = applicationData.getSetDownstream();
        int classifierTwoWindowSize;
        if (setClassifierTwoUpstreamInt < 0 && setClassifierTwoDownstreamInt > 0) {
            classifierTwoWindowSize = (setClassifierTwoUpstreamInt * -1) + setClassifierTwoDownstreamInt;
        } else if (setClassifierTwoUpstreamInt < 0 && setClassifierTwoDownstreamInt < 0) {
            classifierTwoWindowSize = (setClassifierTwoUpstreamInt - setClassifierTwoDownstreamInt - 1) * -1;
        } else {
            // both positive
            classifierTwoWindowSize = (setClassifierTwoDownstreamInt - setClassifierTwoUpstreamInt + 1);
        }

        // NOTE(review): this is the shared Dataset1 object, not a copy; generateDataset2 above
        // calls deleteAttributeType(Attribute.STRING) on what is presumably the same object.
        Instances inst = applicationData.getDataset1Instances();
        int classIndex = inst.numAttributes() - 1;

        FastaFileManipulation fastaFile = new FastaFileManipulation(positiveStep1TableModel,
                negativeStep1TableModel, positiveDataset3FromInt, positiveDataset3ToInt,
                negativeDataset3FromInt, negativeDataset3ToInt, applicationData.getWorkingDirectory());

        // One feature per attribute; the class attribute (last) is not a feature.
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < classIndex; x++) {
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }

        String _class = "pos";
        lineCount = 0;
        int totalPosSequences = positiveDataset3ToInt - positiveDataset3FromInt + 1;
        FastaFormat fastaFormat;
        while ((fastaFormat = fastaFile.nextSequence(_class)) != null) {
            if (applicationData.terminateThread) {
                statusPane.setText("Interrupted - Classifier Two Trained");
                return classifierTwo;
            }
            lineCount++;
            classifierTwoOutput.write(fastaFormat.getHeader());
            classifierTwoOutput.newLine();
            classifierTwoOutput.write(fastaFormat.getSequence());
            classifierTwoOutput.newLine();
            statusPane.setText("Running ClassifierTwo on Dataset 3...@ " + lineCount + " / "
                    + applicationData.getTotalSequences(3) + " Sequences");

            arraySize = fastaFormat.getArraySize(applicationData.getLeftMostPosition(),
                    applicationData.getRightMostPosition());
            // A negative size would otherwise surface as a bare NegativeArraySizeException.
            if (arraySize < 0) {
                throw new IllegalStateException("Negative score array size " + arraySize
                        + " for sequence " + lineCount + ". ");
            }
            double[] scores = new double[arraySize];
            int[] predictPosition = fastaFormat.getPredictPositionForClassifierOne(
                    applicationData.getLeftMostPosition(), applicationData.getRightMostPosition());

            // Stage 1: classifier one scores every shift of the sequence.
            SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(), predictPosition[0],
                    predictPosition[1]);
            int scoreCount = 0;
            String line2;
            while ((line2 = seq.nextShift()) != null) {
                Instance tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int x = 0; x < classIndex; x++) {
                    Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                            featureDataArrayList.get(x), applicationData.getScoringMatrixIndex(),
                            applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix());
                    if (obj instanceof Integer) {
                        tempInst.setValue(x, (Integer) obj);
                    } else if (obj instanceof Double) {
                        tempInst.setValue(x, (Double) obj);
                    } else if (obj instanceof String) {
                        tempInst.setValue(x, (String) obj);
                    } else {
                        // Not an Exception, so it bypasses the catch below; finally still closes the file.
                        throw new Error("Unknown: " + obj.getClass().getName());
                    }
                }
                tempInst.setValue(classIndex, _class);
                double[] results = classifierOne.distributionForInstance(tempInst);
                scores[scoreCount++] = results[0];
            }

            // Stage 2: classifier two is run over every window of consecutive classifier-one scores.
            int currentPosition = fastaFormat.getPredictionFromForClassifierTwo(
                    applicationData.getLeftMostPosition(), applicationData.getRightMostPosition(),
                    applicationData.getSetUpstream());
            classifierTwoOutput.write(_class);
            for (int y = 0; y < arraySize - classifierTwoWindowSize + 1; y++) {
                // One attribute per score in the window, plus one for the class.
                Instance tempInst2 = new Instance(classifierTwoWindowSize + 1);
                tempInst2.setDataset(inst2);
                for (int x = 0; x < classifierTwoWindowSize; x++) {
                    tempInst2.setValue(x, scores[x + y]);
                }
                tempInst2.setValue(tempInst2.numAttributes() - 1, _class);
                double[] results = classifierTwo.distributionForInstance(tempInst2);
                classifierTwoOutput.write("," + currentPosition + "=" + results[0]);
                // Positions skip 0: -1 is followed directly by +1.
                currentPosition++;
                if (currentPosition == 0) {
                    currentPosition++;
                }
            }
            classifierTwoOutput.newLine();
            classifierTwoOutput.flush();
            if (lineCount == totalPosSequences) {
                _class = "neg";
            }
        }
        // Explicit close before the file is read back by PredictionStats.
        classifierTwoOutput.close();
        statusPane.setText("Done!");
        PredictionStats classifierTwoStatsOnBlindTest = new PredictionStats(
                applicationData.getWorkingDirectory() + File.separator + "ClassifierTwo.scores", range,
                threshold);
        long totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        // Integer division yields whole minutes; the remainder is reported as seconds.
        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        classifierTwoStatsOnBlindTest.updateDisplay(classifierResults, classifierTwoDisplayTextArea, true);
        applicationData.setClassifierTwoStats(classifierTwoStatsOnBlindTest);
        myGraph.setMyStats(classifierTwoStatsOnBlindTest);
        fastaFile.cleanUp();
        return classifierTwo;
    } catch (Exception ex) {
        ex.printStackTrace();
        JOptionPane.showMessageDialog(parent,
                ex.getMessage() + "Classifier Two On Blind Test Set - Check Console Output",
                "Evaluate classifier two", JOptionPane.ERROR_MESSAGE);
        System.err.println("applicationData.getLeftMostPosition(): " + applicationData.getLeftMostPosition());
        System.err.println("applicationData.getRightMostPosition(): " + applicationData.getRightMostPosition());
        System.err.println("arraySize: " + arraySize);
        System.err.println("lineCount: " + lineCount);
        return null;
    } finally {
        // Safety net for the interrupt and failure paths; closing an already closed writer is a no-op.
        if (classifierTwoOutput != null) {
            try {
                classifierTwoOutput.close();
            } catch (java.io.IOException ignored) {
                // Best effort: the outcome of the run has already been decided above.
            }
        }
    }
}

From source file:sirius.trainer.step4.RunClassifier.java

License:Open Source License

/**
 * Optionally trains classifier one on the whole of Dataset1, then runs a
 * {@code folds}-fold cross-validation over the same data to gauge its accuracy.
 *
 * <p>For every fold {@code x} the positive and negative sequences are split by
 * {@code sequenceIndex % folds == x}: matching sequences go to
 * {@code testingDataset1_<fold>.fasta}, all others are converted to feature rows in
 * {@code trainingDataset1_<fold>.arff} (and mirrored to a fasta file for debugging).
 * A classifier is built from the training ARFF and every test sequence is scored at
 * each shifted position; the scores are appended to {@code ClassifierOne.scores} as
 * a header line, a sequence line and a {@code pos|neg,position=score,...} line.
 * Position 0 does not exist in the numbering and is skipped.
 *
 * <p>A debugging file {@code TestClassifierTwo_<fold>.arff} is also written, holding
 * the scores of the full-dataset classifier for positions -40..41.
 *
 * @param parent                       frame used as owner of the error dialog
 * @param applicationData              supplies datasets, ranges, working directory,
 *                                     scoring settings, status pane and the
 *                                     {@code terminateThread} flag
 * @param classifierOneDisplayTextArea text area handed to the statistics display
 * @param m_ClassifierEditor           editor whose current value is the classifier to use
 * @param folds                        number of cross-validation folds
 * @param myGraph                      graph pane that receives the resulting statistics
 * @param classifierResults            result lists updated with classifier name and timings
 * @param range                        passed through to {@code PredictionStats}
 * @param threshold                    passed through to {@code PredictionStats}
 * @param outputClassifier             when {@code true} the classifier is built on the
 *                                     full dataset before cross-validation starts
 * @return classifier one (also when the run is interrupted through
 *         {@code applicationData.terminateThread}), or {@code null} if an exception occurred
 */
public static Classifier xValidateClassifierOne(JInternalFrame parent, ApplicationData applicationData,
        JTextArea classifierOneDisplayTextArea, GenericObjectEditor m_ClassifierEditor, int folds,
        GraphPane myGraph, ClassifierResults classifierResults, int range, double threshold,
        boolean outputClassifier) {
    try {
        StatusPane statusPane = applicationData.getStatusPane();

        long totalTimeStart = System.currentTimeMillis(), totalTimeElapsed;
        int positiveDataset1FromInt = applicationData.getPositiveDataset1FromField();
        int positiveDataset1ToInt = applicationData.getPositiveDataset1ToField();
        int negativeDataset1FromInt = applicationData.getNegativeDataset1FromField();
        int negativeDataset1ToInt = applicationData.getNegativeDataset1ToField();

        Step1TableModel positiveStep1TableModel = applicationData.getPositiveStep1TableModel();
        Step1TableModel negativeStep1TableModel = applicationData.getNegativeStep1TableModel();

        Instances inst = new Instances(applicationData.getDataset1Instances());
        inst.setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);

        //Train classifier one with the full dataset first then do cross-validation to gauge its accuracy
        long trainTimeStart = 0, trainTimeElapsed = 0;
        Classifier classifierOne = (Classifier) m_ClassifierEditor.getValue();
        statusPane.setText("Training Classifier One... May take a while... Please wait...");
        //Record Start Time
        trainTimeStart = System.currentTimeMillis();
        inst.deleteAttributeType(Attribute.STRING);
        if (outputClassifier)
            classifierOne.buildClassifier(inst);
        //Record Total Time used to build classifier one
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        //Training Done

        String classifierName = m_ClassifierEditor.getValue().getClass().getName();
        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ", classifierName);
        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                folds + " fold cross-validation on Dataset1.arff");
        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");

        //Every attribute except the last one (the class attribute) is a feature
        final int numFeatures = inst.numAttributes() - 1;

        //Reading and Storing the featureList
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int y = 0; y < numFeatures; y++) {
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(y).name()));
        }

        final String workingDirPrefix = applicationData.getWorkingDirectory() + File.separator;

        //Debug ARFF covers the positions -40..41 (there is no position 0)
        final int setClassifierTwoUpstreamInt = -40;
        final int setClassifierTwoDownstreamInt = 41;

        BufferedWriter outputCrossValidation = new BufferedWriter(
                new FileWriter(workingDirPrefix + "ClassifierOne.scores"));
        try {
            for (int x = 0; x < folds; x++) {
                File trainFile = new File(workingDirPrefix + "trainingDataset1_" + (x + 1) + ".arff");
                File testFile = new File(workingDirPrefix + "testingDataset1_" + (x + 1) + ".fasta");
                //AHFU_DEBUG
                //Generate also the training file in fasta format for debugging purpose
                File trainFileFasta = new File(workingDirPrefix + "trainingDataset1_" + (x + 1) + ".fasta");
                //AHFU_DEBUG_END

                //AHFU_DEBUG - This part is to generate the TestClassifierTwo.arff for use in WEKA to test classifierTwo
                //TestClassifierTwo.arff - predictions scores from Set Upstream Field to Set Downstream Field
                //Now first generate the header for TestClassifierTwo.arff
                BufferedWriter testClassifierTwoArff = new BufferedWriter(
                        new FileWriter(workingDirPrefix + "TestClassifierTwo_" + (x + 1) + ".arff"));
                testClassifierTwoArff.write("@relation \'Used to Test Classifier Two\'");
                testClassifierTwoArff.newLine();
                for (int d = setClassifierTwoUpstreamInt; d <= setClassifierTwoDownstreamInt; d++) {
                    if (d == 0)
                        continue;
                    testClassifierTwoArff.write("@attribute (" + d + ") numeric");
                    testClassifierTwoArff.newLine();
                }
                if (positiveDataset1FromInt > 0 && negativeDataset1FromInt > 0)
                    testClassifierTwoArff.write("@attribute Class {pos,neg}");
                else if (positiveDataset1FromInt > 0 && negativeDataset1FromInt == 0)
                    testClassifierTwoArff.write("@attribute Class {pos}");
                else if (positiveDataset1FromInt == 0 && negativeDataset1FromInt > 0)
                    testClassifierTwoArff.write("@attribute Class {neg}");
                testClassifierTwoArff.newLine();
                testClassifierTwoArff.newLine();
                testClassifierTwoArff.write("@data");
                testClassifierTwoArff.newLine();
                testClassifierTwoArff.newLine();
                //END of AHFU_DEBUG
                statusPane.setText("Building Fold " + (x + 1) + "...");
                FastaFileManipulation fastaFile = new FastaFileManipulation(positiveStep1TableModel,
                        negativeStep1TableModel, positiveDataset1FromInt, positiveDataset1ToInt,
                        negativeDataset1FromInt, negativeDataset1ToInt, applicationData.getWorkingDirectory());

                //1) generate trainingDatasetX.arff headings
                BufferedWriter trainingOutputFile = new BufferedWriter(new FileWriter(trainFile));
                trainingOutputFile.write("@relation 'A temp file for X-validation purpose' ");
                trainingOutputFile.newLine();
                trainingOutputFile.newLine();
                trainingOutputFile.flush();

                for (int y = 0; y < numFeatures; y++) {
                    if (inst.attribute(y).type() == Attribute.NUMERIC)
                        trainingOutputFile.write("@attribute " + inst.attribute(y).name() + " numeric");
                    else if (inst.attribute(y).type() == Attribute.STRING)
                        trainingOutputFile.write("@attribute " + inst.attribute(y).name() + " String");
                    else {
                        //outputCrossValidation is closed by the enclosing finally
                        testClassifierTwoArff.close();
                        trainingOutputFile.close();
                        throw new Error("Unknown type: " + inst.attribute(y).name());
                    }
                    trainingOutputFile.newLine();
                    trainingOutputFile.flush();
                }
                if (positiveDataset1FromInt > 0 && negativeDataset1FromInt > 0)
                    trainingOutputFile.write("@attribute Class {pos,neg}");
                else if (positiveDataset1FromInt > 0 && negativeDataset1FromInt == 0)
                    trainingOutputFile.write("@attribute Class {pos}");
                else if (positiveDataset1FromInt == 0 && negativeDataset1FromInt > 0)
                    trainingOutputFile.write("@attribute Class {neg}");
                trainingOutputFile.newLine();
                trainingOutputFile.newLine();
                trainingOutputFile.write("@data");
                trainingOutputFile.newLine();
                trainingOutputFile.newLine();
                trainingOutputFile.flush();

                //2) generate testingDataset1.fasta
                BufferedWriter testingOutputFile = new BufferedWriter(new FileWriter(testFile));

                //AHFU_DEBUG
                //Open the IOStream for training file (fasta format)
                BufferedWriter trainingOutputFileFasta = new BufferedWriter(new FileWriter(trainFileFasta));
                //AHFU_DEBUG_END

                //Now, populating data for both the training and testing files
                int fastaFileLineCounter = 0;
                int posTestSequenceCounter = 0;
                int totalTestSequenceCounter = 0;
                //For pos sequences
                FastaFormat fastaFormat;
                while ((fastaFormat = fastaFile.nextSequence("pos")) != null) {
                    if ((fastaFileLineCounter % folds) == x) {//This sequence for testing
                        testingOutputFile.write(fastaFormat.getHeader());
                        testingOutputFile.newLine();
                        testingOutputFile.write(fastaFormat.getSequence());
                        testingOutputFile.newLine();
                        testingOutputFile.flush();
                        posTestSequenceCounter++;
                        totalTestSequenceCounter++;
                    } else {//for training
                        for (int z = 0; z < numFeatures; z++) {
                            trainingOutputFile.write(GenerateArff.getMatchCount(fastaFormat,
                                    featureDataArrayList.get(z), applicationData.getScoringMatrixIndex(),
                                    applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix())
                                    + ",");
                        }
                        trainingOutputFile.write("pos");
                        trainingOutputFile.newLine();
                        trainingOutputFile.flush();

                        //AHFU_DEBUG
                        //Write the data into the training file in fasta format
                        trainingOutputFileFasta.write(fastaFormat.getHeader());
                        trainingOutputFileFasta.newLine();
                        trainingOutputFileFasta.write(fastaFormat.getSequence());
                        trainingOutputFileFasta.newLine();
                        trainingOutputFileFasta.flush();
                        //AHFU_DEBUG_END
                    }
                    fastaFileLineCounter++;
                }
                //For neg sequences
                fastaFileLineCounter = 0;
                while ((fastaFormat = fastaFile.nextSequence("neg")) != null) {
                    if ((fastaFileLineCounter % folds) == x) {//This sequence for testing
                        testingOutputFile.write(fastaFormat.getHeader());
                        testingOutputFile.newLine();
                        testingOutputFile.write(fastaFormat.getSequence());
                        testingOutputFile.newLine();
                        testingOutputFile.flush();
                        totalTestSequenceCounter++;
                    } else {//for training
                        for (int z = 0; z < numFeatures; z++) {
                            trainingOutputFile.write(GenerateArff.getMatchCount(fastaFormat,
                                    featureDataArrayList.get(z), applicationData.getScoringMatrixIndex(),
                                    applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix())
                                    + ",");
                        }
                        trainingOutputFile.write("neg");
                        trainingOutputFile.newLine();
                        trainingOutputFile.flush();

                        //AHFU_DEBUG
                        //Write the data into the training file in fasta format
                        trainingOutputFileFasta.write(fastaFormat.getHeader());
                        trainingOutputFileFasta.newLine();
                        trainingOutputFileFasta.write(fastaFormat.getSequence());
                        trainingOutputFileFasta.newLine();
                        trainingOutputFileFasta.flush();
                        //AHFU_DEBUG_END
                    }
                    fastaFileLineCounter++;
                }
                trainingOutputFileFasta.close();
                trainingOutputFile.close();
                testingOutputFile.close();

                //3) train and test the classifier then store the statistics
                //NOTE(review): getValue() may hand back the same object as classifierOne; if so,
                //building it here replaces the full-dataset model - confirm whether a copy is needed.
                Classifier foldClassifier = (Classifier) m_ClassifierEditor.getValue();
                Instances instFoldTrain = new Instances(new BufferedReader(new FileReader(trainFile)));
                instFoldTrain.setClassIndex(instFoldTrain.numAttributes() - 1);
                foldClassifier.buildClassifier(instFoldTrain);

                //Reading the test file
                statusPane.setText("Evaluating fold " + (x + 1) + "..");
                BufferedReader testingInput = new BufferedReader(new FileReader(testFile));
                int lineCounter = 0;
                String lineHeader;
                String lineSequence;
                while ((lineHeader = testingInput.readLine()) != null) {
                    if (applicationData.terminateThread == true) {
                        statusPane.setText("Interrupted - Classifier One Training Completed");
                        testingInput.close();
                        testClassifierTwoArff.close();
                        //outputCrossValidation is closed by the enclosing finally
                        return classifierOne;
                    }
                    lineSequence = testingInput.readLine();
                    outputCrossValidation.write(lineHeader);
                    outputCrossValidation.newLine();
                    outputCrossValidation.write(lineSequence);
                    outputCrossValidation.newLine();
                    lineCounter++;
                    //For each sequence, you want to shift from upstream till downstream
                    //ie changing the +1 location
                    //to get the scores by classifier one so that can use it to train classifier two later
                    statusPane.setText("Evaluating fold " + (x + 1) + ".. @ " + lineCounter + " / "
                            + totalTestSequenceCounter);

                    fastaFormat = new FastaFormat(lineHeader, lineSequence);
                    int predictPosition[] = fastaFormat.getPredictPositionForClassifierOne(
                            applicationData.getLeftMostPosition(), applicationData.getRightMostPosition());

                    SequenceManipulation seq = new SequenceManipulation(lineSequence, predictPosition[0],
                            predictPosition[1]);
                    int currentPosition = predictPosition[0];
                    //The test file lists all positive sequences first, then the negative ones
                    final String actualClass = (lineCounter > posTestSequenceCounter) ? "neg" : "pos";
                    outputCrossValidation.write(actualClass);
                    String line2;
                    while ((line2 = seq.nextShift()) != null) {
                        Instance tempInst = new Instance(inst.numAttributes());
                        tempInst.setDataset(inst);
                        for (int i = 0; i < numFeatures; i++) {
                            //Give the sequence and the featureList to get the feature freqs on the sequence
                            Object obj = GenerateArff.getMatchCount(lineHeader, line2, featureDataArrayList.get(i),
                                    applicationData.getScoringMatrixIndex(),
                                    applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix());
                            //Each feature value belongs in its own attribute slot i
                            //(the fold index x must not be used as attribute index)
                            if (obj instanceof Integer)
                                tempInst.setValue(i, (Integer) obj);
                            else if (obj instanceof Double)
                                tempInst.setValue(i, (Double) obj);
                            else if (obj instanceof String)
                                tempInst.setValue(i, (String) obj);
                            else {
                                //outputCrossValidation is closed by the enclosing finally
                                testingInput.close();
                                testClassifierTwoArff.close();
                                throw new Error("Unknown: " + obj.getClass().getName());
                            }
                        }
                        tempInst.setValue(inst.numAttributes() - 1, actualClass);
                        double[] results = foldClassifier.distributionForInstance(tempInst);
                        outputCrossValidation.write("," + currentPosition + "=" + results[0]);
                        //AHFU_DEBUG
                        //NOTE(review): when outputClassifier is false classifierOne was not built by
                        //this method - confirm it is safe to query it here.
                        double[] resultsDebug = classifierOne.distributionForInstance(tempInst);
                        if (currentPosition >= setClassifierTwoUpstreamInt
                                && currentPosition <= setClassifierTwoDownstreamInt)
                            testClassifierTwoArff.write(resultsDebug[0] + ",");
                        //AHFU_DEBUG_END
                        //Positions are numbered ..., -2, -1, +1, +2, ... so 0 is skipped
                        currentPosition++;
                        if (currentPosition == 0)
                            currentPosition++;
                    } //end of sequence shift
                    outputCrossValidation.newLine();
                    outputCrossValidation.flush();
                    //AHFU_DEBUG
                    testClassifierTwoArff.write(actualClass);
                    testClassifierTwoArff.newLine();
                    testClassifierTwoArff.flush();
                    //AHFU_DEBUG_END
                } //end of reading test file
                //outputCrossValidation stays open here: it is shared by all folds
                testingInput.close();
                testClassifierTwoArff.close();
                fastaFile.cleanUp();

                //Per-fold temporary files are removed when the JVM exits
                trainFile.deleteOnExit();
                testFile.deleteOnExit();
                trainFileFasta.deleteOnExit();
            } //end of for loop for xvalidation
        } finally {
            //Closed exactly once, after the last fold or on any early exit, so that the
            //scores file is complete before PredictionStats reads it below
            outputCrossValidation.close();
        }

        PredictionStats classifierOneStatsOnXValidation = new PredictionStats(
                workingDirPrefix + "ClassifierOne.scores", range, threshold);
        //display(double range)
        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        //Integer division on purpose: whole minutes, followed by the remaining seconds
        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        classifierOneStatsOnXValidation.updateDisplay(classifierResults, classifierOneDisplayTextArea, true);
        applicationData.setClassifierOneStats(classifierOneStatsOnXValidation);
        myGraph.setMyStats(classifierOneStatsOnXValidation);

        statusPane.setText("Done!");

        return classifierOne;
    } catch (Exception e) {
        e.printStackTrace();
        JOptionPane.showMessageDialog(parent, e.getMessage(), "ERROR", JOptionPane.ERROR_MESSAGE);
        return null;
    }
}

From source file:sirius.trainer.step4.RunClassifier.java

License:Open Source License

public static Classifier xValidateClassifierTwo(JInternalFrame parent, ApplicationData applicationData,
        JTextArea classifierTwoDisplayTextArea, GenericObjectEditor m_ClassifierEditor2,
        Classifier classifierOne, int folds, GraphPane myGraph, ClassifierResults classifierResults, int range,
        double threshold, boolean outputClassifier) {
    try {/*from   www  .  jav  a 2 s. co  m*/
        StatusPane statusPane = applicationData.getStatusPane();

        long totalTimeStart = System.currentTimeMillis(), totalTimeElapsed;
        //Classifier tempClassifier = (Classifier) m_ClassifierEditor2.getValue();
        final int positiveDataset2FromInt = applicationData.getPositiveDataset2FromField();
        final int positiveDataset2ToInt = applicationData.getPositiveDataset2ToField();
        final int negativeDataset2FromInt = applicationData.getNegativeDataset2FromField();
        final int negativeDataset2ToInt = applicationData.getNegativeDataset2ToField();

        final int totalDataset2Sequences = (positiveDataset2ToInt - positiveDataset2FromInt + 1)
                + (negativeDataset2ToInt - negativeDataset2FromInt + 1);

        final int classifierTwoUpstream = applicationData.getSetUpstream();
        final int classifierTwoDownstream = applicationData.getSetDownstream();

        Step1TableModel positiveStep1TableModel = applicationData.getPositiveStep1TableModel();
        Step1TableModel negativeStep1TableModel = applicationData.getNegativeStep1TableModel();

        //Train classifier two with the full dataset first then do cross-validation to gauge its accuracy                      
        //Preparing Dataset2.arff to train Classifier Two
        long trainTimeStart = 0, trainTimeElapsed = 0;
        statusPane.setText("Preparing Dataset2.arff...");
        //This step generates Dataset2.arff
        if (DatasetGenerator.generateDataset2(parent, applicationData, applicationData.getSetUpstream(),
                applicationData.getSetDownstream(), classifierOne) == false) {
            //Interrupted or Error occurred
            return null;
        }
        Instances instOfDataset2 = new Instances(new BufferedReader(
                new FileReader(applicationData.getWorkingDirectory() + File.separator + "Dataset2.arff")));
        instOfDataset2.setClassIndex(instOfDataset2.numAttributes() - 1);
        applicationData.setDataset2Instances(instOfDataset2);
        Classifier classifierTwo = (Classifier) m_ClassifierEditor2.getValue();
        statusPane.setText("Training Classifier Two... May take a while... Please wait...");
        //Record Start Time
        trainTimeStart = System.currentTimeMillis();
        if (outputClassifier)
            classifierTwo.buildClassifier(instOfDataset2);
        //Record Total Time used to build classifier one
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        //Training Done          

        String classifierName = m_ClassifierEditor2.getValue().getClass().getName();
        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ", classifierName);
        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                folds + " fold cross-validation on Dataset2.arff");
        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");

        Instances instOfDataset1 = new Instances(applicationData.getDataset1Instances());
        instOfDataset1.setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
        //Reading and Storing the featureList
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int y = 0; y < instOfDataset1.numAttributes() - 1; y++) {
            featureDataArrayList.add(Feature.levelOneClassifierPane(instOfDataset1.attribute(y).name()));
        }

        //Generating an Instance given a sequence with the current attributes
        int setClassifierTwoUpstreamInt = applicationData.getSetUpstream();
        int setClassifierTwoDownstreamInt = applicationData.getSetDownstream();
        int classifierTwoWindowSize;
        if (setClassifierTwoUpstreamInt < 0 && setClassifierTwoDownstreamInt > 0)
            classifierTwoWindowSize = (setClassifierTwoUpstreamInt * -1) + setClassifierTwoDownstreamInt;
        else if (setClassifierTwoUpstreamInt < 0 && setClassifierTwoDownstreamInt < 0)
            classifierTwoWindowSize = (setClassifierTwoUpstreamInt - setClassifierTwoDownstreamInt - 1) * -1;
        else//both +ve
            classifierTwoWindowSize = (setClassifierTwoDownstreamInt - setClassifierTwoUpstreamInt + 1);

        int posTestSequenceCounter = 0;

        BufferedWriter outputCrossValidation = new BufferedWriter(new FileWriter(
                applicationData.getWorkingDirectory() + File.separator + "classifierTwo.scores"));

        for (int x = 0; x < folds; x++) {
            File trainFile = new File(applicationData.getWorkingDirectory() + File.separator
                    + "trainingDataset2_" + (x + 1) + ".arff");
            File testFile = new File(applicationData.getWorkingDirectory() + File.separator + "testingDataset2_"
                    + (x + 1) + ".fasta");

            statusPane.setText("Preparing Training Data for Fold " + (x + 1) + "..");
            FastaFileManipulation fastaFile = new FastaFileManipulation(positiveStep1TableModel,
                    negativeStep1TableModel, positiveDataset2FromInt, positiveDataset2ToInt,
                    negativeDataset2FromInt, negativeDataset2ToInt, applicationData.getWorkingDirectory());

            //1) generate trainingDataset2.arff headings
            BufferedWriter trainingOutputFile = new BufferedWriter(
                    new FileWriter(applicationData.getWorkingDirectory() + File.separator + "trainingDataset2_"
                            + (x + 1) + ".arff"));
            trainingOutputFile.write("@relation 'A temp file for X-validation purpose' ");
            trainingOutputFile.newLine();
            trainingOutputFile.newLine();
            trainingOutputFile.flush();
            for (int y = classifierTwoUpstream; y <= classifierTwoDownstream; y++) {
                if (y != 0) {
                    trainingOutputFile.write("@attribute (" + y + ") numeric");
                    trainingOutputFile.newLine();
                    trainingOutputFile.flush();
                }
            }
            if (positiveDataset2FromInt > 0 && negativeDataset2FromInt > 0)
                trainingOutputFile.write("@attribute Class {pos,neg}");
            else if (positiveDataset2FromInt > 0 && negativeDataset2FromInt == 0)
                trainingOutputFile.write("@attribute Class {pos}");
            else if (positiveDataset2FromInt == 0 && negativeDataset2FromInt > 0)
                trainingOutputFile.write("@attribute Class {neg}");
            trainingOutputFile.newLine();
            trainingOutputFile.newLine();
            trainingOutputFile.write("@data");
            trainingOutputFile.newLine();
            trainingOutputFile.newLine();
            trainingOutputFile.flush();
            //AHFU_DEBUG 
            BufferedWriter testingOutputFileArff = new BufferedWriter(
                    new FileWriter(applicationData.getWorkingDirectory() + File.separator + "testingDataset2_"
                            + (x + 1) + ".arff"));
            testingOutputFileArff.write("@relation 'A temp file for X-validation purpose' ");
            testingOutputFileArff.newLine();
            testingOutputFileArff.newLine();
            testingOutputFileArff.flush();
            for (int y = classifierTwoUpstream; y <= classifierTwoDownstream; y++) {
                if (y != 0) {
                    testingOutputFileArff.write("@attribute (" + y + ") numeric");
                    testingOutputFileArff.newLine();
                    testingOutputFileArff.flush();
                }
            }
            if (positiveDataset2FromInt > 0 && negativeDataset2FromInt > 0)
                testingOutputFileArff.write("@attribute Class {pos,neg}");
            else if (positiveDataset2FromInt > 0 && negativeDataset2FromInt == 0)
                testingOutputFileArff.write("@attribute Class {pos}");
            else if (positiveDataset2FromInt == 0 && negativeDataset2FromInt > 0)
                testingOutputFileArff.write("@attribute Class {neg}");
            testingOutputFileArff.newLine();
            testingOutputFileArff.newLine();
            testingOutputFileArff.write("@data");
            testingOutputFileArff.newLine();
            testingOutputFileArff.newLine();
            testingOutputFileArff.flush();
            //AHFU_DEBUG END
            //2) generate testingDataset2.fasta
            BufferedWriter testingOutputFile = new BufferedWriter(
                    new FileWriter(applicationData.getWorkingDirectory() + File.separator + "testingDataset2_"
                            + (x + 1) + ".fasta"));

            //Now, populating datas for both the training and testing files            
            int fastaFileLineCounter = 0;
            posTestSequenceCounter = 0;
            int totalTestSequenceCounter = 0;
            int totalTrainTestSequenceCounter = 0;
            FastaFormat fastaFormat;
            //For pos sequences   
            while ((fastaFormat = fastaFile.nextSequence("pos")) != null) {
                if (applicationData.terminateThread == true) {
                    statusPane.setText("Interrupted - Classifier Two Trained");
                    outputCrossValidation.close();
                    testingOutputFileArff.close();
                    testingOutputFile.close();
                    trainingOutputFile.close();
                    return classifierTwo;
                }
                totalTrainTestSequenceCounter++;
                //if(totalTrainTestSequenceCounter%100 == 0)
                statusPane.setText("Preparing Training Data for Fold " + (x + 1) + ".. @ "
                        + totalTrainTestSequenceCounter + " / " + totalDataset2Sequences);
                if ((fastaFileLineCounter % folds) == x) {//This sequence is for testing
                    testingOutputFile.write(fastaFormat.getHeader());
                    testingOutputFile.newLine();
                    testingOutputFile.write(fastaFormat.getSequence());
                    testingOutputFile.newLine();
                    testingOutputFile.flush();
                    posTestSequenceCounter++;
                    totalTestSequenceCounter++;
                    //AHFU DEBUG
                    SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                            classifierTwoUpstream, classifierTwoDownstream);
                    String line2;
                    while ((line2 = seq.nextShift()) != null) {
                        Instance tempInst = new Instance(instOfDataset1.numAttributes());
                        tempInst.setDataset(instOfDataset1);
                        //-1 because class attribute can be ignored
                        for (int w = 0; w < instOfDataset1.numAttributes() - 1; w++) {
                            Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                                    featureDataArrayList.get(w), applicationData.getScoringMatrixIndex(),
                                    applicationData.getCountingStyleIndex(),
                                    applicationData.getScoringMatrix());
                            if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                                tempInst.setValue(w, (Integer) obj);
                            else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                                tempInst.setValue(w, (Double) obj);
                            else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                                tempInst.setValue(w, (String) obj);
                            else {
                                outputCrossValidation.close();
                                testingOutputFileArff.close();
                                testingOutputFile.close();
                                trainingOutputFile.close();
                                throw new Error("Unknown: " + obj.getClass().getName());
                            }
                        }
                        tempInst.setValue(tempInst.numAttributes() - 1, "pos");
                        double[] results = classifierOne.distributionForInstance(tempInst);
                        testingOutputFileArff.write(results[0] + ",");
                    }
                    testingOutputFileArff.write("pos");
                    testingOutputFileArff.newLine();
                    testingOutputFileArff.flush();
                    //AHFU DEBUG END
                } else {//This sequence is for training
                    SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                            classifierTwoUpstream, classifierTwoDownstream);
                    String line2;
                    while ((line2 = seq.nextShift()) != null) {
                        Instance tempInst = new Instance(instOfDataset1.numAttributes());
                        tempInst.setDataset(instOfDataset1);
                        //-1 because class attribute can be ignored
                        for (int w = 0; w < instOfDataset1.numAttributes() - 1; w++) {
                            Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                                    featureDataArrayList.get(w), applicationData.getScoringMatrixIndex(),
                                    applicationData.getCountingStyleIndex(),
                                    applicationData.getScoringMatrix());
                            if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                                tempInst.setValue(w, (Integer) obj);
                            else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                                tempInst.setValue(w, (Double) obj);
                            else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                                tempInst.setValue(w, (String) obj);
                            else {
                                outputCrossValidation.close();
                                testingOutputFileArff.close();
                                testingOutputFile.close();
                                trainingOutputFile.close();
                                throw new Error("Unknown: " + obj.getClass().getName());
                            }
                        }
                        tempInst.setValue(tempInst.numAttributes() - 1, "pos");
                        double[] results = classifierOne.distributionForInstance(tempInst);
                        trainingOutputFile.write(results[0] + ",");
                    }
                    trainingOutputFile.write("pos");
                    trainingOutputFile.newLine();
                    trainingOutputFile.flush();
                }
                fastaFileLineCounter++;
            }
            //For neg sequences
            fastaFileLineCounter = 0;
            while ((fastaFormat = fastaFile.nextSequence("neg")) != null) {
                if (applicationData.terminateThread == true) {
                    statusPane.setText("Interrupted - Classifier Two Trained");
                    outputCrossValidation.close();
                    testingOutputFileArff.close();
                    testingOutputFile.close();
                    trainingOutputFile.close();
                    return classifierTwo;
                }
                totalTrainTestSequenceCounter++;
                //if(totalTrainTestSequenceCounter%100 == 0)
                statusPane.setText("Preparing Training Data for Fold " + (x + 1) + ".. @ "
                        + totalTrainTestSequenceCounter + " / " + totalDataset2Sequences);
                if ((fastaFileLineCounter % folds) == x) {//This sequence is for testing
                    testingOutputFile.write(fastaFormat.getHeader());
                    testingOutputFile.newLine();
                    testingOutputFile.write(fastaFormat.getSequence());
                    testingOutputFile.newLine();
                    testingOutputFile.flush();
                    totalTestSequenceCounter++;
                    //AHFU DEBUG
                    SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                            classifierTwoUpstream, classifierTwoDownstream);
                    String line2;
                    while ((line2 = seq.nextShift()) != null) {
                        Instance tempInst = new Instance(instOfDataset1.numAttributes());
                        tempInst.setDataset(instOfDataset1);
                        //-1 because class attribute can be ignored
                        for (int w = 0; w < instOfDataset1.numAttributes() - 1; w++) {
                            Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                                    featureDataArrayList.get(w), applicationData.getScoringMatrixIndex(),
                                    applicationData.getCountingStyleIndex(),
                                    applicationData.getScoringMatrix());
                            if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                                tempInst.setValue(w, (Integer) obj);
                            else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                                tempInst.setValue(w, (Double) obj);
                            else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                                tempInst.setValue(w, (String) obj);
                            else {
                                outputCrossValidation.close();
                                testingOutputFileArff.close();
                                testingOutputFile.close();
                                trainingOutputFile.close();
                                throw new Error("Unknown: " + obj.getClass().getName());
                            }
                        }
                        tempInst.setValue(tempInst.numAttributes() - 1, "pos");//pos or neg does not matter here - not used         
                        double[] results = classifierOne.distributionForInstance(tempInst);
                        testingOutputFileArff.write(results[0] + ",");
                    }
                    testingOutputFileArff.write("neg");
                    testingOutputFileArff.newLine();
                    testingOutputFileArff.flush();
                    //AHFU DEBUG END
                } else {//This sequence is for training
                    SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                            classifierTwoUpstream, classifierTwoDownstream);
                    String line2;
                    while ((line2 = seq.nextShift()) != null) {
                        Instance tempInst = new Instance(instOfDataset1.numAttributes());
                        tempInst.setDataset(instOfDataset1);
                        //-1 because class attribute can be ignored
                        for (int w = 0; w < instOfDataset1.numAttributes() - 1; w++) {
                            Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                                    featureDataArrayList.get(w), applicationData.getScoringMatrixIndex(),
                                    applicationData.getCountingStyleIndex(),
                                    applicationData.getScoringMatrix());
                            if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                                tempInst.setValue(w, (Integer) obj);
                            else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                                tempInst.setValue(w, (Double) obj);
                            else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                                tempInst.setValue(w, (String) obj);
                            else {
                                outputCrossValidation.close();
                                testingOutputFileArff.close();
                                testingOutputFile.close();
                                trainingOutputFile.close();
                                throw new Error("Unknown: " + obj.getClass().getName());
                            }
                        }
                        tempInst.setValue(tempInst.numAttributes() - 1, "pos");//pos or neg does not matter here - not used              
                        double[] results = classifierOne.distributionForInstance(tempInst);
                        trainingOutputFile.write(results[0] + ",");
                    }
                    trainingOutputFile.write("neg");
                    trainingOutputFile.newLine();
                    trainingOutputFile.flush();
                }
                fastaFileLineCounter++;
            }
            trainingOutputFile.close();
            testingOutputFile.close();

            //AHFU_DEBUG
            testingOutputFileArff.close();
            //AHFU DEBUG END
            //3) train and test classifier two then store the statistics
            statusPane.setText("Building Fold " + (x + 1) + "..");
            //open an input stream to the arff file 
            BufferedReader trainingInput = new BufferedReader(
                    new FileReader(applicationData.getWorkingDirectory() + File.separator + "trainingDataset2_"
                            + (x + 1) + ".arff"));
            //getting ready to train a foldClassifier using arff file
            Instances instOfTrainingDataset2 = new Instances(
                    new BufferedReader(new FileReader(applicationData.getWorkingDirectory() + File.separator
                            + "trainingDataset2_" + (x + 1) + ".arff")));
            instOfTrainingDataset2.setClassIndex(instOfTrainingDataset2.numAttributes() - 1);
            Classifier foldClassifier = (Classifier) m_ClassifierEditor2.getValue();
            foldClassifier.buildClassifier(instOfTrainingDataset2);
            trainingInput.close();

            //Reading the test file
            statusPane.setText("Evaluating fold " + (x + 1) + "..");
            BufferedReader testingInput = new BufferedReader(
                    new FileReader(applicationData.getWorkingDirectory() + File.separator + "testingDataset2_"
                            + (x + 1) + ".fasta"));
            int lineCounter = 0;
            String lineHeader;
            String lineSequence;
            while ((lineHeader = testingInput.readLine()) != null) {
                if (applicationData.terminateThread == true) {
                    statusPane.setText("Interrupted - Classifier Two Not Trained");
                    outputCrossValidation.close();
                    testingOutputFileArff.close();
                    testingOutputFile.close();
                    trainingOutputFile.close();
                    testingInput.close();
                    return classifierTwo;
                }
                lineSequence = testingInput.readLine();
                outputCrossValidation.write(lineHeader);
                outputCrossValidation.newLine();
                outputCrossValidation.write(lineSequence);
                outputCrossValidation.newLine();
                lineCounter++;
                fastaFormat = new FastaFormat(lineHeader, lineSequence);
                int arraySize = fastaFormat.getArraySize(applicationData.getLeftMostPosition(),
                        applicationData.getRightMostPosition());
                double scores[] = new double[arraySize];
                int predictPosition[] = fastaFormat.getPredictPositionForClassifierOne(
                        applicationData.getLeftMostPosition(), applicationData.getRightMostPosition());
                //For each sequence, you want to shift from upstream till downstream 
                //ie changing the +1 location
                //to get the scores by classifier one so that can use it to train classifier two later
                //Doing shift from upstream till downstream    
                //if(lineCounter % 100 == 0)
                statusPane.setText("Evaluating fold " + (x + 1) + ".. @ " + lineCounter + " / "
                        + totalTestSequenceCounter);
                SequenceManipulation seq = new SequenceManipulation(lineSequence, predictPosition[0],
                        predictPosition[1]);
                int scoreCount = 0;
                String line2;
                while ((line2 = seq.nextShift()) != null) {
                    Instance tempInst = new Instance(instOfDataset1.numAttributes());
                    tempInst.setDataset(instOfDataset1);
                    for (int i = 0; i < instOfDataset1.numAttributes() - 1; i++) {
                        //-1 because class attribute can be ignored
                        //Give the sequence and the featureList to get the feature freqs on the sequence
                        Object obj = GenerateArff.getMatchCount(lineHeader, line2, featureDataArrayList.get(i),
                                applicationData.getScoringMatrixIndex(),
                                applicationData.getCountingStyleIndex(), applicationData.getScoringMatrix());
                        if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                            tempInst.setValue(i, (Integer) obj);
                        else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                            tempInst.setValue(i, (Double) obj);
                        else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                            tempInst.setValue(i, (String) obj);
                        else {
                            outputCrossValidation.close();
                            testingOutputFileArff.close();
                            testingOutputFile.close();
                            trainingOutputFile.close();
                            testingInput.close();
                            throw new Error("Unknown: " + obj.getClass().getName());
                        }
                    }
                    if (lineCounter > posTestSequenceCounter) {//for neg
                        tempInst.setValue(tempInst.numAttributes() - 1, "neg");
                    } else {
                        tempInst.setValue(tempInst.numAttributes() - 1, "pos");
                    }
                    double[] results = classifierOne.distributionForInstance(tempInst);
                    scores[scoreCount++] = results[0];
                } //end of sequence shift 
                  //Run classifierTwo                 
                int currentPosition = fastaFormat.getPredictionFromForClassifierTwo(
                        applicationData.getLeftMostPosition(), applicationData.getRightMostPosition(),
                        applicationData.getSetUpstream());
                if (lineCounter > posTestSequenceCounter)//neg
                    outputCrossValidation.write("neg");
                else
                    outputCrossValidation.write("pos");
                for (int y = 0; y < arraySize - classifierTwoWindowSize + 1; y++) {
                    //+1 is for the class index
                    Instance tempInst2 = new Instance(classifierTwoWindowSize + 1);
                    tempInst2.setDataset(instOfTrainingDataset2);
                    for (int l = 0; l < classifierTwoWindowSize; l++) {
                        tempInst2.setValue(l, scores[l + y]);
                    }
                    if (lineCounter > posTestSequenceCounter)//for neg
                        tempInst2.setValue(tempInst2.numAttributes() - 1, "neg");
                    else//for pos                          
                        tempInst2.setValue(tempInst2.numAttributes() - 1, "pos");
                    double[] results = foldClassifier.distributionForInstance(tempInst2);
                    outputCrossValidation.write("," + currentPosition + "=" + results[0]);
                    currentPosition++;
                    if (currentPosition == 0)
                        currentPosition++;
                }
                outputCrossValidation.newLine();
                outputCrossValidation.flush();
            } //end of reading test file
            outputCrossValidation.close();
            testingOutputFileArff.close();
            testingOutputFile.close();
            trainingOutputFile.close();
            testingInput.close();
            fastaFile.cleanUp();

            //AHFU_DEBUG
            trainFile.deleteOnExit();
            testFile.deleteOnExit();

            //NORMAL MODE
            //trainFile.delete();
            //testFile.delete();
        } //end of for loop for xvalidation      

        PredictionStats classifierTwoStatsOnXValidation = new PredictionStats(
                applicationData.getWorkingDirectory() + File.separator + "classifierTwo.scores", range,
                threshold);
        //display(double range)
        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        classifierTwoStatsOnXValidation.updateDisplay(classifierResults, classifierTwoDisplayTextArea, true);
        applicationData.setClassifierTwoStats(classifierTwoStatsOnXValidation);
        myGraph.setMyStats(classifierTwoStatsOnXValidation);

        statusPane.setText("Done!");

        return classifierTwo;
    } catch (Exception e) {
        e.printStackTrace();
        JOptionPane.showMessageDialog(parent, e.getMessage(), "ERROR", JOptionPane.ERROR_MESSAGE);
        return null;
    }
}

From source file:sirius.trainer.step4.RunClassifierWithNoLocationIndex.java

License:Open Source License

/**
 * Trains classifier one on Dataset1 and, when requested, scores it against Dataset3.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If {@code gaDialog} is non-null, all "pos" and "neg" sequences in the Dataset1 range are
 *       collected, passed to {@code runDAandLoadResult}, and the result is stored as the
 *       Dataset1 instances of {@code applicationData}.</li>
 *   <li>A copy of the Dataset1 instances is made, its last attribute is set as the class, string
 *       attributes are removed, and a classifier created via
 *       {@code Classifier.forName(classifierName, classifierOptions)} is built on it.</li>
 *   <li>If {@code test} is {@code false}, or {@code applicationData.terminateThread} is set, the
 *       trained classifier is returned immediately.</li>
 *   <li>Otherwise every Dataset3 sequence is written to
 *       {@code ClassifierOne_<randomNumberForClassifier>.scores} in the working directory
 *       (header line, sequence line, then {@code <class>,0=<score>}), where the score is the
 *       first element of {@code distributionForInstance}.</li>
 *   <li>A {@code PredictionStats} is built from that file, displayed, and stored on
 *       {@code applicationData} (and on {@code myGraph} when non-null).</li>
 * </ol>
 *
 * @param parent                       parent component for the error dialog
 * @param applicationData              shared state: datasets, working directory, scoring
 *                                     options, status pane and the terminate flag
 * @param classifierOneDisplayTextArea text area handed to {@code PredictionStats.updateDisplay}
 * @param myGraph                      graph that receives the resulting stats; may be null
 * @param test                         when {@code false} only training is performed
 * @param classifierResults            result lists to update; may be null
 * @param range                        passed through to {@code PredictionStats}
 * @param threshold                    passed through to {@code PredictionStats}
 * @param classifierName               class name given to {@code Classifier.forName}
 * @param classifierOptions            options given to {@code Classifier.forName}
 * @param returnClassifier             after a full test run, return the classifier
 *                                     ({@code true}) or the prediction stats ({@code false})
 * @param gaDialog                     when non-null, triggers the feature-selection step above
 * @param randomNumberForClassifier    suffix that makes the scores file name unique
 * @return the trained classifier, the {@code PredictionStats} of the Dataset3 run, or
 *         {@code null} if an exception occurred (an error dialog is shown in that case)
 * @throws Error if {@code GenerateArff.getMatchCount} returns a value that is not an
 *               {@code Integer}, {@code Double} or {@code String}; this is not caught here
 */
public static Object startClassifierOneWithNoLocationIndex(JInternalFrame parent,
        ApplicationData applicationData, JTextArea classifierOneDisplayTextArea, GraphPane myGraph,
        boolean test, ClassifierResults classifierResults, int range, double threshold, String classifierName,
        String[] classifierOptions, boolean returnClassifier, GeneticAlgorithmDialog gaDialog,
        int randomNumberForClassifier) {
    try {
        if (gaDialog != null) {
            //Run GA then load the result maxMCCFeatures into applicationData->Dataset1Instances
            int positiveDataset1FromInt = applicationData.getPositiveDataset1FromField();
            int positiveDataset1ToInt = applicationData.getPositiveDataset1ToField();
            int negativeDataset1FromInt = applicationData.getNegativeDataset1FromField();
            int negativeDataset1ToInt = applicationData.getNegativeDataset1ToField();
            FastaFileManipulation fastaFile = new FastaFileManipulation(
                    applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                    positiveDataset1FromInt, positiveDataset1ToInt, negativeDataset1FromInt,
                    negativeDataset1ToInt, applicationData.getWorkingDirectory());
            // NOTE(review): this FastaFileManipulation is never cleanUp()'d, unlike the Dataset3
            // one below - confirm whether that leaks temporary resources.
            FastaFormat fastaFormat;
            List<FastaFormat> posFastaList = new ArrayList<FastaFormat>();
            List<FastaFormat> negFastaList = new ArrayList<FastaFormat>();
            while ((fastaFormat = fastaFile.nextSequence("pos")) != null) {
                posFastaList.add(fastaFormat);
            }
            while ((fastaFormat = fastaFile.nextSequence("neg")) != null) {
                negFastaList.add(fastaFormat);
            }
            applicationData.setDataset1Instances(
                    runDAandLoadResult(applicationData, gaDialog, posFastaList, negFastaList));
        }

        StatusPane statusPane = applicationData.getStatusPane();
        long totalTimeStart = System.currentTimeMillis(), totalTimeElapsed;
        //Setting up training data set 1 for classifier one
        if (statusPane != null)
            statusPane.setText("Setting up...");
        //Load Dataset1 Instances (work on a copy; the class is the last attribute)
        Instances inst = new Instances(applicationData.getDataset1Instances());
        inst.setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
        applicationData.getDataset1Instances()
                .setClassIndex(applicationData.getDataset1Instances().numAttributes() - 1);
        Classifier classifierOne = Classifier.forName(classifierName, classifierOptions);
        if (statusPane != null)
            statusPane.setText("Training Classifier One... May take a while... Please wait...");
        //Record Start Time
        long trainTimeStart = System.currentTimeMillis();
        //Train Classifier One
        inst.deleteAttributeType(Attribute.STRING);
        classifierOne.buildClassifier(inst);
        //Record Total Time used to build classifier one
        long trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;

        if (classifierResults != null) {
            classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ", classifierName);
            classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                    applicationData.getWorkingDirectory() + File.separator + "Dataset1.arff");
            classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                    Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
        }
        if (test == false) {
            //If Need Not Test option is selected
            if (statusPane != null)
                statusPane.setText("Done!");
            return classifierOne;
        }
        if (applicationData.terminateThread == true) {
            //If Stop button is pressed
            if (statusPane != null)
                statusPane.setText("Interrupted - Classifier One Training Completed");
            return classifierOne;
        }
        //Running classifier one on dataset3
        if (statusPane != null)
            statusPane.setText("Running ClassifierOne on Dataset 3..");
        int positiveDataset3FromInt = applicationData.getPositiveDataset3FromField();
        int positiveDataset3ToInt = applicationData.getPositiveDataset3ToField();
        int negativeDataset3FromInt = applicationData.getNegativeDataset3FromField();
        int negativeDataset3ToInt = applicationData.getNegativeDataset3ToField();

        //Generate the header for ClassifierOne.scores on Dataset3
        String classifierOneFilename = applicationData.getWorkingDirectory() + File.separator + "ClassifierOne_"
                + randomNumberForClassifier + ".scores";
        BufferedWriter dataset3OutputFile = new BufferedWriter(new FileWriter(classifierOneFilename));
        FastaFileManipulation fastaFile;
        try {
            fastaFile = new FastaFileManipulation(
                    applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                    positiveDataset3FromInt, positiveDataset3ToInt, negativeDataset3FromInt,
                    negativeDataset3ToInt, applicationData.getWorkingDirectory());

            //Reading and Storing the featureList
            ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
            for (int x = 0; x < inst.numAttributes() - 1; x++) {
                //-1 because class attribute must be ignored
                featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
            }

            //Reading the fastaFile
            int lineCounter = 0;
            String _class = "pos";
            int totalDataset3PositiveInstances = positiveDataset3ToInt - positiveDataset3FromInt + 1;
            FastaFormat fastaFormat;
            while ((fastaFormat = fastaFile.nextSequence(_class)) != null) {
                if (applicationData.terminateThread == true) {
                    if (statusPane != null)
                        statusPane.setText("Interrupted - Classifier One Training Completed");
                    //the finally block below closes the scores file
                    return classifierOne;
                }
                dataset3OutputFile.write(fastaFormat.getHeader());
                dataset3OutputFile.newLine();
                dataset3OutputFile.write(fastaFormat.getSequence());
                dataset3OutputFile.newLine();
                lineCounter++;//Putting it here will mean if lineCounter is x then line == sequence x
                dataset3OutputFile.flush();
                if (statusPane != null)
                    statusPane.setText("Running Classifier One on Dataset 3.. @ " + lineCounter + " / "
                            + applicationData.getTotalSequences(3) + " Sequences");
                Instance tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int x = 0; x < inst.numAttributes() - 1; x++) {
                    //-1 because class attribute can be ignored
                    //Give the sequence and the featureList to get the feature freqs on the sequence
                    Object obj = GenerateArff.getMatchCount(fastaFormat, featureDataArrayList.get(x),
                            applicationData.getScoringMatrixIndex(), applicationData.getCountingStyleIndex(),
                            applicationData.getScoringMatrix());
                    if (obj instanceof Integer)
                        tempInst.setValue(x, (Integer) obj);
                    else if (obj instanceof Double)
                        tempInst.setValue(x, (Double) obj);
                    else if (obj instanceof String)
                        tempInst.setValue(x, (String) obj);
                    else
                        throw new Error("Unknown: " + obj.getClass().getName());
                }
                tempInst.setValue(inst.numAttributes() - 1, _class);
                double[] results = classifierOne.distributionForInstance(tempInst);
                dataset3OutputFile.write(_class + ",0=" + results[0]);
                dataset3OutputFile.newLine();
                dataset3OutputFile.flush();
                //Once all positive sequences are consumed, continue with the negative ones
                if (lineCounter == totalDataset3PositiveInstances)
                    _class = "neg";
            }
        } finally {
            //Always release the scores file, including on exceptions, errors and early returns;
            //it must be closed before PredictionStats reads it back below
            dataset3OutputFile.close();
        }

        //Display Statistics by reading the ClassifierOne.scores
        PredictionStats classifierOneStatsOnBlindTest = new PredictionStats(classifierOneFilename, range,
                threshold);
        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        if (classifierResults != null) {
            //Whole minutes (integer division) followed by the remaining seconds
            classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                    Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                            + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        }
        classifierOneStatsOnBlindTest.updateDisplay(classifierResults, classifierOneDisplayTextArea, true);
        applicationData.setClassifierOneStats(classifierOneStatsOnBlindTest);
        if (myGraph != null)
            myGraph.setMyStats(classifierOneStatsOnBlindTest);
        if (statusPane != null)
            statusPane.setText("Done!");
        fastaFile.cleanUp();
        if (returnClassifier)
            return classifierOne;
        else
            return classifierOneStatsOnBlindTest;
    } catch (Exception ex) {
        ex.printStackTrace();
        JOptionPane.showMessageDialog(parent, ex.getMessage(), "Evaluate classifier",
                JOptionPane.ERROR_MESSAGE);
        return null;
    }
}