Example usage for weka.classifiers Classifier distributionForInstance

List of usage examples for weka.classifiers Classifier distributionForInstance

Introduction

On this page you can find example usage for weka.classifiers Classifier distributionForInstance.

Prototype

public double[] distributionForInstance(Instance instance) throws Exception;

Document

Predicts the class memberships for a given instance.
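
Before the project examples below, here is a minimal, self-contained sketch of the typical call pattern. It assumes Weka is on the classpath, an ARFF file whose last attribute is the nominal class (weather.nominal.arff is only a placeholder name), and NaiveBayes as an arbitrary example classifier. Each entry of the returned array is the estimated probability of the corresponding class value, in the order defined by the class attribute.

import weka.classifiers.Classifier;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DistributionForInstanceDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder ARFF file; the last attribute is assumed to be the nominal class.
        Instances data = DataSource.read("weather.nominal.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Any classifier works; NaiveBayes is used here only as an example.
        Classifier cls = new NaiveBayes();
        cls.buildClassifier(data);

        // Class membership probabilities for the first instance,
        // one entry per value of the class attribute.
        Instance first = data.instance(0);
        double[] distribution = cls.distributionForInstance(first);
        for (int i = 0; i < distribution.length; i++) {
            System.out.printf("%s: %.3f%n", data.classAttribute().value(i), distribution[i]);
        }
    }
}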

Usage

From source file: org.opentox.qsar.processors.predictors.SimplePredictor.java

License: Open Source License

/**
 * Perform the prediction which is based on the serialized model file on the server.
 * @param data
 *      Input data with respect to which the predictions are calculated
 * @return
 *      A dataset containing the compounds submitted along with their predicted values.
 * @throws QSARException
 *      In case the prediction (as a whole) is not feasible. If the prediction is not
 *      feasible for a single instance, the prediction is set to <code>?</code> (unknown/undefined/missing).
 *      If the prediction is not feasible for all instances, an exception (QSARException) is thrown.
 */
@Override
public Instances predict(final Instances data) throws QSARException {

    Instances dataClone = new Instances(data);
    /**
     * IMPORTANT!
     * String attributes have to be removed from the dataset before
     * applying the prediction
     */
    dataClone = new AttributeCleanup(ATTRIBUTE_TYPE.string).filter(dataClone);

    /**
     * Set the class attribute of the incoming data to the model's dependent feature.
     */
    dataClone.setClass(dataClone.attribute(model.getDependentFeature().getURI()));

    /**
     *
     * Create the Instances that will host the predictions. This object contains
     * only two attributes: the compound_uri and the target feature of the model.
     */
    Instances predictions = null;
    FastVector attributes = new FastVector();
    final Attribute compoundAttribute = new Attribute("compound_uri", (FastVector) null);
    final Attribute targetAttribute = dataClone.classAttribute();
    attributes.addElement(compoundAttribute);
    attributes.addElement(targetAttribute);

    predictions = new Instances("predictions", attributes, 0);
    predictions.setClassIndex(1);

    Instance predictionInstance = new Instance(2);
    try {
        final Classifier cls = (Classifier) SerializationHelper.read(filePath);

        for (int i = 0; i < data.numInstances(); i++) {
            try {
                String currentCompound = data.instance(i).stringValue(0);
                predictionInstance.setValue(compoundAttribute, currentCompound);

                if (targetAttribute.type() == Attribute.NUMERIC) {
                    double clsLabel = cls.classifyInstance(dataClone.instance(i));
                    predictionInstance.setValue(targetAttribute, clsLabel);
                } else if (targetAttribute.type() == Attribute.NOMINAL) {
                    double[] clsLabel = cls.distributionForInstance(dataClone.instance(i));
                    int indexForNominalElement = maxInArray(clsLabel).getPosition();
                    Enumeration nominalValues = targetAttribute.enumerateValues();
                    int counter = 0;
                    String nomValue = "";
                    while (nominalValues.hasMoreElements()) {
                        if (counter == indexForNominalElement) {
                            nomValue = nominalValues.nextElement().toString();
                            break;
                        }
                        counter++;
                    }
                    predictionInstance.setValue(targetAttribute, nomValue);
                }

                predictions.add(predictionInstance);
            } catch (Exception ex) {
                System.out.println(ex);
            }
        }

    } catch (Exception ex) {
        ex.printStackTrace();
    }

    return predictions;
}

From source file: qa.experiment.ProcessFeatureVector.java

public String trainAndPredict(String[] processNames, String question) throws Exception {
    FastVector fvWekaAttribute = generateWEKAFeatureVector(processNames);
    Instances trainingSet = new Instances("Rel", fvWekaAttribute, bowFeature.size() + 1);
    trainingSet.setClassIndex(bowFeature.size());

    int cnt = 0;
    for (int i = 0; i < arrProcessFeature.size(); i++) {
        String[] names = arrProcessFeature.get(i).getProcessName().split("\\|");
        int sim = isNameFuzzyMatch(processNames, names);
        if (sim != -1) {
            // System.out.println("match " + arrProcessFeature.get(i).getProcessName());
            ArrayList<String> featureVector = arrProcessFeature.get(i).getFeatureVectors();
            for (int j = 0; j < featureVector.size(); j++) {
                Instance trainInstance = new Instance(bowFeature.size() + 1);
                String[] attrValues = featureVector.get(j).split("\t");
                // System.out.println(trainInstance.numAttributes());
                // System.out.println(fvWekaAttribute.size());
                for (int k = 0; k < bowFeature.size(); k++) {
                    trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(k),
                            Integer.parseInt(attrValues[k]));
                }
                trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(bowFeature.size()),
                        processNames[sim]);
                trainingSet.add(trainInstance);

                //System.out.println(cnt);
                cnt++;
            }
        }
    }

    Classifier cl = new NaiveBayes();
    cl.buildClassifier(trainingSet);
    Instance inst = new Instance(bowFeature.size() + 1);
    //String[] tokenArr = tokens.toArray(new String[tokens.size()]);
    for (int j = 0; j < bowFeature.size(); j++) {
        List<String> tokens = slem.tokenize(question);
        String[] tokArr = tokens.toArray(new String[tokens.size()]);
        int freq = getFrequency(bowFeature.get(j), tokArr);
        inst.setValue((Attribute) fvWekaAttribute.elementAt(j), freq);
    }

    inst.setDataset(trainingSet);
    int idxMax = ArrUtil.getIdxMax(cl.distributionForInstance(inst));
    return processNames[idxMax];
}

From source file: se.de.hu_berlin.informatik.faultlocalizer.machinelearn.WekaFaultLocalizer.java

License: Open Source License

@Override
public SBFLRanking<T> localize(final ILocalizerCache<T> localizer, ComputationStrategies strategy) {

    // == 1. Create Weka training instance

    final List<INode<T>> nodes = new ArrayList<>(localizer.getNodes());

    // nominal true/false values
    final List<String> tf = new ArrayList<>();
    tf.add("t");//from   w  ww.j a v  a 2  s .co m
    tf.add("f");

    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<>();
    final ArrayList<Attribute> attributeList = new ArrayList<>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }

    // create class attribute (trace success)
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);

    // create weka training instance
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);

    // == 2. add traces to training set

    // add an instance for each trace
    for (final ITrace<T> trace : localizer.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }

    // == 3. use prediction to localize faults

    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final SBFLRanking<T> ranking = new SBFLRanking<>();

        Log.out(this, "begin classifying");
        int classified = 0;

        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");

        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                Log.out(this, String.format("Classified %d nodes.", classified));
            }

            // involve only the current node in the trace
            instance.setValue(attributeMap.get(node), "t");

            // predict the probability that this setup leads to a failing network
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.add(node, distribution[1]);

            // reset involvement for the node
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}

From source file: se.de.hu_berlin.informatik.stardust.localizer.machinelearn.WekaFaultLocalizer.java

License: Open Source License

@Override
public SBFLRanking<T> localize(final ISpectra<T> spectra) {

    // == 1. Create Weka training instance

    final List<INode<T>> nodes = new ArrayList<>(spectra.getNodes());

    // nominal true/false values
    final List<String> tf = new ArrayList<String>();
    tf.add("t");// w w  w .jav  a 2 s  .c o m
    tf.add("f");

    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<INode<T>, Attribute>();
    final ArrayList<Attribute> attributeList = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }

    // create class attribute (trace success)
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);

    // create weka training instance
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);

    // == 2. add traces to training set

    // add an instance for each trace
    for (final ITrace<T> trace : spectra.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }

    // == 3. use prediction to localize faults

    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final SBFLRanking<T> ranking = new SBFLRanking<>();

        Log.out(this, "begin classifying");
        int classified = 0;

        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");

        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                Log.out(this, String.format("Classified %d nodes.", classified));
            }

            // involve only the current node in the trace
            instance.setValue(attributeMap.get(node), "t");

            // predict the probability that this setup leads to a failing network
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.add(node, distribution[1]);

            // reset involvement for the node
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}

From source file: sg.edu.nus.comp.nlp.ims.classifiers.CWekaEvaluator.java

License: Open Source License

@Override
public Object evaluate(Object p_Lexelt) throws Exception {
    ILexelt lexelt = (ILexelt) p_Lexelt;
    String lexeltID = lexelt.getID();
    IStatistic stat = (IStatistic) this.getStatistic(lexeltID);
    int type = 2;
    String firstSense = this.m_UnknownSense;
    if (stat == null) {
        type = 1;
        if (this.m_SenseIndex != null) {
            String first = this.m_SenseIndex.getFirstSense(lexeltID);
            if (first != null) {
                firstSense = first;
            }
        }
    } else {
        if (stat.getTags().size() == 1) {
            type = 1;
            firstSense = stat.getTags().iterator().next();
        } else {
            type = stat.getTags().size();
        }
    }
    int classIdx = this.m_ClassIndex;
    CResultInfo retVal = new CResultInfo();
    switch (type) {
    case 0:
        throw new Exception("no tag for lexelt " + lexeltID + ".");
    case 1:
        retVal.lexelt = lexelt.getID();
        retVal.docs = new String[lexelt.size()];
        retVal.ids = new String[lexelt.size()];
        retVal.classes = new String[] { firstSense };
        retVal.probabilities = new double[lexelt.size()][1];
        for (int i = 0; i < retVal.probabilities.length; i++) {
            retVal.probabilities[i][0] = 1;
            retVal.docs[i] = lexelt.getInstanceDocID(i);
            retVal.ids[i] = lexelt.getInstanceID(i);
        }
        break;
    default:
        lexelt.setStatistic(stat);
        Classifier classifier = (Classifier) this.getModel(lexeltID);
        ILexeltWriter lexeltWriter = new CWekaSparseLexeltWriter();
        Instances instances = (Instances) lexeltWriter.getInstances(lexelt);
        if (classIdx < 0) {
            classIdx = instances.numAttributes() - 1;
        }
        instances.setClassIndex(classIdx);
        retVal.lexelt = lexelt.getID();
        retVal.docs = new String[lexelt.size()];
        retVal.ids = new String[lexelt.size()];
        retVal.probabilities = new double[instances.numInstances()][];
        retVal.classes = new String[instances.classAttribute().numValues()];
        for (int i = 0; i < instances.classAttribute().numValues(); i++) {
            retVal.classes[i] = instances.classAttribute().value(i);
        }
        if (instances.classAttribute().isNumeric()) {
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.instance(i);
                retVal.docs[i] = lexelt.getInstanceDocID(i);
                retVal.ids[i] = lexelt.getInstanceID(i);
                retVal.probabilities[i] = new double[retVal.classes.length];
                retVal.probabilities[i][(int) classifier.classifyInstance(instance)] = 1;
            }
        } else {
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.instance(i);
                retVal.docs[i] = lexelt.getInstanceDocID(i);
                retVal.ids[i] = lexelt.getInstanceID(i);
                retVal.probabilities[i] = classifier.distributionForInstance(instance);
            }
        }
    }
    return retVal;
}

From source file: sirius.clustering.main.ClustererClassificationPane.java

License: Open Source License

private void start() {
    //Run Classifier
    if (this.inputDirectoryTextField.getText().length() == 0) {
        JOptionPane.showMessageDialog(parent, "Please set Input Directory to where the clusterer output are!",
                "Evaluate Classifier", JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (m_ClassifierEditor.getValue() == null) {
        JOptionPane.showMessageDialog(parent, "Please choose Classifier!", "Evaluate Classifier",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (validateStatsSettings(1) == false) {
        return;
    }
    if (this.clusteringClassificationThread == null) {
        startButton.setEnabled(false);
        stopButton.setEnabled(true);
        tabbedClassifierPane.setSelectedIndex(0);
        this.clusteringClassificationThread = (new Thread() {
            public void run() {
                //Clear the output text area
                levelOneClassifierOutputTextArea.setText("");
                resultsTableModel.reset();
                //double threshold = Double.parseDouble(classifierOneThresholdTextField.getText());                
                //cross-validation
                int numFolds;
                if (jackKnifeRadioButton.isSelected())
                    numFolds = -1;
                else
                    numFolds = Integer.parseInt(foldsField.getText());
                StringTokenizer st = new StringTokenizer(inputDirectoryTextField.getText(), File.separator);
                String filename = "";
                while (st.hasMoreTokens()) {
                    filename = st.nextToken();
                }
                StringTokenizer st2 = new StringTokenizer(filename, "_.");
                numOfCluster = 0;
                if (st2.countTokens() >= 2) {
                    st2.nextToken();
                    String numOfClusterString = st2.nextToken().replaceAll("cluster", "");
                    try {
                        numOfCluster = Integer.parseInt(numOfClusterString);
                    } catch (NumberFormatException e) {
                        JOptionPane.showMessageDialog(parent,
                                "Please choose the correct file! (Output from Utilize Clusterer)", "ERROR",
                                JOptionPane.ERROR_MESSAGE);
                    }
                }
                Classifier template = (Classifier) m_ClassifierEditor.getValue();

                for (int x = 0; x <= numOfCluster && clusteringClassificationThread != null; x++) {//Test each cluster
                    try {
                        long totalTimeStart = 0, totalTimeElapsed = 0;
                        totalTimeStart = System.currentTimeMillis();
                        statusLabel.setText("Reading in cluster" + x + " file..");
                        String inputFilename = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".arff");
                        String outputScoreFilename = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".score");
                        BufferedWriter output = new BufferedWriter(new FileWriter(outputScoreFilename));
                        Instances inst = new Instances(new FileReader(inputFilename));
                        //Assume that class attribute is the last attribute - This should be the case for all Sirius produced Arff files
                        inst.setClassIndex(inst.numAttributes() - 1);
                        Random random = new Random(1);//Simply set to 1, shall implement the random seed option later
                        inst.randomize(random);
                        if (inst.attribute(inst.classIndex()).isNominal())
                            inst.stratify(numFolds);
                        // for timing
                        ClassifierResults classifierResults = new ClassifierResults(false, 0);
                        String classifierName = m_ClassifierEditor.getValue().getClass().getName();
                        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ",
                                classifierName);
                        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                                inputFilename);
                        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                                "NA");
                        //ArrayList<Double> resultList = new ArrayList<Double>();     
                        if (jackKnifeRadioButton.isSelected() || numFolds > inst.numInstances() - 1)
                            numFolds = inst.numInstances() - 1;
                        for (int fold = 0; fold < numFolds && clusteringClassificationThread != null; fold++) {//Doing cross-validation          
                            statusLabel.setText("Cluster: " + x + " - Training Fold " + (fold + 1) + "..");
                            Instances train = inst.trainCV(numFolds, fold, random);
                            Classifier current = null;
                            try {
                                current = Classifier.makeCopy(template);
                                current.buildClassifier(train);
                                Instances test = inst.testCV(numFolds, fold);
                                statusLabel.setText("Cluster: " + x + " - Testing Fold " + (fold + 1) + "..");
                                for (int jj = 0; jj < test.numInstances(); jj++) {
                                    double[] result = current.distributionForInstance(test.instance(jj));
                                    output.write("Cluster: " + x);
                                    output.newLine();
                                    output.newLine();
                                    output.write(test.instance(jj).stringValue(test.classAttribute()) + ",0="
                                            + result[0]);
                                    output.newLine();
                                }
                            } catch (Exception ex) {
                                ex.printStackTrace();
                                statusLabel.setText("Error in cross-validation!");
                                startButton.setEnabled(true);
                                stopButton.setEnabled(false);
                            }
                        }
                        output.close();
                        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
                        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2)
                                        + " seconds");
                        double threshold = validateFieldAsThreshold(classifierOneThresholdTextField.getText(),
                                "Threshold Field", classifierOneThresholdTextField);
                        String filename2 = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".score");
                        PredictionStats classifierStats = new PredictionStats(filename2, 0, threshold);

                        resultsTableModel.add("Cluster " + x, classifierResults, classifierStats);
                        resultsTable.setRowSelectionInterval(x, x);
                        computeStats(numFolds);//compute and display the results                    
                    } catch (Exception e) {
                        e.printStackTrace();
                        statusLabel.setText("Error in reading file!");
                        startButton.setEnabled(true);
                        stopButton.setEnabled(false);
                    }
                } //end of cluster for loop

                resultsTableModel.add("Summary - Equal Weightage", null, null);
                resultsTable.setRowSelectionInterval(numOfCluster + 1, numOfCluster + 1);
                computeStats(numFolds);
                resultsTableModel.add("Summary - Weighted Average", null, null);
                resultsTable.setRowSelectionInterval(numOfCluster + 2, numOfCluster + 2);
                computeStats(numFolds);

                if (clusteringClassificationThread != null)
                    statusLabel.setText("Done!");
                else
                    statusLabel.setText("Interrupted..");
                startButton.setEnabled(true);
                stopButton.setEnabled(false);
                if (classifierOne != null) {
                    levelOneClassifierOutputScrollPane.getVerticalScrollBar()
                            .setValue(levelOneClassifierOutputScrollPane.getVerticalScrollBar().getMaximum());
                }
                clusteringClassificationThread = null;

            }
        });
        this.clusteringClassificationThread.setPriority(Thread.MIN_PRIORITY);
        this.clusteringClassificationThread.start();
    } else {
        JOptionPane.showMessageDialog(parent,
                "Cannot start new job as previous job still running. Click stop to terminate previous job",
                "ERROR", JOptionPane.ERROR_MESSAGE);
    }
}

From source file: sirius.predictor.main.PredictorFrame.java

License: Open Source License

private void runType3Classifier(ClassifierData classifierData) {
    /*
     * This is for the type 3 classifier.
     * Note that the "all positions" and "motif list only" options do not apply to this
     * classifier, as it gives only one score for each sequence.
     */
    if (sequenceNameTableModel.getRowCount() < 1) {
        JOptionPane.showMessageDialog(this, "Please load File first!", "No Sequence",
                JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (loadFastaFileMenuItem.getState() == false) {
        JOptionPane.showMessageDialog(this, "Please load Fasta File! Currently, you have score file!",
                "Wrong File Format", JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (onAllPositionsMenuItem.getState() == false) {
        JOptionPane.showMessageDialog(this, "For type 3 classifier, it make only one prediction a sequence",
                "Information", JOptionPane.INFORMATION_MESSAGE);
    }
    try {
        BufferedWriter output = new BufferedWriter(new FileWriter(
                outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_"
                        + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"));
        Classifier classifierOne = classifierData.getClassifierOne();
        //Reading and Storing the featureList
        Instances inst = classifierData.getInstances();
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            //-1 because class attribute must be ignored
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }
        //Going through each and every sequence
        for (int x = 0; x < sequenceNameTableModel.getRowCount(); x++) {
            if (stopClassifier == true) {
                statusPane.setText("Running of Classifier Stopped!");
                stopClassifier = false;
                output.close();
                return;
            }
            //if(x%100 == 0)
            statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierOne @ " + x
                    + " / " + sequenceNameTableModel.getRowCount());
            //Header              
            output.write(sequenceNameTableModel.getHeader(x));
            output.newLine();
            output.write(sequenceNameTableModel.getSequence(x));
            output.newLine();
            //Sequence Score -> index-score, index-score
            String sequence = sequenceNameTableModel.getSequence(x);
            Instance tempInst;
            tempInst = new Instance(inst.numAttributes());
            tempInst.setDataset(inst);
            for (int z = 0; z < inst.numAttributes() - 1; z++) {
                //-1 because class attribute can be ignored
                //Give the sequence and the featureList to get the feature freqs on the sequence
                Object obj = GenerateArff.getMatchCount("+1_Index(-1)", sequence, featureDataArrayList.get(z),
                        classifierData.getScoringMatrixIndex(), classifierData.getCountingStyleIndex(),
                        classifierData.getScoringMatrix());
                if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                    tempInst.setValue(z, (Integer) obj);
                else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                    tempInst.setValue(z, (Double) obj);
                else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                    tempInst.setValue(z, (String) obj);
                else {
                    output.close();
                    throw new Error("Unknown: " + obj.getClass().getName());
                }
            }
            //note that pos or neg does not matter as this is not used
            tempInst.setValue(inst.numAttributes() - 1, "pos");
            try {
                double[] results = classifierOne.distributionForInstance(tempInst);
                output.write("0=" + results[0]);
            } catch (Exception e) {
                //this is to ensure that the run will continue              
                output.write("0=-0.0");
                //change throw error to screen output if i want the run to continue
                System.err
                        .println("Exception occurred in classifierOne.distributionForInstance(tempInst)");
            }
            output.newLine();
            output.flush();
        }
        output.flush();
        output.close();

        statusPane.setText("ClassifierOne finished running...");
        loadScoreFile(outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName()
                + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores");
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null, "Exception Occured", "Error", JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}

From source file: sirius.predictor.main.PredictorFrame.java

License: Open Source License

private void runClassifier(ClassifierData classifierData, boolean allPositions) {
    //this method is for type 1 classifier with all positions and motif list
    //and type 2 classifier with all positions
    if (sequenceNameTableModel.getRowCount() < 1) {
        JOptionPane.showMessageDialog(this, "Please load File first!", "No Sequence",
                JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (loadFastaFileMenuItem.getState() == false) {
        JOptionPane.showMessageDialog(this, "Please load Fasta File! Currently, you have score file!",
                "Wrong File Format", JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (onAllPositionsMenuItem.getState() == false && motifListTableModel.getSize() == 0) {
        JOptionPane.showMessageDialog(this, "There are no Motifs chosen in Motif List!", "No Motifs",
                JOptionPane.INFORMATION_MESSAGE);
        MotifListDialog dialog = new MotifListDialog(motifListTableModel);
        dialog.setLocationRelativeTo(this);
        dialog.setVisible(true);
        return;
    }
    while (outputDirectory == null) {
        JOptionPane.showMessageDialog(this, "Please set output directory first!", "Output Directory not set",
                JOptionPane.INFORMATION_MESSAGE);
        setOutputDirectory();
        //return;
    }
    try {
        BufferedWriter output = new BufferedWriter(new FileWriter(
                outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_"
                        + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"));
        Classifier classifierOne = classifierData.getClassifierOne();
        int leftMostPosition = classifierData.getLeftMostPosition();
        int rightMostPosition = classifierData.getRightMostPosition();
        //Reading and Storing the featureList
        Instances inst = classifierData.getInstances();
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            //-1 because class attribute must be ignored
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }

        for (int x = 0; x < sequenceNameTableModel.getRowCount(); x++) {
            if (stopClassifier == true) {
                statusPane.setText("Running of Classifier Stopped!");
                stopClassifier = false;
                output.close();
                return;
            }
            //if(x%100 == 0)
            statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierOne @ " + x
                    + " / " + sequenceNameTableModel.getRowCount());
            //Header              
            output.write(sequenceNameTableModel.getHeader(x));
            output.newLine();
            output.write(sequenceNameTableModel.getSequence(x));
            output.newLine();
            //Sequence Score -> index-score, index-score
            String sequence = sequenceNameTableModel.getSequence(x);
            int minSequenceLengthRequired;
            int targetLocationIndex;
            if (leftMostPosition < 0 && rightMostPosition > 0) {// -ve and +ve
                minSequenceLengthRequired = (leftMostPosition * -1) + rightMostPosition;
                targetLocationIndex = (leftMostPosition * -1);
            } else if (leftMostPosition < 0 && rightMostPosition < 0) {//-ve and -ve
                minSequenceLengthRequired = rightMostPosition - leftMostPosition + 1;
                targetLocationIndex = (leftMostPosition * -1);
            } else {//+ve and +ve
                minSequenceLengthRequired = rightMostPosition - leftMostPosition + 1;
                targetLocationIndex = (leftMostPosition * -1);
            }
            boolean firstEntryForClassifierOne = true;
            for (int y = 0; y + (minSequenceLengthRequired - 1) < sequence.length(); y++) {
                //Check if targetLocation match any motif in motif List
                if (allPositions == false && motifListTableModel
                        .gotMotifMatch(sequence.substring(y + 0, y + targetLocationIndex)) == false)
                    continue;
                String line2 = sequence.substring(y + 0, y + minSequenceLengthRequired);
                Instance tempInst;
                tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int z = 0; z < inst.numAttributes() - 1; z++) {
                    //-1 because class attribute can be ignored
                    //Give the sequence and the featureList to get the feature freqs on the sequence
                    Object obj = GenerateArff.getMatchCount("+1_Index(" + targetLocationIndex + ")", line2,
                            featureDataArrayList.get(z), classifierData.getScoringMatrixIndex(),
                            classifierData.getCountingStyleIndex(), classifierData.getScoringMatrix());
                    if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                        tempInst.setValue(z, (Integer) obj);
                    else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                        tempInst.setValue(z, (Double) obj);
                    else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                        tempInst.setValue(z, (String) obj);
                    else {
                        output.close();
                        throw new Error("Unknown: " + obj.getClass().getName());
                    }
                }
                //note that pos or neg does not matter as this is not used
                tempInst.setValue(inst.numAttributes() - 1, "neg");
                double[] results = classifierOne.distributionForInstance(tempInst);
                if (firstEntryForClassifierOne)
                    firstEntryForClassifierOne = false;
                else
                    output.write(",");
                output.write(y + targetLocationIndex + "=" + results[0]);
            }
            output.newLine();
            output.flush();
        }
        output.flush();
        output.close();

        statusPane.setText("ClassifierOne finished running...");

        //Run classifier Two if it is type 2
        if (classifierData.getClassifierType() == 2) {
            BufferedWriter output2 = new BufferedWriter(new FileWriter(
                    outputDirectory + File.separator + "classifiertwo_" + classifierData.getClassifierName()
                            + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"));
            BufferedReader input2 = new BufferedReader(new FileReader(
                    outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName()
                            + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"));
            Classifier classifierTwo = classifierData.getClassifierTwo();
            Instances inst2 = classifierData.getInstances2();
            int setUpstream = classifierData.getSetUpstream();
            int setDownstream = classifierData.getSetDownstream();
            int minScoreWindowRequired;
            if (setUpstream < 0 && setDownstream < 0) {//-ve and -ve
                minScoreWindowRequired = setDownstream - setUpstream + 1;
            } else if (setUpstream < 0 && setDownstream > 0) {//-ve and +ve
                minScoreWindowRequired = (setUpstream * -1) + setDownstream;
            } else {//+ve and +ve
                minScoreWindowRequired = setDownstream - setUpstream + 1;
            }
            String lineHeader;
            String lineSequence;
            int lineCounter2 = 0;
            while ((lineHeader = input2.readLine()) != null) {
                if (stopClassifier == true) {
                    statusPane.setText("Running of Classifier Stopped!");
                    stopClassifier = false;
                    output2.close();
                    input2.close();
                    return;
                }
                //if(lineCounter2%100 == 0)
                statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierTwo @ "
                        + lineCounter2 + " / " + sequenceNameTableModel.getRowCount());
                lineSequence = input2.readLine();
                output2.write(lineHeader);
                output2.newLine();
                output2.write(lineSequence);
                output2.newLine();
                StringTokenizer locationScore = new StringTokenizer(input2.readLine(), ",");
                int totalTokens = locationScore.countTokens();
                String[][] scores = new String[totalTokens][2];
                int scoreIndex = 0;
                while (locationScore.hasMoreTokens()) {
                    StringTokenizer locationScoreToken = new StringTokenizer(locationScore.nextToken(), "=");
                    scores[scoreIndex][0] = locationScoreToken.nextToken();//location
                    scores[scoreIndex][1] = locationScoreToken.nextToken();//score
                    scoreIndex++;
                }
                int targetLocationIndex2;
                if (setUpstream == 0 || setDownstream == 0) {
                    output2.close();
                    input2.close();
                    throw new Exception("setUpstream == 0 || setDownstream == 0");
                }
                if (setUpstream < 0) {
                    targetLocationIndex2 = Integer.parseInt(scores[0][0]) + (-setUpstream);
                } else {//setUpstream > 0
                    targetLocationIndex2 = Integer.parseInt(scores[0][0]); //first location
                }
                for (int x = 0; x + minScoreWindowRequired - 1 < totalTokens; x++) {
                    //+1 is for the class index
                    if (x != 0)
                        output2.write(",");
                    Instance tempInst2 = new Instance(minScoreWindowRequired + 1);
                    tempInst2.setDataset(inst2);
                    for (int y = 0; y < minScoreWindowRequired; y++) {
                        tempInst2.setValue(y, Double.parseDouble(scores[x + y][1]));
                    }
                    tempInst2.setValue(tempInst2.numAttributes() - 1, "pos");
                    double[] results = classifierTwo.distributionForInstance(tempInst2);
                    output2.write(targetLocationIndex2 + "=" + results[0]);
                    targetLocationIndex2++;
                }
                lineCounter2++;
                output2.newLine();
            }
            input2.close();
            output2.close();
            statusPane.setText("ClassifierTwo finished running...");
        }
        if (classifierData.getClassifierType() == 1)
            loadScoreFile(
                    outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName()
                            + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores");
        else
            loadScoreFile(
                    outputDirectory + File.separator + "classifiertwo_" + classifierData.getClassifierName()
                            + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores");
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null, "Exception Occured", "Error", JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}

From source file: sirius.predictor.main.PredictorFrame.java

License: Open Source License

private void runType2ClassifierWithMotifList(ClassifierData classifierData) {
    //Checking..       
    if (sequenceNameTableModel.getRowCount() < 1) {
        JOptionPane.showMessageDialog(this, "Please load File first!", "No Sequence",
                JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (loadFastaFileMenuItem.getState() == false) {
        JOptionPane.showMessageDialog(this, "Please load Fasta File! Currently, you have score file!",
                "Wrong File Format", JOptionPane.INFORMATION_MESSAGE);
        return;
    }
    if (motifListTableModel.getSize() == 0) {
        JOptionPane.showMessageDialog(this, "There are no Motifs chosen in Motif List!", "No Motifs",
                JOptionPane.INFORMATION_MESSAGE);
        MotifListDialog dialog = new MotifListDialog(motifListTableModel);
        dialog.setLocationRelativeTo(this);
        dialog.setVisible(true);
        return;
    }
    //Proper running start
    try {
        //classifierOne score output
        BufferedWriter output = new BufferedWriter(new FileWriter(
                outputDirectory + File.separator + "classifierone_" + classifierData.getClassifierName() + "_"
                        + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"));
        Classifier classifierOne = classifierData.getClassifierOne();
        int leftMostPosition = classifierData.getLeftMostPosition();
        int rightMostPosition = classifierData.getRightMostPosition();
        //Reading and Storing the featureList
        Instances inst = classifierData.getInstances();
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            //-1 because class attribute must be ignored
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }
        //initialization for type 2 classifier               
        BufferedWriter output2 = new BufferedWriter(new FileWriter(
                outputDirectory + File.separator + "classifiertwo_" + classifierData.getClassifierName() + "_"
                        + classifierData.getClassifierType() + "_" + fastaFilename + ".scores"));
        int setUpstream = classifierData.getSetUpstream();
        int setDownstream = classifierData.getSetDownstream();
        int minScoreWindowRequired;
        if (setUpstream < 0 && setDownstream < 0) {//-ve and -ve
            minScoreWindowRequired = setDownstream - setUpstream + 1;
        } else if (setUpstream < 0 && setDownstream > 0) {//-ve and +ve
            minScoreWindowRequired = (setUpstream * -1) + setDownstream;
        } else {//+ve and +ve
            minScoreWindowRequired = setDownstream - setUpstream + 1;
        }
        Classifier classifierTwo = classifierData.getClassifierTwo();
        Instances inst2 = classifierData.getInstances2();
        if (setUpstream == 0 || setDownstream == 0) {
            output.close();
            output2.close();
            throw new Exception("setUpstream == 0 || setDownstream == 0");
        }
        //for each sequence
        for (int x = 0; x < sequenceNameTableModel.getRowCount(); x++) {
            if (stopClassifier == true) {
                statusPane.setText("Running of Classifier Stopped!");
                stopClassifier = false;
                output.close();
                output2.close();
                return;
            }
            //if(x%100 == 0)
            statusPane.setText("Running " + classifierData.getClassifierName() + " - ClassifierOne @ " + x
                    + " / " + sequenceNameTableModel.getRowCount());
            //Header              
            output.write(sequenceNameTableModel.getHeader(x));
            output.newLine();
            output.write(sequenceNameTableModel.getSequence(x));
            output.newLine();
            output2.write(sequenceNameTableModel.getHeader(x));
            output2.newLine();
            output2.write(sequenceNameTableModel.getSequence(x));
            output2.newLine();
            //Sequence Score -> index-score, index-score
            String sequence = sequenceNameTableModel.getSequence(x);
            int minSequenceLengthRequired;
            int targetLocationIndex;
            //set the targetLocationIndex and minSequenceLengthRequired
            if (leftMostPosition < 0 && rightMostPosition > 0) {// -ve and +ve
                minSequenceLengthRequired = (leftMostPosition * -1) + rightMostPosition;
                targetLocationIndex = (leftMostPosition * -1);
            } else if (leftMostPosition < 0 && rightMostPosition < 0) {//-ve and -ve
                minSequenceLengthRequired = rightMostPosition - leftMostPosition + 1;
                targetLocationIndex = (leftMostPosition * -1);
            } else {//+ve and +ve
                minSequenceLengthRequired = rightMostPosition - leftMostPosition + 1;
                targetLocationIndex = (leftMostPosition * -1);
            }
            //This hashtable is used to ensure that on positions where predictions are already made,
            //we just skip. This will happen only if it is a type 2 classifier
            Hashtable<Integer, Double> scoreTable = new Hashtable<Integer, Double>();
            boolean firstEntryForClassifierOne = true;
            boolean firstEntryForClassifierTwo = true;
            for (int y = 0; y + (minSequenceLengthRequired - 1) < sequence.length(); y++) {
                int endPoint = y;//endPoint should be the exact position
                int currentY = y;
                int startPoint = y;
                //run only on Motifs?               
                if (onMotifsOnlyMenuItem.getState()) {
                    //Check if targetLocation match any motif in motif List
                    if (motifListTableModel
                            .gotMotifMatch(sequence.substring(y + 0, y + targetLocationIndex)) == false)
                        continue; //position not found in motif list
                    else
                        //rollback to upstream and make prediction all the way till downstream
                        //needed for type 2 classifier
                        currentY += setUpstream;
                    if (setUpstream > 0)
                        currentY--;
                    startPoint = currentY;
                    //note that y starts from 0 so y is surely >= 0
                    endPoint += setDownstream;
                    if (setDownstream > 0)
                        endPoint--;
                    //check still within bound of the sequence
                    if (startPoint < 0 || endPoint >= sequence.length() - (minSequenceLengthRequired - 1))
                        continue;//out of bounds                  
                }
                while (currentY <= endPoint) {
                    if (scoreTable.get(currentY + targetLocationIndex) != null) {
                        currentY++;
                        continue;
                    }
                    String line2 = sequence.substring(currentY + 0, currentY + minSequenceLengthRequired);
                    Instance tempInst;
                    tempInst = new Instance(inst.numAttributes());
                    tempInst.setDataset(inst);
                    for (int z = 0; z < inst.numAttributes() - 1; z++) {
                        //-1 because class attribute can be ignored
                        //Give the sequence and the featureList to get the feature freqs on the sequence
                        Object obj = GenerateArff.getMatchCount("+1_Index(" + targetLocationIndex + ")", line2,
                                featureDataArrayList.get(z), classifierData.getScoringMatrixIndex(),
                                classifierData.getCountingStyleIndex(), classifierData.getScoringMatrix());
                        if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                            tempInst.setValue(z, (Integer) obj);
                        else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                            tempInst.setValue(z, (Double) obj);
                        else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                            tempInst.setValue(z, (String) obj);
                        else {
                            output.close();
                            output2.close();
                            throw new Error("Unknown: " + obj.getClass().getName());
                        }
                    }
                    //note that pos or neg does not matter as this is not used
                    tempInst.setValue(inst.numAttributes() - 1, "neg");
                    double[] results = classifierOne.distributionForInstance(tempInst);
                    if (firstEntryForClassifierOne)
                        firstEntryForClassifierOne = false;
                    else
                        output.write(",");
                    output.write(currentY + targetLocationIndex + "=" + results[0]);
                    scoreTable.put(currentY + targetLocationIndex, results[0]);
                    currentY++;
                }
                Instance tempInst2 = new Instance(minScoreWindowRequired + 1);//+1 for class attribute
                tempInst2.setDataset(inst2);
                int indexForClassifier2Inst = 0;
                for (int z = startPoint; z <= endPoint; z++) {
                    tempInst2.setValue(indexForClassifier2Inst, scoreTable.get(targetLocationIndex + z));
                    indexForClassifier2Inst++;
                }
                //note that pos or neg does not matter as this is not used
                tempInst2.setValue(tempInst2.numAttributes() - 1, "pos");
                double[] results = classifierTwo.distributionForInstance(tempInst2);
                if (firstEntryForClassifierTwo == true)
                    firstEntryForClassifierTwo = false;
                else
                    output2.write(",");
                output2.write(y + targetLocationIndex + "=" + results[0]);
            } //end of for loop            
            output2.newLine();
            output2.flush();
            output.newLine();
            output.flush();
        }
        output.close();
        output2.close();

        statusPane.setText("Classifier Finished running...");
        loadScoreFile(outputDirectory + File.separator + "classifiertwo_" + classifierData.getClassifierName()
                + "_" + classifierData.getClassifierType() + "_" + fastaFilename + ".scores");
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null, "Exception Occured", "Error", JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}

From source file: sirius.trainer.step4.DatasetGenerator.java

License: Open Source License

public static boolean generateDataset2(JInternalFrame parent, ApplicationData applicationData,
        int classifierTwoUpstream, int classifierTwoDownstream, Classifier classifierOne) {
    try {
        StatusPane statusPane = applicationData.getStatusPane();

        int positiveDataset2FromInt = applicationData.getPositiveDataset2FromField();
        int positiveDataset2ToInt = applicationData.getPositiveDataset2ToField();
        int negativeDataset2FromInt = applicationData.getNegativeDataset2FromField();
        int negativeDataset2ToInt = applicationData.getNegativeDataset2ToField();

        int totalDataset2PositiveInstances = positiveDataset2ToInt - positiveDataset2FromInt + 1;
        int totalDataset2NegativeInstances = negativeDataset2ToInt - negativeDataset2FromInt + 1;
        int totalDataset2Instances = totalDataset2PositiveInstances + totalDataset2NegativeInstances;

        int scoringMatrixIndex = applicationData.getScoringMatrixIndex();
        int countingStyleIndex = applicationData.getCountingStyleIndex();

        //Generate the header for Dataset2.arff
        BufferedWriter dataset2OutputFile = new BufferedWriter(
                new FileWriter(applicationData.getWorkingDirectory() + File.separator + "Dataset2.arff"));
        dataset2OutputFile.write("@relation 'Dataset2.arff' ");
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        dataset2OutputFile.flush();
        for (int x = classifierTwoUpstream; x <= classifierTwoDownstream; x++) {
            if (x != 0) {//This statement is needed because sequence positions use -1, +1 but have no 0
                dataset2OutputFile.write("@attribute (" + x + ") numeric");
                dataset2OutputFile.newLine();
                dataset2OutputFile.flush();
            }
        }
        if (positiveDataset2FromInt > 0 && negativeDataset2FromInt > 0)
            dataset2OutputFile.write("@attribute Class {pos,neg}");
        else if (positiveDataset2FromInt > 0 && negativeDataset2FromInt == 0)
            dataset2OutputFile.write("@attribute Class {pos}");
        else if (positiveDataset2FromInt == 0 && negativeDataset2FromInt > 0)
            dataset2OutputFile.write("@attribute Class {neg}");
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        dataset2OutputFile.write("@data");
        dataset2OutputFile.newLine();
        dataset2OutputFile.newLine();
        dataset2OutputFile.flush();

        //Generating an Instance given a sequence with the current attributes
        //for dataset2.arff

        //Need this for parameter setting for tempInst
        Instances inst = applicationData.getDataset1Instances();
        inst.deleteAttributeType(Attribute.STRING);
        FastaFileManipulation fastaFile = new FastaFileManipulation(
                applicationData.getPositiveStep1TableModel(), applicationData.getNegativeStep1TableModel(),
                positiveDataset2FromInt, positiveDataset2ToInt, negativeDataset2FromInt, negativeDataset2ToInt,
                applicationData.getWorkingDirectory());

        //Reading and Storing the featureList
        ArrayList<Feature> featureDataArrayList = new ArrayList<Feature>();
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            //-1 because class attribute must be ignored
            featureDataArrayList.add(Feature.levelOneClassifierPane(inst.attribute(x).name()));
        }

        //Reading the fastaFile         
        int lineCounter = 0;
        String _class = "pos";
        FastaFormat fastaFormat;
        while ((fastaFormat = fastaFile.nextSequence(_class)) != null) {
            if (applicationData.terminateThread == true) {
                statusPane.setText("Interrupted - Classifier Two Training Not Complete");
                dataset2OutputFile.close();
                return false;
            }
            lineCounter++;//Putting it here will mean if lineCounter is x then line == sequence x
            //if((lineCounter % 100) == 0){                 
            dataset2OutputFile.flush();
            statusPane.setText("Generating Dataset2.arff.. @ " + lineCounter + " / " + totalDataset2Instances
                    + " Sequences");
            //}
            //For each sequence, you want to shift from upstream till downstream 
            //ie changing the +1 location
            //to get the scores given by classifier one so that you can use it to train classifier two later
            //Doing shift from upstream till downstream                             
            SequenceManipulation seq = new SequenceManipulation(fastaFormat.getSequence(),
                    classifierTwoUpstream, classifierTwoDownstream);
            String line2;
            while ((line2 = seq.nextShift()) != null) {
                Instance tempInst;
                tempInst = new Instance(inst.numAttributes());
                tempInst.setDataset(inst);
                for (int x = 0; x < inst.numAttributes() - 1; x++) {
                    //-1 because class attribute can be ignored
                    //Give the sequence and the featureList to get the feature freqs on the sequence
                    Object obj = GenerateArff.getMatchCount(fastaFormat.getHeader(), line2,
                            featureDataArrayList.get(x), scoringMatrixIndex, countingStyleIndex,
                            applicationData.getScoringMatrix());
                    if (obj.getClass().getName().equalsIgnoreCase("java.lang.Integer"))
                        tempInst.setValue(x, (Integer) obj);
                    else if (obj.getClass().getName().equalsIgnoreCase("java.lang.Double"))
                        tempInst.setValue(x, (Double) obj);
                    else if (obj.getClass().getName().equalsIgnoreCase("java.lang.String"))
                        tempInst.setValue(x, (String) obj);
                    else {
                        dataset2OutputFile.close();
                        throw new Error("Unknown: " + obj.getClass().getName());
                    }
                }
                tempInst.setValue(inst.numAttributes() - 1, _class);
                double[] results = classifierOne.distributionForInstance(tempInst);
                dataset2OutputFile.write("" + results[0] + ",");
            }
            dataset2OutputFile.write(_class);
            dataset2OutputFile.newLine();
            if (lineCounter == totalDataset2PositiveInstances)
                _class = "neg";
        }
        dataset2OutputFile.close();
        fastaFile.cleanUp();
    } catch (Exception e) {
        e.printStackTrace();
        JOptionPane.showMessageDialog(parent, e.getMessage(), "ERROR", JOptionPane.ERROR_MESSAGE);
        applicationData.getStatusPane().setText("Error - Classifier Two Training Not Complete");
        return false;
    }
    return true;
}