fall2015.b565.wisBreastCancer.Assignment2.java Source code

Java tutorial

Introduction

Here is the source code for fall2015.b565.wisBreastCancer.Assignment2.java

Source

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/

package fall2015.b565.wisBreastCancer;

import com.google.common.primitives.Ints;
import fall2015.b565.wisBreastCancer.utils.Constants;
import org.apache.commons.cli.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashSet;

/**
 * Command-line entry point for the assignment.
 *
 * <p>The program first cleans the data set, then runs the analysis selected on the command
 * line:
 * <ul>
 *   <li>{@code -c} - correlation between attributes</li>
 *   <li>{@code -ppv [rm|rp]} - PPV using all attributes</li>
 *   <li>{@code -powPPV [rm|rp]} - PPV over the power set of the attributes</li>
 *   <li>{@code -vfold [rm|rp]} - V-fold cross validation using all attributes</li>
 * </ul>
 * The optional {@code rm}/{@code rp} value chooses which cleaned data file is used
 * ({@code rm} selects {@link Constants#REMOVED_DATA_FILE_PATH}; anything else, or no value,
 * selects {@link Constants#REPLACED_DATA_FILE_PATH}).
 */
public class Assignment2 {
    private static final Logger logger = LoggerFactory.getLogger(Assignment2.class);

    /** Value shown to the user when no cleaning method was supplied and the default is used. */
    private static final String DEFAULT_CLEANING_METHOD = "rp";
    /** Option value that selects the "removed" data set; every other value selects "replaced". */
    private static final String REMOVE_CLEANING_METHOD = "rm";

    private static boolean correlation = false;
    private static boolean ppv = false;
    private static boolean powerSetPPV = false;
    private static boolean vfoldCrossValidation = false;
    private static boolean useReplaceDataSet = true;
    private static String cleanedFilePath;
    private static final int[] allAttributeHeaders = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };

    /**
     * Parses the arguments, cleans the data set and runs every analysis whose flag was set by
     * {@link #parseArguments(String[])}.
     *
     * @param args command-line arguments, see the class description
     * @throws Exception declared for backward compatibility; any failure is rethrown wrapped in
     *                   a {@link RuntimeException}
     */
    public static void main(String[] args) throws Exception {
        try {
            parseArguments(args);
            FileReader fileReader = new FileReader();
            if (useReplaceDataSet) {
                cleanedFilePath = Constants.REPLACED_DATA_FILE_PATH;
            } else {
                cleanedFilePath = Constants.REMOVED_DATA_FILE_PATH;
            }
            KMeans kMeans = new KMeans();
            System.out.println("=============== Pre-Processing of Data ===============");
            fileReader.cleanDataSet();
            System.out.println("=============== Data Cleaned ===============");
            if (correlation) {
                System.out.println("=============== Finding Correlation between attributes ===============");
                kMeans.findAttributeCorrelations();
            }
            if (ppv) {
                System.out.println("=============== Finding PPV considering all the attributes ===============");
                KMeansResult kMeansResult = kMeans.findKmeansToAllAttributes(cleanedFilePath);
                // Named ppvValue so it does not shadow the static boolean flag "ppv".
                double ppvValue = kMeans.calculatePPV(kMeansResult.getFinalCentroids(),
                        kMeans.getRecords(cleanedFilePath));
                System.out.println("Calculated PPV : " + ppvValue);
            }
            if (powerSetPPV) {
                System.out.println(
                        "=============== Finding PPV considering power set of the attributes ===============");
                kMeans.findKmeansToAttributePowerSet(cleanedFilePath);
            }
            if (vfoldCrossValidation) {
                System.out.println(
                        "=============== Finding V Fold cross validation considering all the attribute set ===============");
                KMeansResult kMeansResult = kMeans.findKmeansToAllAttributes(cleanedFilePath);
                HashSet<Integer> attributes = new HashSet<Integer>(Ints.asList(allAttributeHeaders));
                double vPPV = kMeans.vFoldCrossValidation(kMeansResult.getInitialRecords(), attributes);
                System.out.println("VFold cross validation PPV : " + vPPV);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses the command line and sets the static flags that {@link #main(String[])} acts on.
     *
     * <p>Only the first matching option, in the order {@code c}, {@code ppv}, {@code powPPV},
     * {@code vfold}, is honoured. For the last three, the option value is optional: when it is
     * omitted the replace data set is used.
     *
     * @param args raw command-line arguments
     * @throws Exception if no option was given or the arguments cannot be parsed
     */
    public static void parseArguments(String[] args) throws Exception {
        try {
            Options options = new Options();

            options.addOption("c", false, "To find correlation between the attributes");
            options.addOption(cleaningMethodOption("ppv",
                    "Find ppv when considering all the attributes. You should provide which cleaning method to be used - removing (rm), replace (rp)"));
            options.addOption(cleaningMethodOption("powPPV",
                    "Find ppv of the power set of the attributes.  You should provide which cleaning method to be used - removing (rm), replace (rp)"));
            options.addOption(cleaningMethodOption("vfold",
                    "V Fold cross validation.  You should provide which cleaning method to be used - removing (rm), replace (rp)"));

            CommandLineParser parser = new PosixParser();
            CommandLine cmd = parser.parse(options, args);
            if (cmd.getOptions() == null || cmd.getOptions().length == 0) {
                System.out.println(
                        "You have not specified any options. Please provide one of the options : c, ppv, powPPV or vfold");
                throw new Exception(
                        "You have not specified any options. Please provide one of the options : c, ppv, powPPV or vfold");
            }
            if (cmd.hasOption("c")) {
                logger.info("Finding correlation between attributes...");
                correlation = true;
            } else if (cmd.hasOption("ppv")) {
                applyCleaningMethod(cmd, "ppv",
                        "Finding PPV considering all the attributes with data clean method : ");
                ppv = true;
            } else if (cmd.hasOption("powPPV")) {
                applyCleaningMethod(cmd, "powPPV",
                        "Finding PPV for the power set of the attributes with data clean method : ");
                powerSetPPV = true;
            } else if (cmd.hasOption("vfold")) {
                applyCleaningMethod(cmd, "vfold",
                        "Finding VFold cross validation considering all the attributes with data clean method : ");
                vfoldCrossValidation = true;
            }
        } catch (ParseException e) {
            logger.error("Error while reading command line parameters", e);
            throw new Exception("Error while reading command line parameters", e);
        }
    }

    /**
     * Builds an option that accepts an optional cleaning-method value.
     *
     * <p>The argument is marked optional so that omitting it reaches the default-handling code
     * in {@link #applyCleaningMethod} instead of failing inside the parser with a
     * {@code MissingArgumentException}.
     */
    private static Option cleaningMethodOption(String name, String description) {
        Option option = new Option(name, true, description);
        option.setOptionalArg(true);
        return option;
    }

    /**
     * Reads the cleaning method given for {@code optionName}, updates {@code useReplaceDataSet}
     * and prints {@code announcement} followed by the method actually in effect.
     */
    private static void applyCleaningMethod(CommandLine cmd, String optionName, String announcement) {
        String optionValue = cmd.getOptionValue(optionName);
        String effectiveMethod;
        if (optionValue == null) {
            System.out.println(
                    "User forget to give which data cleaning method to use. Hence we assume, we use replace data set");
            // Make the state match the message above, and report "rp" rather than "null".
            useReplaceDataSet = true;
            effectiveMethod = DEFAULT_CLEANING_METHOD;
        } else {
            useReplaceDataSet = !REMOVE_CLEANING_METHOD.equals(optionValue);
            effectiveMethod = optionValue;
        }
        System.out.println(announcement + effectiveMethod + "...");
    }
}