Example usage for java.lang Double parseDouble

List of usage examples for java.lang Double parseDouble

Introduction

On this page you can find example usages of java.lang.Double.parseDouble.

Prototype

public static double parseDouble(String s) throws NumberFormatException 

Source Link

Document

Returns a new double initialized to the value represented by the specified String, as performed by the valueOf method of class Double.

Usage

From source file:eqtlmappingpipeline.util.ModuleEqtlGeuvadisReplication.java

/**
 * Command line entry point: for every interaction QTL, looks up the best matching replication
 * QTL for the same gene and writes both Z-scores, aligned to a common allele, to a
 * tab-separated output file.
 *
 * <p>Options read from the command line: {@code g} (genotype base paths), {@code G} (genotype
 * format, optional), {@code e} (replication QTL file), {@code i} (interaction QTL file),
 * {@code o} (output file), {@code ld} (LD cutoff) and {@code w} (window size in bases).
 * The LD cutoff is parsed and echoed but is not otherwise used in this method.
 *
 * <p>Columns read from the interaction QTL file (0-based): 1 = SNP, 2 = chromosome,
 * 3 = position, 4 = gene, 9 = allele assessed, 10 = discovery Z-score, 12 = module.
 *
 * @param args the command line arguments
 * @throws IOException if the QTL files cannot be read or the output file cannot be written
 * @throws LdCalculatorException if LD between two variants cannot be calculated
 */
public static void main(String[] args) throws IOException, LdCalculatorException {

    System.out.println(HEADER);
    System.out.println();
    System.out.flush(); //flush to make sure header is before errors
    try {
        Thread.sleep(25); //Allows flush to complete
    } catch (InterruptedException ex) {
        // Nothing to clean up here, but keep the interrupt visible to later code.
        Thread.currentThread().interrupt();
    }

    CommandLineParser parser = new PosixParser();
    final CommandLine commandLine;
    try {
        commandLine = parser.parse(OPTIONS, args, true);
    } catch (ParseException ex) {
        System.err.println("Invalid command line arguments: " + ex.getMessage());
        System.err.println();
        new HelpFormatter().printHelp(" ", OPTIONS);
        System.exit(1);
        return;
    }

    final String[] genotypesBasePaths = commandLine.getOptionValues("g");
    final RandomAccessGenotypeDataReaderFormats genotypeDataType;
    final String replicationQtlFilePath = commandLine.getOptionValue("e");
    final String interactionQtlFilePath = commandLine.getOptionValue("i");
    final String outputFilePath = commandLine.getOptionValue("o");
    final double ldCutoff = Double.parseDouble(commandLine.getOptionValue("ld"));
    final int window = Integer.parseInt(commandLine.getOptionValue("w"));

    System.out.println("Genotype: " + Arrays.toString(genotypesBasePaths));
    System.out.println("Interaction file: " + interactionQtlFilePath);
    System.out.println("Replication file: " + replicationQtlFilePath);
    System.out.println("Output: " + outputFilePath);
    System.out.println("LD: " + ldCutoff);
    System.out.println("Window: " + window);

    // Resolve the genotype format: explicit -G wins, otherwise guess from the first path.
    try {
        if (commandLine.hasOption("G")) {
            genotypeDataType = RandomAccessGenotypeDataReaderFormats
                    .valueOf(commandLine.getOptionValue("G").toUpperCase());
        } else {
            if (genotypesBasePaths[0].endsWith(".vcf")) {
                System.err.println(
                        "Only vcf.gz is supported. Please see manual on how to do create a vcf.gz file.");
                System.exit(1);
                return;
            }
            try {
                genotypeDataType = RandomAccessGenotypeDataReaderFormats
                        .matchFormatToPath(genotypesBasePaths[0]);
            } catch (GenotypeDataException e) {
                System.err
                        .println("Unable to determine input 1 type based on specified path. Please specify -G");
                System.exit(1);
                return;
            }
        }
    } catch (IllegalArgumentException e) {
        // Thrown by valueOf(...) when -G names an unknown format.
        System.err.println("Error parsing --genotypesFormat \"" + commandLine.getOptionValue("G")
                + "\" is not a valid input data format");
        System.exit(1);
        return;
    }

    final RandomAccessGenotypeData genotypeData;

    try {
        genotypeData = genotypeDataType.createFilteredGenotypeData(genotypesBasePaths, 100, null, null, null,
                0.8);
    } catch (TabixFileNotFoundException e) {
        LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\n"
                + "Please see README on how to create a tabix file");
        System.exit(1);
        return;
    } catch (IOException e) {
        LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
        System.exit(1);
        return;
    } catch (IncompatibleMultiPartGenotypeDataException e) {
        LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e);
        System.exit(1);
        return;
    } catch (GenotypeDataException e) {
        LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
        System.exit(1);
        return;
    }

    ChrPosTreeMap<ArrayList<EQTL>> replicationQtls = new QTLTextFile(replicationQtlFilePath, false)
            .readQtlsAsTreeMap();

    int interactionSnpNotInGenotypeData = 0;
    // NOTE(review): the next three counters are reported at the end but never incremented here.
    int noReplicationQtlsInWindow = 0;
    int noReplicationQtlsInLd = 0;
    int multipleReplicationQtlsInLd = 0;
    int replicationTopSnpNotInGenotypeData = 0;

    // Replication SNPs (chr:pos) that were requested but are absent from the genotype data.
    final HashSet<String> notFound = new HashSet<>();

    final CSVWriter outputWriter = new CSVWriter(new FileWriter(new File(outputFilePath)), '\t', '\0');
    try {
        // The same array is reused for the header and for every data row.
        final String[] outputLine = new String[14];
        int c = 0;
        outputLine[c++] = "Chr";
        outputLine[c++] = "Pos";
        outputLine[c++] = "SNP";
        outputLine[c++] = "Gene";
        outputLine[c++] = "Module";
        outputLine[c++] = "DiscoveryZ";
        outputLine[c++] = "ReplicationZ";
        outputLine[c++] = "DiscoveryZCorrected";
        outputLine[c++] = "ReplicationZCorrected";
        outputLine[c++] = "DiscoveryAlleleAssessed";
        outputLine[c++] = "ReplicationAlleleAssessed";
        outputLine[c++] = "bestLd";
        outputLine[c++] = "bestLd_dist";
        outputLine[c++] = "nextLd";
        outputWriter.writeNext(outputLine);

        CSVReader interactionQtlReader = new CSVReader(new FileReader(interactionQtlFilePath), '\t');
        try {
            interactionQtlReader.readNext();//skip header
            String[] interactionQtlLine;
            while ((interactionQtlLine = interactionQtlReader.readNext()) != null) {

                String snp = interactionQtlLine[1];
                String chr = interactionQtlLine[2];
                int pos = Integer.parseInt(interactionQtlLine[3]);
                String gene = interactionQtlLine[4];
                String alleleAssessed = interactionQtlLine[9];
                String module = interactionQtlLine[12];
                double discoveryZ = Double.parseDouble(interactionQtlLine[10]);

                GeneticVariant interactionQtlVariant = genotypeData.getSnpVariantByPos(chr, pos);

                if (interactionQtlVariant == null) {
                    System.err.println("Interaction QTL SNP not found in genotype data: " + chr + ":" + pos);
                    ++interactionSnpNotInGenotypeData;
                    continue;
                }

                EQTL bestMatch = null;
                double bestMatchR2 = Double.NaN;
                Ld bestMatchLd = null;
                double nextBestR2 = Double.NaN;

                // A replication QTL on the very same SNP for the same gene is a perfect match.
                ArrayList<EQTL> sameSnpQtls = replicationQtls.get(chr, pos);

                if (sameSnpQtls != null) {
                    for (EQTL sameSnpQtl : sameSnpQtls) {
                        if (sameSnpQtl.getProbe().equals(gene)) {
                            bestMatch = sameSnpQtl;
                            bestMatchR2 = 1;
                        }
                    }
                }

                NavigableMap<Integer, ArrayList<EQTL>> potentionalReplicationQtls = replicationQtls
                        .getChrRange(chr, pos - window, true, pos + window, true);

                for (ArrayList<EQTL> potentialReplicationQtls : potentionalReplicationQtls.values()) {

                    for (EQTL potentialReplicationQtl : potentialReplicationQtls) {

                        if (!potentialReplicationQtl.getProbe().equals(gene)) {
                            continue;
                        }

                        GeneticVariant potentialReplicationQtlVariant = genotypeData.getSnpVariantByPos(
                                potentialReplicationQtl.getRsChr().toString(),
                                potentialReplicationQtl.getRsChrPos());

                        if (potentialReplicationQtlVariant == null) {
                            notFound.add(potentialReplicationQtl.getRsChr().toString() + ":"
                                    + potentialReplicationQtl.getRsChrPos());
                            ++replicationTopSnpNotInGenotypeData;
                            continue;
                        }

                        Ld ld = interactionQtlVariant.calculateLd(potentialReplicationQtlVariant);
                        double r2 = ld.getR2();

                        // Clamp r2 to its theoretical maximum.
                        if (r2 > 1) {
                            r2 = 1;
                        }

                        // NOTE(review): nextBestR2 is only updated when a new best is found, so a
                        // candidate that ranks second without ever being best is not recorded.
                        // Confirm whether that is the intended meaning of the "nextLd" column.
                        if (bestMatch == null) {
                            bestMatch = potentialReplicationQtl;
                            bestMatchR2 = r2;
                            bestMatchLd = ld;
                        } else if (r2 > bestMatchR2) {
                            bestMatch = potentialReplicationQtl;
                            nextBestR2 = bestMatchR2;
                            bestMatchR2 = r2;
                            bestMatchLd = ld;
                        }

                    }
                }

                double replicationZ = Double.NaN;
                double replicationZCorrected = Double.NaN;
                double discoveryZCorrected = Double.NaN;

                String replicationAlleleAssessed = null;

                if (bestMatch != null) {
                    replicationZ = bestMatch.getZscore();
                    replicationAlleleAssessed = bestMatch.getAlleleAssessed();

                    if (pos != bestMatch.getRsChrPos()) {

                        // Different SNPs: orient both Z-scores to the most frequent haplotype.
                        String commonHap = null;
                        double commonHapFreq = -1;
                        for (Map.Entry<String, Double> hapFreq : bestMatchLd.getHaplotypesFreq().entrySet()) {

                            double f = hapFreq.getValue();

                            if (f > commonHapFreq) {
                                commonHapFreq = f;
                                commonHap = hapFreq.getKey();
                            }

                        }

                        // Haplotype key is split on '/': element 0 is compared with the discovery
                        // allele, element 1 with the replication allele.
                        String[] commonHapAlleles = StringUtils.split(commonHap, '/');

                        discoveryZCorrected = commonHapAlleles[0].equals(alleleAssessed) ? discoveryZ
                                : discoveryZ * -1;
                        replicationZCorrected = commonHapAlleles[1].equals(replicationAlleleAssessed)
                                ? replicationZ
                                : replicationZ * -1;

                    } else {

                        // Same SNP: flip the replication Z-score only if the assessed alleles differ.
                        discoveryZCorrected = discoveryZ;
                        replicationZCorrected = alleleAssessed.equals(replicationAlleleAssessed) ? replicationZ
                                : replicationZ * -1;

                    }

                }

                c = 0;
                outputLine[c++] = chr;
                outputLine[c++] = String.valueOf(pos);
                outputLine[c++] = snp;
                outputLine[c++] = gene;
                outputLine[c++] = module;
                outputLine[c++] = String.valueOf(discoveryZ);
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZ);
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(discoveryZCorrected);
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZCorrected);
                outputLine[c++] = alleleAssessed;
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(bestMatch.getAlleleAssessed());
                outputLine[c++] = String.valueOf(bestMatchR2);
                outputLine[c++] = bestMatch == null ? "NA"
                        : String.valueOf(Math.abs(pos - bestMatch.getRsChrPos()));
                outputLine[c++] = String.valueOf(nextBestR2);
                outputWriter.writeNext(outputLine);

            }
        } finally {
            // The reader was previously never closed.
            interactionQtlReader.close();
        }
    } finally {
        // Close (and thereby flush) the output even when a row fails to parse.
        outputWriter.close();
    }

    for (String e : notFound) {
        System.err.println("Not found: " + e);
    }

    System.out.println("interactionSnpNotInGenotypeData: " + interactionSnpNotInGenotypeData);
    System.out.println("noReplicationQtlsInWindow: " + noReplicationQtlsInWindow);
    System.out.println("noReplicationQtlsInLd: " + noReplicationQtlsInLd);
    System.out.println("multipleReplicationQtlsInLd: " + multipleReplicationQtlsInLd);
    System.out.println("replicationTopSnpNotInGenotypeData: " + replicationTopSnpNotInGenotypeData);

}

From source file:cc.twittertools.stream.GatherStatusStream.java

/**
 * Connects to the Twitter streaming API and logs every received raw status that passes the
 * optional language and location filters.
 *
 * <p>Logging is configured programmatically: INFO messages (the raw statuses) go to an hourly
 * rolling {@code statuses.log}, WARN and above go to an hourly rolling {@code warnings.log} and
 * to the console.
 *
 * @param args command line arguments; see the options registered at the top of the method
 * @throws TwitterException declared for the Twitter client calls made by this method
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws TwitterException {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("list").hasArgs()
            .withDescription("comma-separated list of BCP 47 language identifiers").withLongOpt(LANGUAGE_OPTION)
            .create('l'));
    options.addOption(OptionBuilder.withArgName("list").hasArgs()
            .withDescription(
                    "comma-separated list of longitude,latitude pairs specifying a set of bounding boxes")
            .withLongOpt(LOCATIONS_OPTION).create('g'));
    options.addOption("n", NO_BOUNDING_BOX_OPTION, false, "do not consider places' bounding box");

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        // Show this tool's own name in the usage line (previously another class's name was used).
        formatter.printHelp(GatherStatusStream.class.getName(), options);
        System.exit(-1);
    }

    PatternLayout layoutStandard = new PatternLayout();
    layoutStandard.setConversionPattern("[%p] %d %c %M - %m%n");

    PatternLayout layoutSimple = new PatternLayout();
    layoutSimple.setConversionPattern("%m%n");

    // Filter for the statuses: we only want INFO messages
    LevelRangeFilter filter = new LevelRangeFilter();
    filter.setLevelMax(Level.INFO);
    filter.setLevelMin(Level.INFO);
    filter.setAcceptOnMatch(true);
    filter.activateOptions();

    TimeBasedRollingPolicy statusesRollingPolicy = new TimeBasedRollingPolicy();
    statusesRollingPolicy.setFileNamePattern("statuses.log" + HOUR_ROLL);
    statusesRollingPolicy.activateOptions();

    RollingFileAppender statusesAppender = new RollingFileAppender();
    statusesAppender.setRollingPolicy(statusesRollingPolicy);
    statusesAppender.addFilter(filter);
    statusesAppender.setLayout(layoutSimple);
    statusesAppender.activateOptions();

    TimeBasedRollingPolicy warningsRollingPolicy = new TimeBasedRollingPolicy();
    warningsRollingPolicy.setFileNamePattern("warnings.log" + HOUR_ROLL);
    warningsRollingPolicy.activateOptions();

    RollingFileAppender warningsAppender = new RollingFileAppender();
    // Use the warnings policy; the statuses policy was passed here by mistake, which left
    // warningsRollingPolicy unused and pointed both appenders at the statuses file pattern.
    warningsAppender.setRollingPolicy(warningsRollingPolicy);
    warningsAppender.setThreshold(Level.WARN);
    warningsAppender.setLayout(layoutStandard);
    warningsAppender.activateOptions();

    ConsoleAppender consoleAppender = new ConsoleAppender();
    consoleAppender.setThreshold(Level.WARN);
    consoleAppender.setLayout(layoutStandard);
    consoleAppender.activateOptions();

    // configures the root logger
    Logger rootLogger = Logger.getRootLogger();
    rootLogger.setLevel(Level.INFO);
    rootLogger.removeAllAppenders();
    rootLogger.addAppender(consoleAppender);
    rootLogger.addAppender(statusesAppender);
    rootLogger.addAppender(warningsAppender);

    // creates filters for the query
    FilterQuery fq = new FilterQuery();
    StringBuilder criteria = new StringBuilder();

    /*
     * @see https://dev.twitter.com/docs/streaming-apis/parameters#language
     */
    final boolean filterLanguage = cmdline.hasOption(LANGUAGE_OPTION);
    String[] languages = null;
    if (filterLanguage) {
        languages = cmdline.getOptionValue(LANGUAGE_OPTION).split(",");
        fq.language(languages);
        criteria.append("languages: [" + cmdline.getOptionValue(LANGUAGE_OPTION) + "]\t");
    }
    final String[] langs = languages;

    /*
     * @see https://dev.twitter.com/docs/streaming-apis/parameters#locations
     */
    double[][] locations = null;
    if (cmdline.hasOption(LOCATIONS_OPTION)) {
        String[] locationsArg = cmdline.getOptionValue(LOCATIONS_OPTION).split(",");
        int nCoords = locationsArg.length;
        if (nCoords % 2 == 0) {
            // Each row is one {longitude, latitude} corner, in the order given on the command line.
            int pairs = nCoords / 2;
            locations = new double[pairs][2];
            for (int i = 0; i < pairs; i++) {
                locations[i][0] = Double.parseDouble(locationsArg[2 * i]);
                locations[i][1] = Double.parseDouble(locationsArg[2 * i + 1]);
            }
            fq.locations(locations);
            criteria.append("locations: [" + cmdline.getOptionValue(LOCATIONS_OPTION) + "]\t");
        } else {
            System.err.println("There is a missing coordinate. See "
                    + "https://dev.twitter.com/docs/streaming-apis/parameters#locations");
            System.exit(-1);
        }
    } else {
        // No explicit locations: ask for the whole globe.
        fq.locations(new double[][] { { -180, -90 }, { 180, 90 } });
    }
    final double[][] loc = locations;

    final boolean no_bounding_box = cmdline.hasOption(NO_BOUNDING_BOX_OPTION);
    if (no_bounding_box) {
        criteria.append("--no-bounding-box\t");
    }

    // creates a custom logger and log messages
    final Logger logger = Logger.getLogger(GatherStatusStream.class);

    logger.info(criteria);

    RawStreamListener rawListener = new RawStreamListener() {

        @Override
        public void onMessage(String rawString) {
            if (no_bounding_box && loc != null) {
                try {
                    JSONObject status = new JSONObject(rawString);
                    JSONObject coordObj = status.getJSONObject("coordinates");
                    JSONArray coords = coordObj.getJSONArray("coordinates");
                    double longitude = coords.getDouble(0);
                    double latitude = coords.getDouble(1);

                    // checks location: a box is two consecutive corners, loc[i] (lower bounds)
                    // and loc[i + 1] (upper bounds). The point must be inside at least one box,
                    // on BOTH axes. The previous loop could never reject a status (its
                    // "last index" test was unreachable when stepping by 2) and accepted a
                    // point that matched on a single axis only.
                    boolean insideAnyBox = false;
                    for (int i = 0; i + 1 < loc.length; i += 2) {
                        boolean longitudeInRange = (loc[i][0] <= longitude) && (longitude <= loc[i + 1][0]);
                        boolean latitudeInRange = (loc[i][1] <= latitude) && (latitude <= loc[i + 1][1]);
                        if (longitudeInRange && latitudeInRange) {
                            insideAnyBox = true;
                            break;
                        }
                    }
                    if (!insideAnyBox) {
                        return;
                    }
                } catch (JSONException e) { /* Either "Coordinates" is null or trash is coming*/
                    return;
                }
            }

            if (filterLanguage) {
                try {
                    JSONObject status = new JSONObject(rawString);
                    // checks language: drop the status unless its "lang" is one of the requested ones
                    String lang = status.getString("lang");
                    boolean languageAccepted = false;
                    for (String accepted : langs) {
                        if (accepted.equals(lang)) {
                            languageAccepted = true;
                            break;
                        }
                    }
                    if (!languageAccepted) {
                        return;
                    }
                } catch (JSONException e) { /* Trash is coming */
                    return;
                }
            }
            // cnt is the message counter declared outside this method.
            cnt++;
            logger.info(rawString);
            if (cnt % 1000 == 0) {
                System.out.println(cnt + " messages received.");
            }
        }

        @Override
        public void onException(Exception ex) {
            logger.warn(ex);
        }
    };

    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    twitterStream.addListener(rawListener);
    twitterStream.filter(fq);
}

From source file:eu.fbk.utils.lsa.util.Anvur.java

/**
 * Computes pairwise cosine similarities between the records of a CSV file in three vector
 * spaces (bag-of-words, latent-semantic, and the merge of both) and writes the results.
 *
 * <p>Expected arguments (exactly seven):
 * <ol>
 * <li>{@code args[0]} prefix of the model files ({@code -Ut}, {@code -S}, {@code -row},
 * {@code -col}, {@code -df} are appended)</li>
 * <li>{@code args[1]} threshold (parsed as a double, not used further in this method)</li>
 * <li>{@code args[2]} size passed to {@code LSSimilarity}</li>
 * <li>{@code args[3]} dimension passed to {@code LSM}</li>
 * <li>{@code args[4]} whether to rescale by idf</li>
 * <li>{@code args[5]} input CSV file; also the prefix of every output file</li>
 * <li>{@code args[6]} field selection passed to {@code buildName} and {@code buildText}</li>
 * </ol>
 *
 * <p>Output files are {@code args[5] + name} followed by {@code -bow.txt}, {@code -bow.csv},
 * {@code -ls.txt}, {@code -ls.csv}, {@code -bow+ls.txt}, {@code -bow+ls.csv} and {@code .log}.
 * The {@code .txt} files hold the upper-triangular similarity matrices, the {@code .csv} files
 * hold each input line followed by its vector.
 *
 * @param args the command line arguments described above
 * @throws Exception if the model or input cannot be read or an output file cannot be written
 */
public static void main(String[] args) throws Exception {
    String logConfig = System.getProperty("log-config");
    if (logConfig == null) {
        logConfig = "log-config.txt";
    }

    PropertyConfigurator.configure(logConfig);

    if (args.length != 7) {
        System.out.println(args.length);
        System.out.println(
                "Usage: java -mx2G eu.fbk.utils.lsa.util.Anvur input threshold size dim idf in-file-csv fields\n\n");
        System.exit(1);
    }

    // Similarities are written with two decimals.
    DecimalFormat dec = new DecimalFormat("#.00");

    File Ut = new File(args[0] + "-Ut");
    File Sk = new File(args[0] + "-S");
    File r = new File(args[0] + "-row");
    File c = new File(args[0] + "-col");
    File df = new File(args[0] + "-df");
    // Parsed so that a malformed value still fails fast; the value itself is not used below.
    double threshold = Double.parseDouble(args[1]);
    int size = Integer.parseInt(args[2]);
    int dim = Integer.parseInt(args[3]);
    boolean rescaleIdf = Boolean.parseBoolean(args[4]);

    //"author_check"0,   "authors"1,   "title"2,   "year"3,   "pubtype"4,   "publisher"5,   "journal"6,   "volume"7,   "number"8,   "pages"9,   "abstract"10,   "nauthors",   "citedby"
    String[] labels = { "author_check", "authors", "title", "year", "pubtype", "publisher", "journal", "volume",
            "number", "pages", "abstract", "nauthors", "citedby" };
    String name = buildName(labels, args[6]);

    File bwf = new File(args[5] + name + "-bow.txt");
    File bdf = new File(args[5] + name + "-bow.csv");
    File lwf = new File(args[5] + name + "-ls.txt");
    File ldf = new File(args[5] + name + "-ls.csv");
    File blwf = new File(args[5] + name + "-bow+ls.txt");
    File bldf = new File(args[5] + name + "-bow+ls.csv");
    File logf = new File(args[5] + name + ".log");

    // Declared up front so the finally block can close whichever writers were opened.
    PrintWriter bw = null;
    PrintWriter bd = null;
    PrintWriter lw = null;
    PrintWriter ld = null;
    PrintWriter blw = null;
    PrintWriter bld = null;
    PrintWriter log = null;

    try {
        bw = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bwf), "UTF-8")));
        bd = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bdf), "UTF-8")));
        lw = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lwf), "UTF-8")));
        ld = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(ldf), "UTF-8")));
        blw = new PrintWriter(
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(blwf), "UTF-8")));
        bld = new PrintWriter(
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(bldf), "UTF-8")));
        log = new PrintWriter(
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(logf), "UTF-8")));

        LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf);
        LSSimilarity lss = new LSSimilarity(lsm, size);

        List<String[]> list = readText(new File(args[5]));

        // author_check   authors   title   year   pubtype   publisher   journal   volume   number   pages   abstract   nauthors   citedby

        //header: one column label "index(first field)" per record
        for (int i = 0; i < list.size(); i++) {
            String[] s1 = list.get(i);
            bw.print("\t");
            lw.print("\t");
            blw.print("\t");
            bw.print(i + "(" + s1[0] + ")");
            lw.print(i + "(" + s1[0] + ")");
            blw.print(i + "(" + s1[0] + ")");
        } // end for i

        bw.print("\n");
        lw.print("\n");
        blw.print("\n");
        for (int i = 0; i < list.size(); i++) {
            logger.info(i + "\t");
            String[] s1 = list.get(i);
            String t1 = buildText(s1, args[6]);
            BOW bow1 = new BOW(t1);
            logger.info(bow1);

            // d1: bag-of-words vector, pd1: its latent-semantic mapping, m1: merge of both.
            Vector d1 = lsm.mapDocument(bow1);
            d1.normalize();
            log.println("d1:" + d1);

            Vector pd1 = lsm.mapPseudoDocument(d1);
            pd1.normalize();
            log.println("pd1:" + pd1);

            Vector m1 = merge(pd1, d1);
            log.println("m1:" + m1);

            // write the orginal line
            for (int j = 0; j < s1.length; j++) {
                bd.print(s1[j]);
                bd.print("\t");
                ld.print(s1[j]);
                ld.print("\t");
                bld.print(s1[j]);
                bld.print("\t");

            }
            // write the bow, ls, and bow+ls vectors
            bd.println(d1);
            ld.println(pd1);
            bld.println(m1);

            bw.print(i + "(" + s1[0] + ")");
            lw.print(i + "(" + s1[0] + ")");
            blw.print(i + "(" + s1[0] + ")");
            // Pad the lower triangle (and the diagonal) with empty cells.
            for (int j = 0; j < i + 1; j++) {
                bw.print("\t");
                lw.print("\t");
                blw.print("\t");
            } // end for j

            // Upper triangle: compare record i with every later record j.
            for (int j = i + 1; j < list.size(); j++) {
                logger.info(i + "\t" + j);
                String[] s2 = list.get(j);

                String t2 = buildText(s2, args[6]);
                BOW bow2 = new BOW(t2);

                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t1:" + t1);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t2:" + t2);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow1:" + bow1);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2);

                Vector d2 = lsm.mapDocument(bow2);
                d2.normalize();
                log.println("d2:" + d2);

                Vector pd2 = lsm.mapPseudoDocument(d2);
                pd2.normalize();
                log.println("pd2:" + pd2);

                Vector m2 = merge(pd2, d2);
                log.println("m2:" + m2);

                // Cosine similarity in each of the three spaces.
                float cosVSM = d1.dotProduct(d2) / (float) Math.sqrt(d1.dotProduct(d1) * d2.dotProduct(d2));
                float cosLSM = pd1.dotProduct(pd2)
                        / (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2));
                float cosBOWLSM = m1.dotProduct(m2) / (float) Math.sqrt(m1.dotProduct(m1) * m2.dotProduct(m2));
                bw.print("\t");
                bw.print(dec.format(cosVSM));
                lw.print("\t");
                lw.print(dec.format(cosLSM));
                blw.print("\t");
                blw.print(dec.format(cosBOWLSM));

                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow\t" + cosVSM);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") ls:\t" + cosLSM);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow+ls:\t" + cosBOWLSM);
            }
            bw.print("\n");
            lw.print("\n");
            blw.print("\n");
        } // end for i

        logger.info("wrote " + bwf);
        logger.info("wrote " + bdf);
        logger.info("wrote " + lwf);
        logger.info("wrote " + ldf);
        logger.info("wrote " + blwf);
        logger.info("wrote " + bldf);
        logger.info("wrote " + logf);
    } finally {
        // Close (and flush) every writer that was opened, even if processing failed part-way.
        if (ld != null) {
            ld.close();
        }
        if (bd != null) {
            bd.close();
        }
        if (bld != null) {
            bld.close();
        }
        if (bw != null) {
            bw.close();
        }
        if (lw != null) {
            lw.close();
        }
        if (blw != null) {
            blw.close();
        }
        if (log != null) {
            log.close();
        }
    }

}

From source file:CTmousetrack.java

/**
 * Samples the mouse pointer position at a fixed interval and writes the normalized
 * x,y coordinates either to a CloudTurbine (CT) writer (local folder, FTP, HTTP or
 * HTTPS) or, in UDP mode, as 21-byte datagrams to the given host:port.
 *
 * <p>Invalid arguments are reported on stderr and terminate the JVM with a non-zero
 * exit status; {@code -h/--help} prints usage and returns normally.
 *
 * @param args command line arguments, parsed with Apache Commons CLI (see the option
 *             definitions in the body for the supported flags and their defaults)
 */
public static void main(String[] args) {

    String outLoc = "." + File.separator + "CTdata"; // Location of the base output data folder; only used when writing out CT data to a local folder
    String srcName = "CTmousetrack"; // name of the output CT source
    long blockPts = 10; // points per block flush
    long sampInterval = 10; // time between sampling updates, msec
    double trimTime = 0.0; // amount of data to keep (trim time), sec
    boolean debug = false; // turn on debug?

    // Specify the CT output connection
    CTWriteMode writeMode = CTWriteMode.LOCAL; // The selected mode for writing out CT data
    String serverHost = ""; // Server (FTP or HTTP/S) host:port
    String serverUser = ""; // Server (FTP or HTTPS) username
    String serverPassword = ""; // Server (FTP or HTTPS) password

    // For UDP output mode
    DatagramSocket udpServerSocket = null;
    InetAddress udpServerAddress = null;
    String udpHost = "";
    int udpPort = -1;

    // Concatenate all of the CTWriteMode types
    String possibleWriteModes = "";
    for (CTWriteMode wm : CTWriteMode.values()) {
        possibleWriteModes = possibleWriteModes + ", " + wm.name();
    }
    // Remove ", " from start of string
    possibleWriteModes = possibleWriteModes.substring(2);

    //
    // Argument processing using Apache Commons CLI
    //
    // 1. Setup command line options
    Options options = new Options();
    options.addOption("h", "help", false, "Print this message.");
    options.addOption(Option.builder("o").argName("base output dir").hasArg().desc(
            "Base output directory when writing data to local folder (i.e., this is the location of CTdata folder); default = \""
                    + outLoc + "\".")
            .build());
    options.addOption(Option.builder("s").argName("source name").hasArg()
            .desc("Name of source to write data to; default = \"" + srcName + "\".").build());
    options.addOption(Option.builder("b").argName("points per block").hasArg()
            .desc("Number of points per block; UDP output mode will use 1 point/block; default = "
                    + Long.toString(blockPts) + ".")
            .build());
    options.addOption(Option.builder("dt").argName("samp interval msec").hasArg()
            .desc("Sampling period in msec; default = " + Long.toString(sampInterval) + ".").build());
    options.addOption(Option.builder("t").argName("trim time sec").hasArg().desc(
            "Trim (ring-buffer loop) time (sec); this is only used when writing data to local folder; specify 0 for indefinite; default = "
                    + Double.toString(trimTime) + ".")
            .build());
    options.addOption(
            Option.builder("w").argName("write mode").hasArg()
                    .desc("Type of write connection; one of " + possibleWriteModes
                            + "; all but UDP mode write out to CT; default = " + writeMode.name() + ".")
                    .build());
    options.addOption(Option.builder("host").argName("host[:port]").hasArg()
            .desc("Host:port when writing via FTP, HTTP, HTTPS, UDP.").build());
    options.addOption(Option.builder("u").argName("username,password").hasArg()
            .desc("Comma-delimited username and password when writing to CT via FTP or HTTPS.").build());
    options.addOption("x", "debug", false, "Enable CloudTurbine debug output.");

    // 2. Parse command line options
    CommandLineParser parser = new DefaultParser();
    CommandLine line = null;
    try {
        line = parser.parse(options, args);
    } catch (ParseException exp) { // oops, something went wrong
        System.err.println("Command line argument parsing failed: " + exp.getMessage());
        return;
    }

    // 3. Retrieve the command line values
    if (line.hasOption("help")) { // Display help message and quit
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp("CTmousetrack", "", options,
                "NOTE: UDP output is a special non-CT output mode where single x,y points are sent via UDP to the specified host:port.");
        return;
    }

    outLoc = line.getOptionValue("o", outLoc);
    if (!outLoc.endsWith("\\") && !outLoc.endsWith("/")) {
        outLoc = outLoc + File.separator;
    }
    // Make sure the base output folder location ends in "CTdata"
    if (!outLoc.endsWith("CTdata\\") && !outLoc.endsWith("CTdata/")) {
        outLoc = outLoc + "CTdata" + File.separator;
    }

    srcName = line.getOptionValue("s", srcName);

    // Report malformed numbers as a usage error instead of an uncaught stack trace
    try {
        blockPts = Long.parseLong(line.getOptionValue("b", Long.toString(blockPts)));
        sampInterval = Long.parseLong(line.getOptionValue("dt", Long.toString(sampInterval)));
        trimTime = Double.parseDouble(line.getOptionValue("t", Double.toString(trimTime)));
    } catch (NumberFormatException nfe) {
        System.err.println("Options -b and -dt must be integers and -t must be a number; "
                + nfe.getMessage());
        System.exit(1);
    }
    // Thread.sleep() rejects negative values; fail up front rather than inside the sampling loop
    if (sampInterval < 0) {
        System.err.println("The sampling period (-dt) must not be negative.");
        System.exit(1);
    }

    // Type of output connection
    String writeModeStr = line.getOptionValue("w", writeMode.name());
    boolean bMatch = false;
    for (CTWriteMode wm : CTWriteMode.values()) {
        if (wm.name().equalsIgnoreCase(writeModeStr)) {
            writeMode = wm;
            bMatch = true;
            break;
        }
    }
    if (!bMatch) {
        System.err.println("Unrecognized write mode, \"" + writeModeStr + "\"; write mode must be one of "
                + possibleWriteModes);
        System.exit(1);
    }
    if (writeMode != CTWriteMode.LOCAL) {
        // User must have specified the host
        // If FTP or HTTPS, they may also specify username/password
        serverHost = line.getOptionValue("host", serverHost);
        if (serverHost.isEmpty()) {
            System.err.println(
                    "When using write mode \"" + writeModeStr + "\", you must specify the server host.");
            System.exit(1);
        }
        if (writeMode == CTWriteMode.UDP) {
            // Force blockPts to be 1
            blockPts = 1;
            // User must have specified both host and port
            int colonIdx = serverHost.indexOf(':');
            if ((colonIdx == -1) || (colonIdx >= serverHost.length() - 1)) {
                System.err.println(
                        "For UDP output mode, both the host and port (<host>:<port>)) must be specified.");
                System.exit(1);
            }
            udpHost = serverHost.substring(0, colonIdx);
            String udpPortStr = serverHost.substring(colonIdx + 1);
            try {
                udpPort = Integer.parseInt(udpPortStr);
            } catch (NumberFormatException nfe) {
                System.err.println("The UDP port must be a positive integer.");
                System.exit(1);
            }
            // DatagramPacket only accepts ports up to 65535; reject bad values before sampling starts
            if ((udpPort < 1) || (udpPort > 65535)) {
                System.err.println("The UDP port must be in the range 1-65535.");
                System.exit(1);
            }
        }
        if ((writeMode == CTWriteMode.FTP) || (writeMode == CTWriteMode.HTTPS)) {
            String userpassStr = line.getOptionValue("u", "");
            if (!userpassStr.isEmpty()) {
                // This string should be comma-delimited username and password
                String[] userpassCSV = userpassStr.split(",");
                if (userpassCSV.length != 2) {
                    System.err.println("When specifying a username and password for write mode \""
                            + writeModeStr + "\", separate the username and password by a comma.");
                    System.exit(1);
                }
                serverUser = userpassCSV[0];
                serverPassword = userpassCSV[1];
            }
        }
    }

    // Checked after the write mode is known because UDP mode overrides blockPts with 1;
    // a value below 1 would otherwise cause a modulo-by-zero in the flush test below
    if (blockPts < 1) {
        System.err.println("The number of points per block (-b) must be at least 1.");
        System.exit(1);
    }

    debug = line.hasOption("debug");

    System.err.println("CTmousetrack parameters:");
    System.err.println("\toutput mode = " + writeMode.name());
    if (writeMode == CTWriteMode.UDP) {
        System.err.println("\twrite to " + udpHost + ":" + udpPort);
    } else {
        System.err.println("\tsource = " + srcName);
        System.err.println("\ttrim time = " + trimTime + " sec");
    }
    System.err.println("\tpoints per block = " + blockPts);
    System.err.println("\tsample interval = " + sampInterval + " msec");

    try {
        //
        // Setup CTwriter or UDP output
        //
        CTwriter ctw = null;
        CTinfo.setDebug(debug);
        if (writeMode == CTWriteMode.LOCAL) {
            ctw = new CTwriter(outLoc + srcName, trimTime);
            System.err.println("\tdata will be written to local folder \"" + outLoc + "\"");
        } else if (writeMode == CTWriteMode.FTP) {
            CTftp ctftp = new CTftp(srcName);
            try {
                ctftp.login(serverHost, serverUser, serverPassword);
            } catch (Exception e) {
                // keep the original exception as the cause so the stack trace is not lost
                throw new IOException(
                        "Error logging into FTP server \"" + serverHost + "\":\n" + e.getMessage(), e);
            }
            ctw = ctftp; // upcast to CTWriter
            System.err.println("\tdata will be written to FTP server at " + serverHost);
        } else if (writeMode == CTWriteMode.HTTP) {
            // Don't send username/pw in HTTP mode since they will be unencrypted
            CThttp cthttp = new CThttp(srcName, "http://" + serverHost);
            ctw = cthttp; // upcast to CTWriter
            System.err.println("\tdata will be written to HTTP server at " + serverHost);
        } else if (writeMode == CTWriteMode.HTTPS) {
            CThttp cthttp = new CThttp(srcName, "https://" + serverHost);
            // Username/pw are optional for HTTPS mode; only use them if username is not empty
            if (!serverUser.isEmpty()) {
                try {
                    cthttp.login(serverUser, serverPassword);
                } catch (Exception e) {
                    // keep the original exception as the cause so the stack trace is not lost
                    throw new IOException(
                            "Error logging into HTTP server \"" + serverHost + "\":\n" + e.getMessage(), e);
                }
            }
            ctw = cthttp; // upcast to CTWriter
            System.err.println("\tdata will be written to HTTPS server at " + serverHost);
        } else if (writeMode == CTWriteMode.UDP) {
            try {
                udpServerSocket = new DatagramSocket();
            } catch (SocketException se) {
                System.err.println("Error creating socket for UDP:\n" + se);
                System.exit(1);
            }
            try {
                udpServerAddress = InetAddress.getByName(udpHost);
            } catch (UnknownHostException uhe) {
                System.err.println("Error getting UDP server host address:\n" + uhe);
                System.exit(1);
            }
        }
        if (writeMode != CTWriteMode.UDP) {
            ctw.setBlockMode(blockPts > 1, blockPts > 1);
            ctw.autoFlush(0); // no autoflush
            ctw.autoSegment(1000);
        }

        // screen dims
        Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
        double width = screenSize.getWidth();
        double height = screenSize.getHeight();

        // use Map for consolidated putData
        Map<String, Object> cmap = new LinkedHashMap<String, Object>();

        // UDP packet layout (21 bytes total):
        //     header = "MOUSE", 5 bytes
        //     current time, long, 8 bytes
        //     2 floats (x,y) 4 bytes each, 8 bytes
        final int udpPacketLen = 21;
        final byte[] udpHeader = "MOUSE".getBytes("UTF-8");

        // Sample for at most 1,000,000 iterations (or until the process is killed/interrupted)
        boolean interrupted = false;
        for (int i = 0; i < 1000000; i++) {
            long currentTime = System.currentTimeMillis();
            Point mousePos = MouseInfo.getPointerInfo().getLocation();
            float x_pt = (float) (mousePos.getX() / width); // normalize
            float y_pt = (float) ((height - mousePos.getY()) / height); // flip Y (so bottom=0)
            if (writeMode != CTWriteMode.UDP) {
                // CT output mode
                ctw.setTime(currentTime);
                cmap.clear();
                cmap.put("x", x_pt);
                cmap.put("y", y_pt);
                ctw.putData(cmap);
                if (((i + 1) % blockPts) == 0) {
                    ctw.flush();
                    System.err.print(".");
                }
            } else {
                // UDP output mode
                // We force blockPts to be 1 for UDP output mode, i.e. we "flush" the data every time
                ByteBuffer bb = ByteBuffer.allocate(udpPacketLen);
                bb.put(udpHeader);
                bb.putLong(currentTime);
                bb.putFloat(x_pt);
                bb.putFloat(y_pt);
                // allocate() returns an array-backed buffer with offset 0, so the backing
                // array is exactly the packet payload
                byte[] sendData = bb.array();
                DatagramPacket sendPacket = new DatagramPacket(sendData, sendData.length, udpServerAddress,
                        udpPort);
                try {
                    udpServerSocket.send(sendPacket);
                } catch (IOException e) {
                    System.err.println("Test server caught exception trying to send data to UDP client:\n" + e);
                }
                System.err.print(".");
            }
            try {
                Thread.sleep(sampInterval);
            } catch (InterruptedException ie) {
                // treat an interrupt as a request to stop sampling
                interrupted = true;
                break;
            }
        }
        if (writeMode != CTWriteMode.UDP) {
            ctw.flush(); // wrap up
        }
        if (interrupted) {
            // restore the interrupt status only after the final flush has completed
            Thread.currentThread().interrupt();
        }
    } catch (Exception e) {
        System.err.println("CTmousetrack exception: " + e);
        e.printStackTrace();
    } finally {
        // release the UDP socket (only non-null in UDP mode)
        if (udpServerSocket != null) {
            udpServerSocket.close();
        }
    }
}

From source file:eqtlmappingpipeline.util.ModuleEqtlNeutrophilReplication.java

/**
 * Command line entry point. Loads the genotype data and the replication QTL file, then
 * for every line of the interaction QTL file selects a replication QTL for the same gene
 * (the one at the same position if present, otherwise the one with the highest r2 inside
 * the window) and writes one tab separated output row with the discovery and replication
 * values, sign-flipped according to the assessed alleles.
 *
 * @param args the command line arguments
 * @throws IOException if one of the input files cannot be read or the output cannot be written
 * @throws LdCalculatorException declared by the LD calculation between two variants
 */
public static void main(String[] args) throws IOException, LdCalculatorException {

    System.out.println(HEADER);
    System.out.println();
    System.out.flush(); //flush to make sure header is before errors
    try {
        Thread.sleep(25); //Allows flush to complete
    } catch (InterruptedException ignored) {
        // deliberately ignored: the pause is cosmetic, an early wake-up only affects output ordering
    }

    CommandLineParser parser = new PosixParser();
    final CommandLine commandLine;
    try {
        commandLine = parser.parse(OPTIONS, args, true);
    } catch (ParseException ex) {
        System.err.println("Invalid command line arguments: " + ex.getMessage());
        System.err.println();
        new HelpFormatter().printHelp(" ", OPTIONS);
        System.exit(1);
        return;
    }

    final String[] genotypesBasePaths = commandLine.getOptionValues("g");
    final RandomAccessGenotypeDataReaderFormats genotypeDataType;
    final String replicationQtlFilePath = commandLine.getOptionValue("e");
    final String interactionQtlFilePath = commandLine.getOptionValue("i");
    final String outputFilePath = commandLine.getOptionValue("o");
    // NOTE(review): ldCutoff is only echoed below and never applied as a filter in this method
    final double ldCutoff = Double.parseDouble(commandLine.getOptionValue("ld"));
    final int window = Integer.parseInt(commandLine.getOptionValue("w"));

    System.out.println("Genotype: " + Arrays.toString(genotypesBasePaths));
    System.out.println("Interaction file: " + interactionQtlFilePath);
    System.out.println("Replication file: " + replicationQtlFilePath);
    System.out.println("Output: " + outputFilePath);
    System.out.println("LD: " + ldCutoff);
    System.out.println("Window: " + window);

    // Determine the genotype format: explicit -G wins, otherwise derive it from the first path
    try {
        if (commandLine.hasOption("G")) {
            genotypeDataType = RandomAccessGenotypeDataReaderFormats
                    .valueOf(commandLine.getOptionValue("G").toUpperCase());
        } else {
            if (genotypesBasePaths[0].endsWith(".vcf")) {
                System.err.println(
                        "Only vcf.gz is supported. Please see manual on how to do create a vcf.gz file.");
                System.exit(1);
                return;
            }
            try {
                genotypeDataType = RandomAccessGenotypeDataReaderFormats
                        .matchFormatToPath(genotypesBasePaths[0]);
            } catch (GenotypeDataException e) {
                System.err
                        .println("Unable to determine input 1 type based on specified path. Please specify -G");
                System.exit(1);
                return;
            }
        }
    } catch (IllegalArgumentException e) {
        System.err.println("Error parsing --genotypesFormat \"" + commandLine.getOptionValue("G")
                + "\" is not a valid input data format");
        System.exit(1);
        return;
    }

    final RandomAccessGenotypeData genotypeData;

    try {
        genotypeData = genotypeDataType.createFilteredGenotypeData(genotypesBasePaths, 100, null, null, null,
                0.8);
    } catch (TabixFileNotFoundException e) {
        LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\n"
                + "Please see README on how to create a tabix file");
        System.exit(1);
        return;
    } catch (IOException e) {
        LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
        System.exit(1);
        return;
    } catch (IncompatibleMultiPartGenotypeDataException e) {
        LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e);
        System.exit(1);
        return;
    } catch (GenotypeDataException e) {
        LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
        System.exit(1);
        return;
    }

    // Replication QTLs indexed by chromosome and position; several QTLs may share a position
    ChrPosTreeMap<ArrayList<ReplicationQtl>> replicationQtls = new ChrPosTreeMap<>();

    // try-with-resources: the reader is closed even when a line fails to parse
    try (CSVReader replicationQtlReader = new CSVReader(new FileReader(replicationQtlFilePath), '\t')) {
        replicationQtlReader.readNext();//skip header
        String[] replicationLine;
        while ((replicationLine = replicationQtlReader.readNext()) != null) {

            try {

                GeneticVariant variant = genotypeData.getSnpVariantByPos(
                        replicationLine[REPLICATION_SNP_CHR_COL],
                        Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL]));
                if (variant == null) {
                    // replication SNP is absent from the genotype data: skip the line
                    continue;
                }

                ReplicationQtl replicationQtl = new ReplicationQtl(replicationLine[REPLICATION_SNP_CHR_COL],
                        Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL]),
                        replicationLine[REPLICATION_GENE_COL],
                        Double.parseDouble(replicationLine[REPLICATION_BETA_COL]),
                        variant.getAlternativeAlleles().get(0).getAlleleAsString());
                ArrayList<ReplicationQtl> posReplicationQtls = replicationQtls.get(replicationQtl.getChr(),
                        replicationQtl.getPos());
                if (posReplicationQtls == null) {
                    posReplicationQtls = new ArrayList<>();
                    replicationQtls.put(replicationQtl.getChr(), replicationQtl.getPos(), posReplicationQtls);
                }
                posReplicationQtls.add(replicationQtl);

            } catch (Exception e) {
                // print the offending line for diagnosis, then let the failure propagate
                System.out.println(Arrays.toString(replicationLine));
                throw e;
            }
        }
    }

    int interactionSnpNotInGenotypeData = 0;
    // NOTE(review): the next three counters are never incremented in this method and always print 0
    int noReplicationQtlsInWindow = 0;
    int noReplicationQtlsInLd = 0;
    int multipleReplicationQtlsInLd = 0;
    int replicationTopSnpNotInGenotypeData = 0;

    HashSet<String> notFound = new HashSet<>();

    final String[] outputLine = new String[14];
    int c = 0;

    // try-with-resources: the writer is flushed and closed even if processing fails part-way
    try (CSVWriter outputWriter = new CSVWriter(new FileWriter(new File(outputFilePath)), '\t', '\0')) {

        outputLine[c++] = "Chr";
        outputLine[c++] = "Pos";
        outputLine[c++] = "SNP";
        outputLine[c++] = "Gene";
        outputLine[c++] = "Module";
        outputLine[c++] = "DiscoveryZ";
        outputLine[c++] = "ReplicationZ";
        outputLine[c++] = "DiscoveryZCorrected";
        outputLine[c++] = "ReplicationZCorrected";
        outputLine[c++] = "DiscoveryAlleleAssessed";
        outputLine[c++] = "ReplicationAlleleAssessed";
        outputLine[c++] = "bestLd";
        outputLine[c++] = "bestLd_dist";
        outputLine[c++] = "nextLd";
        outputWriter.writeNext(outputLine);

        try (CSVReader interactionQtlReader = new CSVReader(new FileReader(interactionQtlFilePath), '\t')) {
            interactionQtlReader.readNext();//skip header
            String[] interactionQtlLine;
            while ((interactionQtlLine = interactionQtlReader.readNext()) != null) {

                String snp = interactionQtlLine[1];
                String chr = interactionQtlLine[2];
                int pos = Integer.parseInt(interactionQtlLine[3]);
                String gene = interactionQtlLine[4];
                String alleleAssessed = interactionQtlLine[9];
                String module = interactionQtlLine[12];
                double discoveryZ = Double.parseDouble(interactionQtlLine[10]);

                GeneticVariant interactionQtlVariant = genotypeData.getSnpVariantByPos(chr, pos);

                if (interactionQtlVariant == null) {
                    System.err.println("Interaction QTL SNP not found in genotype data: " + chr + ":" + pos);
                    ++interactionSnpNotInGenotypeData;
                    continue;
                }

                ReplicationQtl bestMatch = null;
                double bestMatchR2 = Double.NaN;
                Ld bestMatchLd = null;
                double nextBestR2 = Double.NaN;

                // A replication QTL at exactly the same position for the same gene is taken with r2 = 1
                ArrayList<ReplicationQtl> sameSnpQtls = replicationQtls.get(chr, pos);

                if (sameSnpQtls != null) {
                    for (ReplicationQtl sameSnpQtl : sameSnpQtls) {
                        if (sameSnpQtl.getGene().equals(gene)) {
                            bestMatch = sameSnpQtl;
                            bestMatchR2 = 1;
                        }
                    }
                }

                NavigableMap<Integer, ArrayList<ReplicationQtl>> potentionalReplicationQtls = replicationQtls
                        .getChrRange(chr, pos - window, true, pos + window, true);

                for (ArrayList<ReplicationQtl> potentialReplicationQtls : potentionalReplicationQtls.values()) {

                    for (ReplicationQtl potentialReplicationQtl : potentialReplicationQtls) {

                        if (!potentialReplicationQtl.getGene().equals(gene)) {
                            continue;
                        }

                        GeneticVariant potentialReplicationQtlVariant = genotypeData.getSnpVariantByPos(
                                potentialReplicationQtl.getChr(), potentialReplicationQtl.getPos());

                        if (potentialReplicationQtlVariant == null) {
                            notFound.add(
                                    potentialReplicationQtl.getChr() + ":" + potentialReplicationQtl.getPos());
                            ++replicationTopSnpNotInGenotypeData;
                            continue;
                        }

                        Ld ld = interactionQtlVariant.calculateLd(potentialReplicationQtlVariant);
                        double r2 = ld.getR2();

                        // clamp r2 to at most 1
                        if (r2 > 1) {
                            r2 = 1;
                        }

                        // NOTE(review): nextBestR2 is only updated when a new best is found, so a
                        // candidate that ranks second after the best was already seen is not
                        // recorded - confirm whether that is intended
                        if (bestMatch == null) {
                            bestMatch = potentialReplicationQtl;
                            bestMatchR2 = r2;
                            bestMatchLd = ld;
                        } else if (r2 > bestMatchR2) {
                            bestMatch = potentialReplicationQtl;
                            nextBestR2 = bestMatchR2;
                            bestMatchR2 = r2;
                            bestMatchLd = ld;
                        }

                    }
                }

                double replicationZ = Double.NaN;
                double replicationZCorrected = Double.NaN;
                double discoveryZCorrected = Double.NaN;

                String replicationAlleleAssessed = null;

                if (bestMatch != null) {
                    replicationZ = bestMatch.getBeta();
                    replicationAlleleAssessed = bestMatch.getAssessedAllele();

                    if (pos != bestMatch.getPos()) {

                        // Different SNPs: pick the most frequent haplotype and flip the sign of each
                        // value whose assessed allele is not the allele on that haplotype
                        String commonHap = null;
                        double commonHapFreq = -1;
                        for (Map.Entry<String, Double> hapFreq : bestMatchLd.getHaplotypesFreq().entrySet()) {

                            double f = hapFreq.getValue();

                            if (f > commonHapFreq) {
                                commonHapFreq = f;
                                commonHap = hapFreq.getKey();
                            }

                        }

                        String[] commonHapAlleles = StringUtils.split(commonHap, '/');

                        discoveryZCorrected = commonHapAlleles[0].equals(alleleAssessed) ? discoveryZ
                                : discoveryZ * -1;
                        replicationZCorrected = commonHapAlleles[1].equals(replicationAlleleAssessed)
                                ? replicationZ
                                : replicationZ * -1;

                    } else {

                        // Same SNP: only the replication value is flipped when the assessed alleles differ
                        discoveryZCorrected = discoveryZ;
                        replicationZCorrected = alleleAssessed.equals(replicationAlleleAssessed) ? replicationZ
                                : replicationZ * -1;

                    }

                }

                c = 0;
                outputLine[c++] = chr;
                outputLine[c++] = String.valueOf(pos);
                outputLine[c++] = snp;
                outputLine[c++] = gene;
                outputLine[c++] = module;
                outputLine[c++] = String.valueOf(discoveryZ);
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZ);
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(discoveryZCorrected);
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZCorrected);
                outputLine[c++] = alleleAssessed;
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(bestMatch.getAssessedAllele());
                outputLine[c++] = String.valueOf(bestMatchR2);
                outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(Math.abs(pos - bestMatch.getPos()));
                outputLine[c++] = String.valueOf(nextBestR2);
                outputWriter.writeNext(outputLine);

            }
        }
    }

    for (String e : notFound) {
        System.err.println("Not found: " + e);
    }

    System.out.println("interactionSnpNotInGenotypeData: " + interactionSnpNotInGenotypeData);
    System.out.println("noReplicationQtlsInWindow: " + noReplicationQtlsInWindow);
    System.out.println("noReplicationQtlsInLd: " + noReplicationQtlsInLd);
    System.out.println("multipleReplicationQtlsInLd: " + multipleReplicationQtlsInLd);
    System.out.println("replicationTopSnpNotInGenotypeData: " + replicationTopSnpNotInGenotypeData);

}

From source file:discovery.compression.kdd2011.ratio.RatioCompressionReport.java

/**
 * Command-line entry point: reads a BMGraph, compresses it to the requested ratio and
 * writes figures describing how the compressed graph compares with the original.
 *
 * <p>Files written, all relative to the working directory:
 * <ul>
 * <li>{@code times.txt} - time spent in the compression call, in seconds (appended);</li>
 * <li>{@code costs.txt} - compressor costs of the no-op run and of the selected
 * compression (appended);</li>
 * <li>{@code dissimilarity.txt} - square root of the summed squared differences between
 * original and compressed pairwise values (appended);</li>
 * <li>{@code connectivity.txt} - the per-pair values, only with {@code -connectivity}
 * (appended);</li>
 * <li>the file named by {@code -output_bmg}, holding one "Supernode_i" node per
 * non-empty partition group;</li>
 * <li>the file named by {@code -kmedoids_output} (default {@code clusters.txt},
 * overwritten), only with {@code -kmedoids}.</li>
 * </ul>
 * With {@code -only_times} the method exits right after writing {@code times.txt}.
 * The method never returns normally: it always ends in {@code System.exit}.
 *
 * @param args command-line options followed by the input graph file as the first
 *             non-option argument
 * @throws GraphReadingException declared by the signature; presumably raised while reading
 *                               the input graph - TODO confirm against BMGraphUtils
 * @throws IOException if one of the report files cannot be written
 * @throws java.text.ParseException declared by the signature; the raising call is not
 *                                  identifiable from this method alone
 */
public static void main(String[] args) throws GraphReadingException, IOException, java.text.ParseException {
    opts.addOption("r", true, "Goal compression ratio");
    opts.addOption("ctype", true, "Connectivity type: global or local, default is global.");
    opts.addOption("connectivity", false,
            "enables output for connectivity. Connectivity info will be written to connectivity.txt");
    opts.addOption("output_bmg", true, "Write bmg file with groups to given file.");
    opts.addOption("algorithm", true, "Algorithm to use, one of: greedy random1 random2 bruteforce slowgreedy");
    opts.addOption("hop2", false, "Only try to merge nodes that have common neighbors");
    opts.addOption("kmedoids", false, "Enables output for kmedoids clustering");
    opts.addOption("kmedoids_k", true, "Number of clusters to be used in kmedoids. Default is 3");
    opts.addOption("kmedoids_output", true,
            "Output file for kmedoid clusters. Default is clusters.txt. This file will be overwritten.");
    opts.addOption("norefresh", false,
            "Use old style merging: all connectivities are not refreshed when merging");
    opts.addOption("edge_attribute", true, "Attribute from bmgraph used as edge weight");
    opts.addOption("only_times", false, "Only write times.txt");

    CommandLineParser parser = new PosixParser();
    final CommandLine cmd;
    try {
        cmd = parser.parse(opts, args);
    } catch (ParseException e) {
        e.printStackTrace();
        // A malformed command line is a failure, so report it with a non-zero status.
        System.exit(1);
        return; // unreachable; lets the compiler treat cmd as definitely assigned
    }

    // NOTE(review): printHelp() is defined outside this method. The code below keeps
    // running after each call, which is only sound if printHelp() terminates the JVM - confirm.

    boolean connectivity = cmd.hasOption("connectivity");
    boolean hop2 = cmd.hasOption("hop2");

    // Connectivity type also selects the default merge model.
    ConnectivityType ctype = ConnectivityType.GLOBAL;
    CompressionMergeModel mergeModel = new PathAverageMergeModel();
    if (cmd.hasOption("ctype")) {
        String ctypeStr = cmd.getOptionValue("ctype");
        if (ctypeStr.equals("local")) {
            ctype = ConnectivityType.LOCAL;
            mergeModel = new EdgeAverageMergeModel();
        } else if (ctypeStr.equals("global")) {
            ctype = ConnectivityType.GLOBAL;
            mergeModel = new PathAverageMergeModel();
        } else {
            System.out.println(PROGRAM_NAME + ": unknown connectivity type " + ctypeStr);
            printHelp();
        }
    }

    // -norefresh overrides whatever merge model the connectivity type picked.
    if (cmd.hasOption("norefresh")) {
        mergeModel = new PathAverageMergeModelNorefresh();
    }

    RatioCompression compression = new GreedyRatioCompression(hop2);
    if (cmd.hasOption("algorithm")) {
        String alg = cmd.getOptionValue("algorithm");
        if (alg.equals("greedy")) {
            compression = new GreedyRatioCompression(hop2);
        } else if (alg.equals("random1")) {
            compression = new RandomRatioCompression(hop2);
        } else if (alg.equals("random2")) {
            compression = new SmartRandomRatioCompression(hop2);
        } else if (alg.equals("bruteforce")) {
            compression = new BruteForceCompression(hop2, ctype == ConnectivityType.LOCAL);
        } else if (alg.equals("slowgreedy")) {
            compression = new SlowGreedyRatioCompression(hop2);
        } else {
            System.out.println("algorithm must be one of: greedy random1 random2 bruteforce slowgreedy");
            printHelp();
        }
    }
    compression.setMergeModel(mergeModel);

    double ratio = 0;
    if (cmd.hasOption("r")) {
        ratio = Double.parseDouble(cmd.getOptionValue("r"));
    } else {
        System.out.println(PROGRAM_NAME + ": compression ratio not defined");
        printHelp();
    }

    // NOTE(review): no "help" option is registered in this method; presumably it is
    // added to opts elsewhere - confirm.
    if (cmd.hasOption("help")) {
        printHelp();
    }

    String infile = null;
    if (cmd.getArgs().length != 0) {
        infile = cmd.getArgs()[0];
    } else {
        printHelp();
    }

    boolean kmedoids = cmd.hasOption("kmedoids");
    int kmedoidsK = 3;
    if (cmd.hasOption("kmedoids_k")) {
        kmedoidsK = Integer.parseInt(cmd.getOptionValue("kmedoids_k"));
    }
    String kmedoidsOutput = "clusters.txt";
    if (cmd.hasOption("kmedoids_output")) {
        kmedoidsOutput = cmd.getOptionValue("kmedoids_output");
    }

    String edgeAttrib = "goodness";
    if (cmd.hasOption("edge_attribute")) {
        edgeAttrib = cmd.getOptionValue("edge_attribute");
    }

    // This program should directly use bmgraph-java to read and
    // DefaultGraph should have a constructor that takes a BMGraph as an
    // argument.
    BMGraph bmg = BMGraphUtils.readBMGraph(infile);
    int origN = bmg.getNodes().size();
    System.out.println("bmgraph read");

    // Number the nodes 0..origN-1 and keep the mapping in both directions.
    BMNode[] i2n = new BMNode[origN];
    HashMap<BMNode, Integer> n2i = new HashMap<BMNode, Integer>();
    {
        int pi = 0;
        for (BMNode nod : bmg.getNodes()) {
            n2i.put(nod, pi);
            i2n[pi++] = nod;
        }
    }

    // Index-based copy of the graph; the edge weight is parsed from the chosen attribute.
    DefaultGraph dg = new DefaultGraph();
    for (BMEdge e : bmg.getEdges()) {
        dg.addEdge(n2i.get(e.getSource()), n2i.get(e.getTarget()), Double.parseDouble(e.get(edgeAttrib)));
    }

    // Snapshot taken before any compressor sees dg.
    DefaultGraph origDG = dg.copy();

    System.out.println("inputs read");
    RatioCompression nopCompressor = new RatioCompression.DefaultRatioCompression();
    ResultGraph nopResult = nopCompressor.compressGraph(dg, 1);

    long start = System.currentTimeMillis();
    ResultGraph result = compression.compressGraph(dg, ratio);
    long timeSpent = System.currentTimeMillis() - start;
    double seconds = timeSpent * 0.001;

    BufferedWriter timesWriter = new BufferedWriter(new FileWriter("times.txt", true));
    try {
        timesWriter.append("" + seconds + "\n");
    } finally {
        timesWriter.close();
    }

    if (cmd.hasOption("only_times")) {
        System.out.println("Compression done, exiting.");
        System.exit(0);
    }

    BufferedWriter costsWriter = new BufferedWriter(new FileWriter("costs.txt", true));
    try {
        costsWriter.append("" + nopResult.getCompressorCosts() + " " + result.getCompressorCosts() + "\n");
    } finally {
        costsWriter.close();
    }

    // group[x] = index of the partition group that original node x ended up in.
    int[] group = new int[origN];
    for (int i = 0; i < result.partition.size(); i++) {
        for (int x : result.partition.get(i)) {
            group[x] = i;
        }
    }

    double[][] origProb;
    double[][] compProb;
    if (ctype == ConnectivityType.LOCAL) {
        // Local connectivity: compare direct edge weights.
        origProb = new double[origN][origN];
        compProb = new double[origN][origN];
        DefaultGraph uncompressed = result.uncompressedGraph();
        for (int i = 0; i < origN; i++) {
            for (int j = 0; j < origN; j++) {
                // NOTE(review): this reads dg, which has been handed to both compressors,
                // whereas the global branch reads the origDG snapshot - confirm dg is
                // still the unmodified original here.
                origProb[i][j] = dg.getEdgeWeight(i, j);
                compProb[i][j] = uncompressed.getEdgeWeight(i, j);
            }
        }
        System.out.println("Writing edge-dissimilarity");
    } else {
        // Global connectivity: compare the matrices computed by ProbDijkstra.
        origProb = ProbDijkstra.getProbMatrix(origDG);
        compProb = new double[origN][origN];

        System.out.println("nodeCount = " + result.graph.getNodeCount());
        double[][] ccProb = ProbDijkstra.getProbMatrix(result.graph);
        System.out.println("ccProb.length = " + ccProb.length);
        System.out.println("ccProb[0].length = " + ccProb[0].length);

        for (int i = 0; i < origN; i++) {
            int gi = group[i];
            for (int j = 0; j < origN; j++) {
                int gj = group[j];
                if (gi == gj) {
                    // Both nodes share a supernode: use that supernode's self-edge weight.
                    compProb[i][j] = result.graph.getEdgeWeight(gi, gj);
                } else {
                    compProb[i][j] = ccProb[gi][gj];
                }
            }
        }

        System.out.println("Writing best-path-dissimilarity");
    }

    {
        BufferedWriter connWr = null;
        if (connectivity) {
            connWr = new BufferedWriter(new FileWriter("connectivity.txt", true));
        }
        double totalDiff = 0;

        try {
            // Every unordered pair (i < j) is visited exactly once.
            for (int i = 0; i < origN; i++) {
                for (int j = i + 1; j < origN; j++) {
                    double diff = Math.abs(origProb[i][j] - compProb[i][j]);
                    if (connWr != null) {
                        connWr.append(i2n[i] + "\t" + i2n[j] + "\t" + origProb[i][j] + "\t" + compProb[i][j]
                                + "\t" + diff + "\n");
                    }
                    totalDiff += diff * diff;
                }
            }
            if (connWr != null) {
                // Blank line separates this run's rows from the next run's in the appended file.
                connWr.append("\n");
            }
        } finally {
            if (connWr != null) {
                connWr.close();
            }
        }

        totalDiff = Math.sqrt(totalDiff);
        BufferedWriter dissWr = new BufferedWriter(new FileWriter("dissimilarity.txt", true));
        try {
            dissWr.append("" + totalDiff + "\n");
        } finally {
            dissWr.close();
        }
    }

    if (cmd.hasOption("output_bmg")) {
        BMGraph outgraph = new BMGraph();

        String outputfile = cmd.getOptionValue("output_bmg");
        HashMap<Integer, BMNode> nodes = new HashMap<Integer, BMNode>();

        // One supernode per non-empty group, listing its members in the "nodes" attribute.
        for (int i = 0; i < result.partition.size(); i++) {
            ArrayList<Integer> members = result.partition.get(i);
            if (members.size() == 0) {
                continue;
            }
            BMNode node = new BMNode("Supernode_" + i);
            HashMap<String, String> attributes = new HashMap<String, String>();
            StringBuilder contents = new StringBuilder();
            for (int x : members) {
                contents.append(i2n[x] + ",");
            }
            // Drop the trailing comma; members is non-empty here, so contents is too.
            contents.delete(contents.length() - 1, contents.length());

            attributes.put("nodes", contents.toString());
            attributes.put("self-edge", "" + result.graph.getEdgeWeight(i, i));
            node.setAttributes(attributes);
            nodes.put(i, node);
            outgraph.ensureHasNode(node);
        }

        for (int i = 0; i < result.partition.size(); i++) {
            if (result.partition.get(i).size() == 0) {
                continue;
            }
            for (int x : result.graph.getNeighbors(i)) {
                // Skip neighbours with a smaller index than i.
                if (x < i) {
                    continue;
                }
                BMNode from = nodes.get(i);
                BMNode to = nodes.get(x);
                if (from == null || to == null) {
                    // Diagnostic only: the edge is still created below, as before.
                    System.out.println(from + "->" + to);
                    System.out.println(i + "->" + x);
                    System.out.println("");
                }
                BMEdge e = new BMEdge(from, to, "notype");

                e.setAttributes(new HashMap<String, String>());
                e.put("goodness", "" + result.graph.getEdgeWeight(i, x));
                outgraph.ensureHasEdge(e);
            }
        }
        BMGraphUtils.writeBMGraph(outgraph, outputfile);
    }

    // k medoids!
    if (kmedoids) {
        if (ctype == ConnectivityType.LOCAL) {
            // In local mode compProb holds edge weights so far; replace it with the
            // ProbDijkstra matrix of the uncompressed result before clustering.
            compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph());
        }

        KMedoidsResult clustersComp = KMedoids.runKMedoids(compProb, kmedoidsK);

        // No append flag: the cluster file is overwritten on every run.
        BufferedWriter bw = new BufferedWriter(new FileWriter(kmedoidsOutput));
        try {
            for (int i = 0; i < origN; i++) {
                bw.append(i2n[i] + " " + clustersComp.clusters[i] + "\n");
            }
        } finally {
            bw.close();
        }
    }

    System.exit(0);
}

From source file:lemontree.modulenetwork.RunCli.java

/**
 * Parse command line options and run LeMoNe 
 *
 * @param args command-line arguments string
 *            
 */
public static void main(String[] args) {

    // set dummy values for those parameters, they'll be filled later
    String task = null;
    String gene_file = null;
    String data_file = null;
    String reg_file = null;
    String tree_file = null;
    String output_file = null;
    String range = null;
    int num_steps = 0;
    int burn_in = 0;
    int sample_steps = 0;
    String cluster_file = null;
    String go_annot_file = null;
    String go_ref_file = null;
    String top_regulators = null;
    String map_file = null;
    String go_ontology_file = null;
    String draw_experiment_color = null;

    // set default values for those parameters, users can override them
    double alpha = 0.1;
    double beta = 0.1;
    double mu = 0.0;
    double lambda = 0.1;
    double score_gain = 0.0;
    int init_num_clust = 0;
    int num_runs = 1;
    boolean use_bayesian_score = true;
    int num_reg = 10;
    double beta_reg = 20;
    String go_p_value = "0.05";
    String go_namespace = "biological_process";
    boolean use_global_mean = false;
    boolean use_regulator_mean = false;
    int cut_level = 0;
    double min_weight = 0.25;
    int min_clust_size = 10;
    int min_clust_score = 2;
    Boolean node_clustering = true;
    boolean draw_experiment_names = true;

    // create the different options
    Options opts = new Options();
    opts.addOption("task", true, "task to perform");
    opts.addOption("gene_file", true, "gene file");
    opts.addOption("data_file", true, "data file (genes)");
    opts.addOption("reg_file", true, "regulators file");
    opts.addOption("tree_file", true, "tree file");
    opts.addOption("output_file", true, "output file name");
    opts.addOption("num_steps", true, "number of steps (Gibbs sampler)");
    opts.addOption("burn_in", true, "number of burn-in steps (Gibbs sampler)");
    opts.addOption("sample_steps", true, "sample steps interval (Gibbs sampler)");
    opts.addOption("cluster_file", true, "cluster file name");
    opts.addOption("num_clust", true, "number of clusters");
    opts.addOption("alpha", true, "alpha0 parameter value");
    opts.addOption("beta", true, "beta0 parameter value");
    opts.addOption("mu", true, "mu0 parameter value");
    opts.addOption("lambda", true, "lambda0 parameter value");
    opts.addOption("score_gain", true, "score gain cutoff value");
    opts.addOption("init_num_clust", true, "initial number of clusters (Gibbs sampler)");
    opts.addOption("num_runs", true, "number of runs (Gibbs sampler)");
    opts.addOption("num_reg", true, "maximum number of regulators assigned for each node");
    opts.addOption("beta_reg", true, "beta parameter value for regulators assignment");
    opts.addOption("num_split", true, "number of splits for the module set");
    opts.addOption("prefix", true, "java command prefix for the split option command line");
    opts.addOption("range", true, "module set range for the assignment of the regulators");
    opts.addOption("go_annot_file", true, "GO custom annotation file");
    opts.addOption("go_ontology_file", true, "GO ontology file name");
    opts.addOption("go_ref_file", true, "GO refence gene list file name");
    opts.addOption("go_p_value", true, "GO p-value cutoff");
    opts.addOption("go_namespace", true, "GO namespace");
    opts.addOption("go_annot_def", false, "GO annotation file flag");
    opts.addOption("matlab", false, "Matlab format for output files");
    opts.addOption("help", false, "help");
    opts.addOption("h", false, "help");
    opts.addOption("top_regulators", true, "Top regulators file name");
    opts.addOption("use_global_mean", false, "Use global mean for the figures");
    opts.addOption("use_regulator_mean", false, "Use regulator mean for the figures");
    opts.addOption("all_regulators", false, "Print all regulators");
    opts.addOption("map_file", true, "Gene names map file");
    opts.addOption("cut_level", true, "Regulation tree cut level");
    opts.addOption("min_weight", true, "Tight clusters minimum weight");
    opts.addOption("min_clust_size", true, "Tight clusters minimum cluster size");
    opts.addOption("min_clust_score", true, "Tight clusters minimum cluster score");
    opts.addOption("node_clustering", true, "Perform node clustering (true) or edge clustering (false)");
    opts.addOption("draw_experiment_names", true, "Draw experiment names in the figures");
    opts.addOption("draw_experiment_color", true, "Draw experiment color codes in the figures");

    // build a parser object and parse the command line (!)
    CommandLineParser parser = new PosixParser();
    try {

        CommandLine cmd = parser.parse(opts, args);
        if (cmd.hasOption("min_weight"))
            min_weight = Double.parseDouble(cmd.getOptionValue("min_weight"));

        if (cmd.hasOption("min_clust_size"))
            min_clust_size = Integer.parseInt(cmd.getOptionValue("min_clust_size"));

        if (cmd.hasOption("min_clust_score"))
            min_clust_score = Integer.parseInt(cmd.getOptionValue("min_clust_score"));

        if (cmd.hasOption("task"))
            task = cmd.getOptionValue("task");

        if (cmd.hasOption("data_file"))
            data_file = cmd.getOptionValue("data_file");

        if (cmd.hasOption("tree_file"))
            tree_file = cmd.getOptionValue("tree_file");

        if (cmd.hasOption("gene_file"))
            gene_file = cmd.getOptionValue("gene_file");

        if (cmd.hasOption("reg_file"))
            reg_file = cmd.getOptionValue("reg_file");

        if (cmd.hasOption("output_file"))
            output_file = cmd.getOptionValue("output_file");

        if (cmd.hasOption("cluster_file"))
            cluster_file = cmd.getOptionValue("cluster_file");

        if (cmd.hasOption("alpha"))
            alpha = Double.parseDouble(cmd.getOptionValue("alpha"));

        if (cmd.hasOption("beta"))
            beta = Double.parseDouble(cmd.getOptionValue("beta"));

        if (cmd.hasOption("lambda"))
            lambda = Double.parseDouble(cmd.getOptionValue("lambda"));

        if (cmd.hasOption("mu"))
            mu = Double.parseDouble(cmd.getOptionValue("mu"));

        if (cmd.hasOption("score_gain"))
            score_gain = Double.parseDouble(cmd.getOptionValue("score_gain"));

        if (cmd.hasOption("num_steps"))
            num_steps = Integer.parseInt(cmd.getOptionValue("num_steps"));

        if (cmd.hasOption("burn_in"))
            burn_in = Integer.parseInt(cmd.getOptionValue("burn_in"));

        if (cmd.hasOption("sample_steps"))
            sample_steps = Integer.parseInt(cmd.getOptionValue("sample_steps"));

        if (cmd.hasOption("init_num_clust"))
            init_num_clust = Integer.parseInt(cmd.getOptionValue("init_num_clust"));

        if (cmd.hasOption("num_reg"))
            num_reg = Integer.parseInt(cmd.getOptionValue("num_reg"));

        if (cmd.hasOption("beta_reg"))
            beta_reg = Double.parseDouble(cmd.getOptionValue("beta_reg"));

        if (cmd.hasOption("range"))
            range = cmd.getOptionValue("range");

        if (cmd.hasOption("go_annot_file"))
            go_annot_file = cmd.getOptionValue("go_annot_file");

        if (cmd.hasOption("go_ontology_file"))
            go_ontology_file = cmd.getOptionValue("go_ontology_file");

        if (cmd.hasOption("go_ref_file"))
            go_ref_file = cmd.getOptionValue("go_ref_file");

        if (cmd.hasOption("go_p_value"))
            go_p_value = cmd.getOptionValue("go_p_value");

        if (cmd.hasOption("go_namespace"))
            go_namespace = cmd.getOptionValue("go_namespace");

        if (cmd.hasOption("top_regulators"))
            top_regulators = cmd.getOptionValue("top_regulators");

        if (cmd.hasOption("use_global_mean"))
            use_global_mean = true;

        if (cmd.hasOption("use_regulator_mean"))
            use_regulator_mean = true;

        if (cmd.hasOption("map_file"))
            map_file = cmd.getOptionValue("map_file");

        if (cmd.hasOption("cut_level"))
            cut_level = Integer.parseInt(cmd.getOptionValue("cut_level"));

        if (cmd.hasOption("node_clustering"))
            if (cmd.getOptionValue("node_clustering").equalsIgnoreCase("false"))
                node_clustering = false;

        if (cmd.hasOption("draw_experiment_names"))
            if (cmd.getOptionValue("draw_experiment_names").equalsIgnoreCase("false"))
                draw_experiment_names = false;

        if (cmd.hasOption("draw_experiment_color"))
            draw_experiment_color = cmd.getOptionValue("draw_experiment_color");

    } catch (ParseException exp) {
        System.out.println("Error while parsing command line:");
        System.out.println();
        exp.printStackTrace();
        System.exit(1);
    }

    // print header
    printBanner();

    // something has to be done, we need a task to be set
    if (task == null)
        Die("Error: task option must be set.");

    // ---------------------------------------------------------------
    // ganesh task: 2-way clustering of genes using the gibbs sampler
    // ---------------------------------------------------------------
    if (task.equalsIgnoreCase("ganesh")) {

        // those parameters must be set
        if (data_file == null)
            Die("Error: data_file option must be set.");
        if (output_file == null)
            Die("Error: output_file option must be set.");

        // set default values if the user did not change them
        if (num_runs == 0)
            num_runs = 1;
        if (num_steps == 0)
            num_steps = 100;
        if (burn_in == 0)
            burn_in = 50;
        if (sample_steps == 0)
            sample_steps = 100;

        System.out.println("Parameters");
        System.out.println("----------");
        System.out.println("task:               " + task);
        System.out.println("data_file:          " + data_file);
        System.out.println("gene_file:          " + gene_file);
        System.out.println("output_file:        " + output_file);
        System.out.println("lambda:             " + lambda);
        System.out.println("mu:                 " + mu);
        System.out.println("alpha:              " + alpha);
        System.out.println("beta:               " + beta);
        System.out.println("num_steps:          " + num_steps);
        System.out.println("burn_in:            " + burn_in);
        System.out.println("sample_steps:       " + sample_steps);
        System.out.println("score_gain:         " + score_gain);

        // Create ModuleNetwork object
        ModuleNetwork M = new ModuleNetwork();
        M.setNormalGammaPriors(lambda, mu, alpha, beta);
        M.readExpressionMatrix(data_file, gene_file);
        M.setNormalGammaPriors(lambda, mu, alpha, beta);
        // Gibbs sample different module sets with one tree per module
        M.gibbsSamplerGenes(init_num_clust, num_runs, num_steps, burn_in, sample_steps, score_gain,
                use_bayesian_score);
        // write results to text file
        M.writeClusters(output_file);
    }
    //-------------------------------------------------------------------------------------
    // tight_clusters task: node clustering to produce tight clusters
    //-------------------------------------------------------------------------------------
    else if (task.equalsIgnoreCase("tight_clusters")) {

        if (data_file == null)
            Die("Error: data_file option must be set.");
        if (cluster_file == null)
            Die("Error: cluster_file option must be set.");
        if (output_file == null)
            Die("Error: output_file option must be set.");

        System.out.println("Parameters");
        System.out.println("----------");
        System.out.println("task:               " + task);
        System.out.println("data_file:          " + data_file);
        System.out.println("cluster_file:       " + cluster_file);
        System.out.println("output_file:        " + output_file);
        System.out.println("node_clustering:    " + node_clustering);
        System.out.println("min_weight:         " + min_weight);
        System.out.println("min_clust_size:     " + min_clust_size);
        System.out.println("min_clust_score:    " + min_clust_score);
        System.out.println();

        ModuleNetwork M = new ModuleNetwork();
        M.readExpressionMatrix(data_file, null);
        M.readMultipleClusters(cluster_file);

        // find tight clusters with node clustering algorithm
        CentroidClustering cc = new CentroidClustering(M, node_clustering, min_weight, min_clust_size,
                min_clust_score);
        cc.doCentroidClustering();
        cc.printClusters(output_file);

    }
    //-------------------------------------------------------------------------------------
    // regulators task: learn regulation programs (gibbs sampling exp. + assign regulators)
    //-------------------------------------------------------------------------------------
    else if (task.equalsIgnoreCase("regulators")) {

        // those parameters must be set
        if (data_file == null)
            Die("Error: data_file option must be set.");
        if (reg_file == null)
            Die("Error: reg_file option must be set.");
        if (cluster_file == null)
            Die("Error: cluster_file option must be set.");
        if (output_file == null)
            Die("Error: output_file option must be set.");

        // set default values if the user did not change them
        if (num_steps == 0)
            num_steps = 1100;
        if (burn_in == 0)
            burn_in = 100;
        if (sample_steps == 0)
            sample_steps = 100;

        System.out.println("Parameters");
        System.out.println("----------");
        System.out.println("task:               " + task);
        System.out.println("data_file:          " + data_file);
        System.out.println("reg_file:           " + reg_file);
        System.out.println("cluster_file:       " + cluster_file);
        System.out.println("output_file:        " + output_file);
        System.out.println("lambda:             " + lambda);
        System.out.println("mu:                 " + mu);
        System.out.println("alpha:              " + alpha);
        System.out.println("beta:               " + beta);
        System.out.println("num_runs:           " + num_runs);
        System.out.println("num_steps:          " + num_steps);
        System.out.println("burn_in:            " + burn_in);
        System.out.println("sample_steps:       " + sample_steps);
        System.out.println("score_gain:         " + score_gain);
        System.out.println("num_reg:            " + num_reg);

        // create module network object
        ModuleNetwork M = new ModuleNetwork();
        M.setNormalGammaPriors(lambda, mu, alpha, beta);
        M.readExpressionMatrix(data_file, null);
        M.readClusters(cluster_file);
        M.readRegulators(reg_file);
        M.initStatisticsAndScore();
        M.setDataMeanAndSDFromModuleset();

        // cluster experiments using the gibbs sampler
        M.gibbsSamplerExpts(num_runs, num_steps, burn_in, sample_steps, score_gain, use_bayesian_score);
        // assign regulators
        M.assignRegulatorsNoAcyclStoch(beta_reg, num_reg);
        // write results as text file with all regulators, top 1%, random regulators and regulations trees as xml
        M.printRegulators(output_file + ".allreg.txt", true, false);
        M.printRegulators(output_file + ".topreg.txt", false, false);
        M.printRandomRegulators(output_file + ".randomreg.txt", false);
        M.writeRegTreeXML(output_file + ".xml.gz");
    }
    //----------------------------------------------------------
    // experiments task: cluster conditions using gibbs sampling 
    //----------------------------------------------------------
    else if (task.equalsIgnoreCase("experiments")) {

        // those parameters must be set
        if (data_file == null)
            Die("Error: data_file option must be set.");
        if (cluster_file == null)
            Die("Error: cluster_file option must be set.");
        if (output_file == null)
            Die("Error: output_file option must be set.");

        // set default values if the user did not change them
        if (num_steps == 0)
            num_steps = 1100;
        if (burn_in == 0)
            burn_in = 100;
        if (sample_steps == 0)
            sample_steps = 100;

        System.out.println("Parameters");
        System.out.println("----------");
        System.out.println("task:               " + task);
        System.out.println("data_file:          " + data_file);
        System.out.println("cluster_file:       " + cluster_file);
        System.out.println("output_file:        " + output_file);
        System.out.println("lambda:             " + lambda);
        System.out.println("mu:                 " + mu);
        System.out.println("alpha:              " + alpha);
        System.out.println("beta:               " + beta);
        System.out.println("num_steps:          " + num_steps);
        System.out.println("burn_in:            " + burn_in);
        System.out.println("sample_steps:       " + sample_steps);
        System.out.println("score_gain:         " + score_gain);

        // read data and clusters
        ModuleNetwork M = new ModuleNetwork();
        M.setNormalGammaPriors(lambda, mu, alpha, beta);
        M.readExpressionMatrix(data_file, null);
        M.readClusters(cluster_file);
        M.initStatisticsAndScore();

        // cluster experiments using the gibbs sampler
        M.gibbsSamplerExpts(num_runs, num_steps, burn_in, sample_steps, score_gain, use_bayesian_score);

        // write results as xml file
        M.writeRegTreeXML(output_file);
    }
    //---------------------------------------------------------------
    // split_reg task: assign regulators for a given range of modules
    //---------------------------------------------------------------
    else if (task.equalsIgnoreCase("split_reg")) {

        // those parameters must be set
        if (data_file == null)
            Die("Error: data_file option must be set.");
        if (reg_file == null)
            Die("Error: reg_file option must be set.");
        if (cluster_file == null)
            Die("Error: cluster_file option must be set.");
        if (tree_file == null)
            Die("Error: tree_file option must be set.");
        if (output_file == null)
            Die("Error: output_file option must be set.");
        if (range == null)
            Die("Error: range option must be set.");

        System.out.println("Parameters");
        System.out.println("----------");
        System.out.println("task:               " + task);
        System.out.println("data_file:          " + data_file);
        System.out.println("reg_file:           " + reg_file);
        System.out.println("cluster_file:       " + cluster_file);
        System.out.println("output_file:        " + output_file);
        System.out.println("num_reg:            " + num_reg);
        System.out.println("beta_reg:           " + beta_reg);
        System.out.println("range:              " + range);

        ModuleNetwork M = new ModuleNetwork();
        M.setNormalGammaPriors(lambda, mu, alpha, beta);
        M.readExpressionMatrix(data_file, null);
        M.readClusters(cluster_file);
        M.readRegulators(reg_file);
        M.initStatisticsAndScore();
        M.readRegTreeXML(tree_file);

        String[] val = range.split(":");
        int start_module = Integer.parseInt(val[0]);
        int stop_module = Integer.parseInt(val[1]);

        // assign regulators
        M.assignRegulatorsNoAcyclStoch(beta_reg, num_reg, start_module, stop_module);

        // write results
        M.printRegulators(output_file + ".allreg.txt", true, false);
        M.printRandomRegulators(output_file + ".randomreg.txt", false);
        M.writeRegTreeXML(output_file + ".xml.gz");
    }
    //----------------------------------------------------------------------------
    // go_annotation task: GO annotation of a cluster file
    //----------------------------------------------------------------------------
    else if (task.equalsIgnoreCase("go_annotation")) {

        // those parameters must be set
        if (cluster_file == null)
            Die("Error: cluster_file parameter must be set.");
        if (output_file == null)
            Die("Error: output_file option must be set.");
        if (go_annot_file == null)
            Die("Error: go_annot_file option must be set.");
        if (go_ontology_file == null)
            Die("Error: go_ontology_file option must be set.");
        if (go_ref_file == null)
            Die("Error: go_ref_file option must be set.");

        System.out.println("Parameters");
        System.out.println("----------");
        System.out.println("task:               " + task);
        System.out.println("cluster_file:       " + cluster_file);
        System.out.println("output_file:        " + output_file);
        System.out.println("go_annot_file:      " + go_annot_file);
        System.out.println("go_ontology_file:   " + go_ontology_file);
        System.out.println("go_ref_file:        " + go_ref_file);
        System.out.println("go_p_value:         " + go_p_value);
        System.out.println("go_namespace:       " + go_namespace);
        System.out.println("map_file            " + map_file);

        BiNGO b = new BiNGO(go_annot_file, go_ontology_file, go_p_value, go_namespace);

        try {
            b.GOstats(cluster_file, go_ref_file, output_file, map_file);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    //----------------------------------------------------------------------------
    // figures task: create eps figures for each module
    //----------------------------------------------------------------------------
    else if (task.equalsIgnoreCase("figures")) {

        // those parameters must be set
        if (top_regulators == null)
            Die("Error: top_regulators option must be set.");
        if (data_file == null)
            Die("Error: data_file option must be set.");
        if (reg_file == null)
            Die("Error: reg_file option must be set.");
        if (cluster_file == null)
            Die("Error: cluster_file option must be set.");
        if (tree_file == null)
            Die("Error: tree_file option must be set.");

        System.out.println("Parameters");
        System.out.println("----------");
        System.out.println("task:                  " + task);
        System.out.println("data_file:             " + data_file);
        System.out.println("reg_file:              " + reg_file);
        System.out.println("cluster_file:          " + cluster_file);
        System.out.println("tree_file:             " + tree_file);
        System.out.println("top_regulators:        " + top_regulators);
        System.out.println("use_regulator_mean:    " + use_regulator_mean);
        System.out.println("use_global_mean:       " + use_global_mean);
        System.out.println("map_file:              " + map_file);
        System.out.println("cut_level:             " + cut_level);
        System.out.println("draw_experiment_names: " + draw_experiment_names);
        System.out.println("draw_experiment_color: " + draw_experiment_color);

        ModuleNetwork M = new ModuleNetwork();
        //read expression data, genes, clusters and regulators from files
        M.setNormalGammaPriors(lambda, mu, alpha, beta);
        M.readExpressionMatrix(data_file, null);
        M.readClusters(cluster_file);
        M.readRegulators(reg_file);
        M.initStatisticsAndScore();
        M.setDataMeanAndSDFromModuleset();

        // read regulation trees from xml file
        M.readRegTreeXML(tree_file);
        M.setTestSplits();
        // set top regulators for each module
        M.setTopRegulatorClasses(top_regulators);
        // calculate mean and sigma for all modules
        M.setModuleMeanSigma();
        M.checkExperiments();
        // use module mean (default) or global mean for figures
        M.setGlobalMeanForFigures(use_global_mean);
        // use individual regulators mean for figures (default false)
        M.setRegulatorlMeanForFigures(use_regulator_mean);
        if (use_regulator_mean == true)
            M.setRegulatorMeanSigma();
        // change gene names if a map file is given
        if (map_file != null)
            M.changeGeneNames(map_file);
        // cut trees to a certain level
        if (cut_level > 0) {
            for (Module mod : M.moduleSet) {
                for (TreeNode t : mod.hierarchicalTrees) {
                    t.testLevel(cut_level);
                }
            }
        }

        DrawModules dm = new DrawModules(M);

        if (draw_experiment_color != null) {
            M.setExperimentColor(draw_experiment_color);
            dm.enableExperimentColor();
        }

        if (draw_experiment_names == false) {
            dm.unsetDrawExperimentNames();
        }

        dm.drawAllModules();
    }
    //----------------------------------------------------------------------------
    // topdown task: run "old" heuristic algo
    //----------------------------------------------------------------------------
    else if (task.equalsIgnoreCase("topdown")) {
        int maxParents = 3;
        double epsConvergence = 1E-3;
        // Create ModuleNetwork object
        ModuleNetwork M = new ModuleNetwork();
        M.setNormalGammaPriors(lambda, mu, alpha, beta);
        M.readExpressionMatrix(data_file, gene_file);
        M.setNormalGammaPriors(lambda, mu, alpha, beta);
        M.readRegulators(reg_file);
        // Top-down search
        M.heuristicSearchMaxTopDown(maxParents, epsConvergence);
        // write results as xml file
        M.writeRegTreeXML(output_file);
    } else {
        System.out.println("task option '" + task + "' unknown.");
        System.out.println();
    }
}

From source file:com.genentech.chemistry.openEye.apps.SDFMCSSNNFinder.java

/**
 * Command line entry point of the nearest-neighbour finder.
 *
 * Parses the options, validates combinations that are not supported
 * together and then runs either an all-by-all search on the input file or a
 * search of the input file against a reference file.
 *
 * @param args raw command line arguments
 * @throws IOException if reading from standard input (debugger prompt) fails;
 *         the search helpers are assumed to be the other source -- confirm
 */
public static void main(String... args) throws IOException {
    CommandLine cmd = null;
    try {
        cmd = new PosixParser().parse(options, args);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        exitWithHelp();
    }

    // anything left over after option parsing is not understood
    final String[] leftOver = cmd.getArgs();
    if (leftOver.length > 0) {
        exitWithHelp("Unknown param: " + leftOver[0]);
    }

    // optional pause so that a debugger can be attached
    if (cmd.hasOption("d")) {
        System.err.println("Start debugger and press return:");
        BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
        stdin.readLine();
    }

    // numeric options fall back to their defaults when not given
    final String nCpuText = cmd.getOptionValue("nCpu");
    final int nCpu = (nCpuText == null) ? 1 : Integer.parseInt(nCpuText);

    final String maxNeighborsText = cmd.getOptionValue("maxNeighbors");
    final int maxNeighbors = (maxNeighborsText == null) ? 1 : Integer.parseInt(maxNeighborsText);

    final String minSimText = cmd.getOptionValue("minSimilarity");
    final double minSim = (minSimText == null) ? 0D : Double.parseDouble(minSimText);

    // remaining plain options
    final String idTag = cmd.getOptionValue("idTag");
    final boolean printAll = cmd.hasOption("printAll");
    final String countAboveSimilarityStr = cmd.getOptionValue("countSimilarAbove");
    final String inFile = cmd.getOptionValue("in");
    final String outFile = cmd.getOptionValue("out");
    final String refFile = cmd.getOptionValue("ref");
    final String tabOutput = cmd.getOptionValue("tabOutput");
    final boolean outputDuplicates = cmd.hasOption("outputDuplicates");

    final boolean anyTabOutput = tabOutput != null;
    final boolean plainTabOutput = "tab".equalsIgnoreCase(tabOutput);

    // reject option combinations that do not work together
    if (outputDuplicates) {
        if (anyTabOutput)
            exitWithHelp("-outputDuplicates will not work with outputVTab");
        if (refFile == null)
            exitWithHelp("-outputDuplicates requires -ref ");
    }
    if (plainTabOutput) {
        if (refFile != null)
            exitWithHelp("-tabOutput tab: does not work with reference file");
        if (maxNeighbors == 1)
            exitWithHelp("-tabOutput tab: does not make sense with -maxNeighbors = 1");
    }
    if (anyTabOutput && cmd.hasOption("countSimilarAbove"))
        exitWithHelp("-countSimilarAbove not supported for tab or vTab output");
    // "!(minSim > 0)" rather than "minSim <= 0" so that a NaN value is still rejected
    if (printAll && maxNeighbors <= 1 && !(minSim > 0))
        exitWithHelp("printAll only supported if: maxNeighbors > 1 or minSim > 0");

    if (printAll && anyTabOutput)
        System.err.println("WARNING: printAll ignored for tab output!\n");

    SimComparatorFactory<OEMolBase, OEMolBase, SimComparator<OEMolBase>> compFact = getComparatorFactory(cmd);

    if (refFile != null) { // reference file given; compare inFile to refFile
        performReferenceSearch(inFile, refFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu,
                countAboveSimilarityStr, outputDuplicates, printAll);
    } else { // no reference file; run all by all comparison
        performMatrixNNSearch(inFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu,
                countAboveSimilarityStr, printAll);
    }

}

From source file:erigo.filepump.FilePump.java

/**
 * Command line entry point of FilePump.
 *
 * Declares the supported options with Apache Commons CLI, parses the
 * arguments, validates the numeric values and the output mode, and finally
 * constructs the FilePump object from the resulting settings. Any parsing
 * or validation problem is reported on standard error and the method
 * returns without creating a FilePump.
 *
 * @param argsI raw command line arguments
 */
public static void main(String[] argsI) {

    // Built-in defaults; every one of them can be overridden on the command line
    final String defaultOutputFolder = ".";
    final double defaultFilesPerSec = 1.0;
    final int defaultTotNumFiles = 1000;
    final String defaultMode = "local";

    //
    // 1. Declare the command line options
    //
    Options options = new Options();
    // flag only, no argument
    options.addOption("h", "help", false, "Print this message.");
    // -outputfolder <folder>
    options.addOption(Option.builder("outputfolder").argName("folder").hasArg()
            .desc("Location of output files; this folder must exist (it will not be created); default = \""
                    + defaultOutputFolder + "\".")
            .build());
    // -fps <filespersec>
    options.addOption(Option.builder("fps").argName("filespersec").hasArg()
            .desc("Desired file rate, files/sec; default = " + defaultFilesPerSec + ".").build());
    // -totnum <num>
    options.addOption(Option.builder("totnum").argName("num").hasArg()
            .desc("Total number of output files; use -1 for unlimited number; default = " + defaultTotNumFiles
                    + ".")
            .build());
    // -mode <mode>
    options.addOption(Option.builder("mode").argName("mode").hasArg()
            .desc("Specifies output interface, one of <local|ftp|sftp>; default = " + defaultMode + ".")
            .build());
    // FTP / SFTP connection settings
    options.addOption(
            Option.builder("ftphost").argName("host").hasArg().desc("Host name, for FTP or SFTP.").build());
    options.addOption(
            Option.builder("ftpuser").argName("user").hasArg().desc("Username, for FTP or SFTP.").build());
    options.addOption(
            Option.builder("ftppass").argName("password").hasArg().desc("Password, for FTP or SFTP.").build());
    // -x: start pumping right away
    options.addOption(new Option("x", "Automatically run at startup."));

    //
    // 2. Parse the arguments
    //
    final CommandLine line;
    try {
        line = new DefaultParser().parse(options, argsI);
    } catch (ParseException exp) {
        System.err.println("Command line argument parsing failed: " + exp.getMessage());
        return;
    }

    //
    // 3. Evaluate the parsed values
    //
    if (line.hasOption("help")) {
        // show the help text and quit
        new HelpFormatter().printHelp("FilePump", options);
        return;
    }

    // the GUI flag is switched off when -x is given
    final boolean showGui = !line.hasOption("x");

    // destination folder of the generated files
    final String outputFolder = line.getOptionValue("outputfolder", defaultOutputFolder);

    // desired output rate in files per second
    final double filesPerSec;
    try {
        filesPerSec = Double.parseDouble(line.getOptionValue("fps", String.valueOf(defaultFilesPerSec)));
    } catch (NumberFormatException nfe) {
        System.err.println("\nError parsing \"fps\" (it should be a floating point value):\n" + nfe);
        return;
    }

    // number of files to write in total; -1 means unlimited
    final int totNumFiles;
    try {
        totNumFiles = Integer.parseInt(line.getOptionValue("totnum", String.valueOf(defaultTotNumFiles)));
    } catch (NumberFormatException nfe) {
        System.err.println("\nError parsing \"totnum\" (it should be an integer):\n" + nfe);
        return;
    }

    // output interface; only the three known values are accepted
    final String mode = line.getOptionValue("mode", defaultMode);
    final boolean knownMode = mode.equals("local") || mode.equals("ftp") || mode.equals("sftp");
    if (!knownMode) {
        System.err.println("\nUnrecognized mode, \"" + mode + "\"");
        return;
    }

    // FTP settings default to the empty string
    final String ftpHost = line.getOptionValue("ftphost", "");
    final String ftpUser = line.getOptionValue("ftpuser", "");
    final String ftpPassword = line.getOptionValue("ftppass", "");

    // Create the FilePump object
    new FilePump(showGui, outputFolder, filesPerSec, totNumFiles, mode, ftpHost, ftpUser, ftpPassword);

}

From source file:com.bah.applefox.main.Ingest.java

/**
 * Command line entry point of the ingest tool.
 *
 * Expects two arguments, {@code <properties file> <command>}. The
 * properties file supplies the settings that are copied into the static
 * configuration fields and into the argument array handed to the individual
 * jobs; the command selects which job is run.
 *
 * A single {@code --help} argument prints the usage text and returns. Fewer
 * than two arguments prints the usage text and exits with status 1 instead
 * of failing later on a missing array element. More than two arguments only
 * produces a warning; the extra arguments are ignored.
 *
 * @param args {@code <properties file> <command>}
 * @throws Exception if a numeric property cannot be parsed or the selected
 *         job fails
 */
public static void main(String[] args) throws Exception {

    // Explicit help request: show the usage text and stop
    if (args.length == 1 && "--help".equals(args[0])) {
        printIngestUsage();
        return;
    }

    if (args.length > 2) {
        System.out.println("2 Arguments expected, " + args.length + " given.");
    }

    // Both the properties file and the command are required below
    if (args.length < 2) {
        System.out.println("Not enough arguments");
        printIngestUsage();
        System.exit(1);
        return;
    }
    injector = Guice.createInjector(new IngesterModule());

    // The properties object to read from the configuration file
    Properties properties = new Properties();

    try {
        // Load configuration file from the command line
        FileInputStream configStream = new FileInputStream(args[0]);
        try {
            properties.load(configStream);
        } finally {
            // release the file handle whether or not loading succeeded
            configStream.close();
        }
    } catch (Exception e) {
        log.error("ABORT: File not found or could not read from file ->" + e.getMessage());
        log.error("Enter the location of the configuration file");
        System.exit(1);
    }

    // Initialize variables from configuration file

    // Accumulo Variables
    INSTANCE_NAME = properties.getProperty("INSTANCE_NAME");
    ZK_SERVERS = properties.getProperty("ZK_SERVERS");
    USERNAME = properties.getProperty("USERNAME");
    PASSWORD = properties.getProperty("PASSWORD");
    SPLIT_SIZE = properties.getProperty("SPLIT_SIZE");
    NUM_ITERATIONS = Integer.parseInt(properties.getProperty("NUM_ITERATIONS"));
    NUM_NODES = Integer.parseInt(properties.getProperty("NUM_NODES"));

    // General Search Variables
    MAX_NGRAMS = Integer.parseInt(properties.getProperty("MAX_NGRAMS"));
    GENERAL_STOP = properties.getProperty("GENERAL_STOP");

    // Full Text Variables
    FT_DATA_TABLE = properties.getProperty("FT_DATA_TABLE");
    FT_SAMPLE = properties.getProperty("FT_SAMPLE");
    FT_CHECKED_TABLE = properties.getProperty("FT_CHECKED_TABLE");
    FT_DIVS_FILE = properties.getProperty("FT_DIVS_FILE");
    FT_SPLIT_SIZE = properties.getProperty("FT_SPLIT_SIZE");

    // Web Crawler Variables
    URL_TABLE = properties.getProperty("URL_TABLE");
    SEED = properties.getProperty("SEED");
    USER_AGENT = properties.getProperty("USER_AGENT");
    URL_SPLIT_SIZE = properties.getProperty("URL_SPLIT_SIZE");

    // Page Rank Variables
    PR_TABLE_PREFIX = properties.getProperty("PR_TABLE_PREFIX");
    PR_URL_MAP_TABLE_PREFIX = properties.getProperty("PR_URL_MAP_TABLE_PREFIX");
    PR_OUT_LINKS_COUNT_TABLE = properties.getProperty("PR_OUT_LINKS_COUNT_TABLE");
    PR_FILE = properties.getProperty("PR_FILE");
    PR_DAMPENING_FACTOR = Double.parseDouble(properties.getProperty("PR_DAMPENING_FACTOR"));
    PR_ITERATIONS = Integer.parseInt(properties.getProperty("PR_ITERATIONS"));
    PR_SPLIT_SIZE = properties.getProperty("PR_SPLIT_SIZE");

    // Image Variables
    IMG_HASH_TABLE = properties.getProperty("IMG_HASH_TABLE");
    IMG_CHECKED_TABLE = properties.getProperty("IMG_CHECKED_TABLE");
    IMG_TAG_TABLE = properties.getProperty("IMG_TAG_TABLE");
    IMG_HASH_SAMPLE_TABLE = properties.getProperty("IMG_HASH_SAMPLE_TABLE");
    IMG_TAG_SAMPLE_TABLE = properties.getProperty("IMG_TAG_SAMPLE_TABLE");
    IMG_SPLIT_SIZE = properties.getProperty("IMG_SPLIT_SIZE");

    // Future Use:
    // Work Directory in HDFS
    WORK_DIR = properties.getProperty("WORK_DIR");

    // Initialize variable from command line
    RUN = args[1].toLowerCase();

    // Set the instance information for AccumuloUtils
    AccumuloUtils.setInstanceName(INSTANCE_NAME);
    AccumuloUtils.setInstancePassword(PASSWORD);
    AccumuloUtils.setUser(USERNAME);
    AccumuloUtils.setZooserver(ZK_SERVERS);
    AccumuloUtils.setSplitSize(SPLIT_SIZE);

    // Positional argument array passed on to the jobs; the index of every
    // entry is part of the contract with the job classes, so keep the order
    String[] temp = new String[25];

    // Accumulo Variables
    temp[0] = INSTANCE_NAME;
    temp[1] = ZK_SERVERS;
    temp[2] = USERNAME;
    temp[3] = PASSWORD;

    // Number of Map Tasks
    temp[4] = Integer.toString((int) Math.ceil(1.75 * NUM_NODES * 2));

    // Web Crawler Variables
    temp[5] = URL_TABLE;
    temp[6] = USER_AGENT;

    // Future Use
    temp[7] = WORK_DIR;

    // General Search
    temp[8] = GENERAL_STOP;
    temp[9] = Integer.toString(MAX_NGRAMS);

    // Full Text Variables
    temp[10] = FT_DATA_TABLE;
    temp[11] = FT_CHECKED_TABLE;

    // Page Rank Variables
    temp[12] = PR_URL_MAP_TABLE_PREFIX;
    temp[13] = PR_TABLE_PREFIX;
    temp[14] = Double.toString(PR_DAMPENING_FACTOR);
    temp[15] = PR_OUT_LINKS_COUNT_TABLE;
    temp[16] = PR_FILE;

    // Image Variables
    temp[17] = IMG_HASH_TABLE;
    temp[18] = IMG_CHECKED_TABLE;
    temp[19] = IMG_TAG_TABLE;

    temp[20] = FT_DIVS_FILE;

    // Table Split Sizes
    temp[21] = FT_SPLIT_SIZE;
    temp[22] = IMG_SPLIT_SIZE;
    temp[23] = URL_SPLIT_SIZE;
    temp[24] = PR_SPLIT_SIZE;

    if (RUN.equals("pr")) {
        // Run PR_ITERATIONS number of iterations for page ranking
        PageRank.createPageRank(temp, PR_ITERATIONS, URL_SPLIT_SIZE);
    } else if (RUN.equals("imageload")) {
        // Load image index
        AccumuloUtils.setSplitSize(URL_SPLIT_SIZE);
        ToolRunner.run(new ImageLoader(), temp);
    } else if (RUN.equals("ingest")) {
        // Ingest
        System.out.println("Ingesting");
        // Set table split size
        AccumuloUtils.setSplitSize(URL_SPLIT_SIZE);
        // Write the seed value to the table
        BatchWriter w;
        Value v = new Value();
        v.set("0".getBytes());
        Mutation m = new Mutation(SEED);
        m.put("0", "0", v);
        w = AccumuloUtils.connectBatchWrite(URL_TABLE);
        try {
            w.addMutation(m);
        } finally {
            // Close the writer before the ingest passes start so the seed is
            // not left sitting in the writer's buffer.
            // NOTE(review): assumes a standard Accumulo BatchWriter, which
            // buffers mutations until flush()/close() -- confirm.
            w.close();
        }

        for (int i = 0; i < NUM_ITERATIONS; i++) {
            // Run the ToolRunner for NUM_ITERATIONS iterations
            ToolRunner.run(CachedConfiguration.getInstance(), injector.getInstance(Ingester.class), temp);
        }
    } else if (RUN.equals("load")) {
        // Parse the URLs and add to the data table
        AccumuloUtils.setSplitSize(URL_SPLIT_SIZE);
        BatchWriter w = AccumuloUtils.connectBatchWrite(FT_CHECKED_TABLE);
        w.close();

        AccumuloUtils.setSplitSize(FT_SPLIT_SIZE);
        w = AccumuloUtils.connectBatchWrite(FT_DATA_TABLE);
        w.close();
        ToolRunner.run(CachedConfiguration.getInstance(), injector.getInstance(Loader.class), temp);
    } else if (RUN.equals("ftsample")) {
        // Create a sample table for full text index
        FTAccumuloSampler ftSampler = new FTAccumuloSampler(FT_SAMPLE, FT_DATA_TABLE, FT_CHECKED_TABLE);
        ftSampler.createSample();

    } else if (RUN.equals("imagesample")) {
        // Create a sample table for images
        ImageAccumuloSampler imgHashSampler = new ImageAccumuloSampler(IMG_HASH_SAMPLE_TABLE, IMG_HASH_TABLE,
                IMG_CHECKED_TABLE);
        imgHashSampler.createSample();

        ImageAccumuloSampler imgTagSampler = new ImageAccumuloSampler(IMG_TAG_SAMPLE_TABLE, IMG_TAG_TABLE,
                IMG_CHECKED_TABLE);
        imgTagSampler.createSample();
    } else {
        System.out.println("Invalid argument " + RUN + ".");
        System.out.println("Valid Arguments:");
        printIngestCommands();
    }

}

/** Prints the expected argument format followed by the list of valid commands. */
private static void printIngestUsage() {
    System.out.println("Arguments should be in the format <properties file> <command>");
    System.out.println("Valid commands:");
    printIngestCommands();
}

/** Prints one line per supported command with a short description. */
private static void printIngestCommands() {
    System.out.println("\tpr: Calculates Page Rank");
    System.out.println("\timageload: Loads Images from URLs");
    System.out.println("\tload: Loads Full Text Data");
    System.out.println("\tingest: Ingests URLs from given seed");
    System.out.println("\tftsample: Creates a Full Text Index Sample HashMap");
    System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap");
}