Example usage for java.util.ArrayList.get(int)

Introduction

This page collects usage examples for java.util.ArrayList.get(int).

Prototype

public E get(int index) 

Document

Returns the element at the specified position in this list.
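
A minimal, self-contained sketch of the call (class and variable names are illustrative): get performs random access by zero-based index and throws IndexOutOfBoundsException for any index outside [0, size()).

import java.util.ArrayList;

public class ArrayListGetDemo {
    public static void main(String[] args) {
        ArrayList<String> names = new ArrayList<>();
        names.add("alpha");
        names.add("beta");

        // Random access by zero-based index
        System.out.println(names.get(0)); // prints "alpha"

        // An index outside [0, size()) throws IndexOutOfBoundsException
        try {
            names.get(names.size()); // one past the last element
        } catch (IndexOutOfBoundsException e) {
            System.out.println("out of range: " + e.getMessage());
        }
    }
}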

Usage

From source file:TestBufferStreamGenomicsDBImporter.java

/**
 * Sample driver code for testing Java VariantContext write API for GenomicsDB
 * The code shows two ways of using the API
 *   (a) Iterator<VariantContext>
 *   (b) Directly adding VariantContext objects
 * If "-iterators" is passed as the second argument, method (a) is used.
 */
public static void main(final String[] args) throws IOException, GenomicsDBException, ParseException {
    if (args.length < 2) {
        System.err.println("For loading: [-iterators] <loader.json> "
                + "<stream_name_to_file.json> [bufferCapacity rank lbRowIdx ubRowIdx useMultiChromosomeIterator]");
        System.exit(-1);
    }
    int argsLoaderFileIdx = 0;
    if (args[0].equals("-iterators"))
        argsLoaderFileIdx = 1;
    //Buffer capacity
    long bufferCapacity = (args.length >= argsLoaderFileIdx + 3) ? Integer.parseInt(args[argsLoaderFileIdx + 2])
            : 1024;
    //Specify rank (or partition idx) of this process
    int rank = (args.length >= argsLoaderFileIdx + 4) ? Integer.parseInt(args[argsLoaderFileIdx + 3]) : 0;
    //Specify smallest row idx from which to start loading.
    // This is useful for incremental loading into existing array
    long lbRowIdx = (args.length >= argsLoaderFileIdx + 5) ? Long.parseLong(args[argsLoaderFileIdx + 4]) : 0;
    //Specify largest row idx up to which loading should be performed - for completeness
    long ubRowIdx = (args.length >= argsLoaderFileIdx + 6) ? Long.parseLong(args[argsLoaderFileIdx + 5])
            : Long.MAX_VALUE - 1;
    //Boolean to use MultipleChromosomeIterator
    boolean useMultiChromosomeIterator = (args.length >= argsLoaderFileIdx + 7)
            ? Boolean.parseBoolean(args[argsLoaderFileIdx + 6])
            : false;
    //<loader.json> first arg
    String loaderJSONFile = args[argsLoaderFileIdx];
    GenomicsDBImporter loader = new GenomicsDBImporter(loaderJSONFile, rank, lbRowIdx, ubRowIdx);
    //<stream_name_to_file.json> - useful for the driver only
    //JSON file that contains "stream_name": "vcf_file_path" entries
    FileReader mappingReader = new FileReader(args[argsLoaderFileIdx + 1]);
    JSONParser parser = new JSONParser();
    LinkedHashMap streamNameToFileName = (LinkedHashMap) parser.parse(mappingReader, new LinkedHashFactory());
    ArrayList<VCFFileStreamInfo> streamInfoVec = new ArrayList<VCFFileStreamInfo>();
    long rowIdx = 0;
    for (Object currObj : streamNameToFileName.entrySet()) {
        Map.Entry<String, String> entry = (Map.Entry<String, String>) currObj;
        VCFFileStreamInfo currInfo = new VCFFileStreamInfo(entry.getValue(), loaderJSONFile, rank,
                useMultiChromosomeIterator);

        /** The following 2 lines are not mandatory - use initializeSampleInfoMapFromHeader()
         * iff you know for sure that sample names in the VCF header are globally unique
         * across all streams/files. If not, you have 2 options:
         *   (a) specify your own mapping from sample index in the header to SampleInfo object
         *       (unique_name, rowIdx) OR
         *   (b) specify the mapping in the callset_mapping_file (JSON) and pass null to
         *       addSortedVariantContextIterator()
         */
        LinkedHashMap<Integer, GenomicsDBImporter.SampleInfo> sampleIndexToInfo = new LinkedHashMap<Integer, GenomicsDBImporter.SampleInfo>();
        rowIdx = GenomicsDBImporter.initializeSampleInfoMapFromHeader(sampleIndexToInfo, currInfo.mVCFHeader,
                rowIdx);
        int streamIdx = -1;
        if (args[0].equals("-iterators"))
            streamIdx = loader.addSortedVariantContextIterator(entry.getKey(), currInfo.mVCFHeader,
                    currInfo.mIterator, bufferCapacity, VariantContextWriterBuilder.OutputType.BCF_STREAM,
                    sampleIndexToInfo); //pass sorted VC iterators
        else
            //use buffers - VCs will be provided by caller
            streamIdx = loader.addBufferStream(entry.getKey(), currInfo.mVCFHeader, bufferCapacity,
                    VariantContextWriterBuilder.OutputType.BCF_STREAM, sampleIndexToInfo);
        currInfo.mStreamIdx = streamIdx;
        streamInfoVec.add(currInfo);
    }
    if (args[0].equals("-iterators")) {
        //Much simpler interface if using Iterator<VariantContext>
        loader.importBatch();
        assert loader.isDone();
    } else {
        //Must be called after all iterators/streams added - no more iterators/streams
        // can be added once this function is called
        loader.setupGenomicsDBImporter();
        //Counts and tracks buffer streams for which new data must be supplied
        //Initialized to all the buffer streams
        int numExhaustedBufferStreams = streamInfoVec.size();
        int[] exhaustedBufferStreamIdxs = new int[numExhaustedBufferStreams];
        for (int i = 0; i < numExhaustedBufferStreams; ++i)
            exhaustedBufferStreamIdxs[i] = i;
        while (!loader.isDone()) {
            //Add data for streams that were exhausted in the previous round
            for (int i = 0; i < numExhaustedBufferStreams; ++i) {
                VCFFileStreamInfo currInfo = streamInfoVec.get(exhaustedBufferStreamIdxs[i]);
                boolean added = true;
                while (added && (currInfo.mIterator.hasNext() || currInfo.mNextVC != null)) {
                    if (currInfo.mNextVC != null)
                        added = loader.add(currInfo.mNextVC, currInfo.mStreamIdx);
                    if (added)
                        if (currInfo.mIterator.hasNext())
                            currInfo.mNextVC = currInfo.mIterator.next();
                        else
                            currInfo.mNextVC = null;
                }
            }
            loader.importBatch();
            numExhaustedBufferStreams = (int) loader.getNumExhaustedBufferStreams();
            for (int i = 0; i < numExhaustedBufferStreams; ++i)
                exhaustedBufferStreamIdxs[i] = loader.getExhaustedBufferStreamIndex(i);
        }
    }
}
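
In the driver above, ArrayList#get resolves each exhausted-stream index (stored in an int[]) back to its VCFFileStreamInfo before refilling. A distilled sketch of that index-lookup idiom, with a placeholder class standing in for the GenomicsDB types:

import java.util.ArrayList;

public class IndexedRefillSketch {
    // Placeholder for the per-stream state held in the list
    static class StreamInfo {
        final String name;
        StreamInfo(String name) { this.name = name; }
    }

    public static void main(String[] args) {
        ArrayList<StreamInfo> streams = new ArrayList<>();
        streams.add(new StreamInfo("s0"));
        streams.add(new StreamInfo("s1"));
        streams.add(new StreamInfo("s2"));

        // Indices of streams needing a refill, as in exhaustedBufferStreamIdxs
        int[] exhausted = { 2, 0 };

        // get(int) maps each stored index back to its list element
        for (int idx : exhausted) {
            System.out.println("refill " + streams.get(idx).name);
        }
    }
}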

From source file:guardar.en.base.de.datos.MainServidor.java

public static void main(String[] args)
        throws ParserConfigurationException, SAXException, IOException, ClassNotFoundException {

    Mongo mongo = new Mongo("localhost", 27017);

    // database name
    DB database = mongo.getDB("paginas");
    // collections in the database
    DBCollection collection = database.getCollection("indice");
    DBCollection collection_textos = database.getCollection("tabla");
    ArrayList<String> lista_textos = new ArrayList<>();

    try {
        ServerSocket servidor = new ServerSocket(4545); // Create the server; it waits until a client arrives.
        while (true) {
            String aux = new String();
            lista_textos.clear();
            Socket clienteNuevo = servidor.accept(); // Accept the incoming client.
            // Blocks again until an object arrives.
            ObjectInputStream entrada = new ObjectInputStream(clienteNuevo.getInputStream());

            JSONObject request = (JSONObject) entrada.readObject();
            String b = (String) request.get("id");
            // query the database for the requested word

            BasicDBObject query = new BasicDBObject("palabra", b);
            DBCursor cursor = collection.find(query);
            ArrayList<DocumentosDB> lista_doc = new ArrayList<>();
            // take the "documentos" field from the query result and add its entries to a list
            try {
                while (cursor.hasNext()) {
                    //System.out.println(cursor.next());
                    BasicDBList campo_documentos = (BasicDBList) cursor.next().get("documentos");
                    // iterate one by one over the elements of the "documentos" field
                    for (Iterator<Object> it = campo_documentos.iterator(); it.hasNext();) {
                        BasicDBObject dbo = (BasicDBObject) it.next();
                        // doc holds an id and a frequency
                        DocumentosDB doc = new DocumentosDB();
                        doc.makefn2(dbo);
                        //int id = (int)doc.getId_documento();
                        //int f = (int)doc.getFrecuencia();

                        lista_doc.add(doc);

                        // Query the text collection (disabled; kept for reference)
                        /* BasicDBObject query_textos = new BasicDBObject("id", doc.getId_documento());//query
                         DBCursor cursor_textos = collection_textos.find(query_textos);
                         try {
                        while (cursor_textos.hasNext()) {
                                    
                                    
                            DBObject obj = cursor_textos.next();
                                
                            String titulo = (String) obj.get("titulo");
                            titulo = titulo + "\n\n";
                            String texto = (String) obj.get("texto");
                                
                            String texto_final = titulo + texto;
                            aux = texto_final;
                            lista_textos.add(texto_final);
                        }
                         } finally {
                        cursor_textos.close();
                         }*/
                        //System.out.println(doc.getId_documento());
                        //System.out.println(doc.getFrecuencia());

                    } // end for

                } //end while query

            } finally {
                cursor.close();
            }

            // sort the list in ascending order
            Collections.sort(lista_doc, new Comparator<DocumentosDB>() {

                @Override
                public int compare(DocumentosDB o1, DocumentosDB o2) {
                    return o1.getFrecuencia().compareTo(o2.getFrecuencia());
                }
            });
            int tam = lista_doc.size() - 1;
            for (int j = tam; j >= 0; j--) {

                BasicDBObject query_textos = new BasicDBObject("id",
                        (int) lista_doc.get(j).getId_documento().intValue()); // query by document id
                DBCursor cursor_textos = collection_textos.find(query_textos); // look it up
                try {
                    while (cursor_textos.hasNext()) {

                        DBObject obj = cursor_textos.next();
                        String titulo = "*******************************";
                        titulo += (String) obj.get("titulo");
                        int f = (int) lista_doc.get(j).getFrecuencia().intValue();
                        String strinf = Integer.toString(f);
                        titulo += "******************************* frecuencia:" + strinf;
                        titulo = titulo + "\n\n";

                        String texto = (String) obj.get("texto");

                        String texto_final = titulo + texto + "\n\n";
                        aux = aux + texto_final;
                        //lista_textos.add(texto_final);
                    }
                } finally {
                    cursor_textos.close();
                }

            }

            // update the cache
            try {
                Socket cliente_cache = new Socket("localhost", 4500); // connect to the cache server
                ObjectOutputStream mensaje_cache = new ObjectOutputStream(cliente_cache.getOutputStream()); // output stream to the cache server
                JSONObject actualizacion_cache = new JSONObject();
                actualizacion_cache.put("actualizacion", 1);
                actualizacion_cache.put("busqueda", b);
                actualizacion_cache.put("respuesta", aux);
                mensaje_cache.writeObject(actualizacion_cache); // send the update to the cache server
            } catch (Exception ex) {
                // cache update failures are ignored; the client is still answered below
            }

            // REPLY FROM THE INDEX SERVER TO THE FRONT END
            ObjectOutputStream resp = new ObjectOutputStream(clienteNuevo.getOutputStream()); // get the client's output stream to send the reply
            resp.writeObject(aux);
            System.out.println("msj enviado desde el servidor");

        }
    } catch (IOException ex) {
        Logger.getLogger(MainServidor.class.getName()).log(Level.SEVERE, null, ex);
    }

}
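
The server sorts lista_doc by frequency and then walks it from the last index down to 0 with get(j), so the most frequent documents are emitted first. A minimal sketch of that sort-then-reverse-traversal idiom (plain integers stand in for DocumentosDB):

import java.util.ArrayList;
import java.util.Comparator;

public class ReverseTraversalSketch {
    public static void main(String[] args) {
        ArrayList<Integer> frequencies = new ArrayList<>();
        frequencies.add(3);
        frequencies.add(10);
        frequencies.add(7);

        // Sort ascending, as Collections.sort does in the example
        frequencies.sort(Comparator.naturalOrder());

        // Walk from the highest index down, emitting values in descending order
        for (int j = frequencies.size() - 1; j >= 0; j--) {
            System.out.println(frequencies.get(j)); // 10, 7, 3
        }
    }
}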

From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java

/**
 * @param args
 * @throws ParseException 
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids

    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;
            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
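
Both the relationship loop above and its file-moving counterpart use get(i) and get(j) to pair every dataset in firstGroup with every dataset in secondGroup, swapping each pair so the smaller id comes first. A reduced sketch of that canonical-ordering idiom (ids are the list elements themselves here):

import java.util.ArrayList;

public class PairOrderingSketch {
    public static void main(String[] args) {
        ArrayList<String> firstGroup = new ArrayList<>();
        ArrayList<String> secondGroup = new ArrayList<>();
        firstGroup.add("2");
        firstGroup.add("5");
        secondGroup.add("3");

        for (int i = 0; i < firstGroup.size(); i++) {
            for (int j = 0; j < secondGroup.size(); j++) {
                String a = firstGroup.get(i);
                String b = secondGroup.get(j);
                // Order each pair numerically so (2,3) and (3,2) map to the same key
                String dataset1 = Integer.parseInt(a) < Integer.parseInt(b) ? a : b;
                String dataset2 = dataset1.equals(a) ? b : a;
                if (dataset1.equals(dataset2))
                    continue; // skip self-pairs, as the example does
                System.out.println(dataset1 + "-" + dataset2); // e.g. "2-3", "3-5"
            }
        }
    }
}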

From source file:net.massbank.validator.RecordValidator.java

public static void main(String[] args) {
    RequestDummy request;

    PrintStream out = System.out;

    Options lvOptions = new Options();
    lvOptions.addOption("h", "help", false, "show this help.");
    lvOptions.addOption("r", "recdata", true,
            "points to the recdata directory containing massbank records. Reads all *.txt files in there.");

    CommandLineParser lvParser = new BasicParser();
    CommandLine lvCmd = null;
    try {
        lvCmd = lvParser.parse(lvOptions, args);
        if (lvCmd.hasOption('h')) {
            printHelp(lvOptions);
            return;
        }
    } catch (org.apache.commons.cli.ParseException pvException) {
        System.out.println(pvException.getMessage());
        return;
    }

    String recDataPath = lvCmd.getOptionValue("recdata");

    // ---------------------------------------------
    // Initialization
    // ---------------------------------------------

    final String baseUrl = MassBankEnv.get(MassBankEnv.KEY_BASE_URL);
    final String dbRootPath = "./";
    final String dbHostName = MassBankEnv.get(MassBankEnv.KEY_DB_HOST_NAME);
    final String tomcatTmpPath = ".";
    final String tmpPath = (new File(tomcatTmpPath + sdf.format(new Date()))).getPath() + File.separator;
    GetConfig conf = new GetConfig(baseUrl);
    int recVersion = 2;
    String selDbName = "";
    Object up = null; // Was: file Upload
    boolean isResult = true;
    String upFileName = "";
    boolean upResult = false;
    DatabaseAccess db = null;

    try {
        // ----------------------------------------------------
        // File upload handling (disabled)
        // ----------------------------------------------------
        // if (FileUpload.isMultipartContent(request)) {
        // (new File(tmpPath)).mkdir();
        // String os = System.getProperty("os.name");
        // if (os.indexOf("Windows") == -1) {
        // isResult = FileUtil.changeMode("777", tmpPath);
        // if (!isResult) {
        // out.println(msgErr("[" + tmpPath
        // + "]  chmod failed."));
        // return;
        // }
        // }
        // up = new FileUpload(request, tmpPath);
        // }

        // ----------------------------------------------------
        // Get the list of database names
        // ----------------------------------------------------
        List<String> dbNameList = Arrays.asList(conf.getDbName());
        ArrayList<String> dbNames = new ArrayList<String>();
        dbNames.add("");
        File[] dbDirs = (new File(dbRootPath)).listFiles();
        if (dbDirs != null) {
            for (File dbDir : dbDirs) {
                if (dbDir.isDirectory()) {
                    int pos = dbDir.getName().lastIndexOf("\\");
                    String dbDirName = dbDir.getName().substring(pos + 1);
                    pos = dbDirName.lastIndexOf("/");
                    dbDirName = dbDirName.substring(pos + 1);
                    if (dbNameList.contains(dbDirName)) {
                        // keep only DB directories that are also listed in massbank.conf
                        dbNames.add(dbDirName);
                    }
                }
            }
        }
        if (dbDirs == null || dbNames.size() == 0) {
            out.println(msgErr("[" + dbRootPath + "] directory not exist."));
            return;
        }
        Collections.sort(dbNames);

        // ----------------------------------------------------
        // Request parameter handling (disabled)
        // ----------------------------------------------------
        // if (FileUpload.isMultipartContent(request)) {
        // HashMap<String, String[]> reqParamMap = new HashMap<String,
        // String[]>();
        // reqParamMap = up.getRequestParam();
        // if (reqParamMap != null) {
        // for (Map.Entry<String, String[]> req : reqParamMap
        // .entrySet()) {
        // if (req.getKey().equals("ver")) {
        // try {
        // recVersion = Integer
        // .parseInt(req.getValue()[0]);
        // } catch (NumberFormatException nfe) {
        // }
        // } else if (req.getKey().equals("db")) {
        // selDbName = req.getValue()[0];
        // }
        // }
        // }
        // } else {
        // if (request.getParameter("ver") != null) {
        // try {
        // recVersion = Integer.parseInt(request
        // .getParameter("ver"));
        // } catch (NumberFormatException nfe) {
        // }
        // }
        // selDbName = request.getParameter("db");
        // }
        // if (selDbName == null || selDbName.equals("")
        // || !dbNames.contains(selDbName)) {
        // selDbName = dbNames.get(0);
        // }

        // ---------------------------------------------
        // Display database selection
        // ---------------------------------------------
        out.println("Database: ");
        for (int i = 0; i < dbNames.size(); i++) {
            String dbName = dbNames.get(i);
            out.print("dbName");
            if (dbName.equals(selDbName)) {
                out.print(" selected");
            }
            if (i == 0) {
                out.println("------------------");
            } else {
                out.println(dbName);
            }
        }
        out.println("Record Version : ");
        out.println(recVersion);

        out.println("Record Archive :");

        // ---------------------------------------------
        // Upload result handling (disabled)
        // ---------------------------------------------
        //         HashMap<String, Boolean> upFileMap = up.doUpload();
        //         if (upFileMap != null) {
        //            for (Map.Entry<String, Boolean> e : upFileMap.entrySet()) {
        //               upFileName = e.getKey();
        //               upResult = e.getValue();
        //               break;
        //            }
        //            if (upFileName.equals("")) {
        //               out.println(msgErr("please select file."));
        //               isResult = false;
        //            } else if (!upResult) {
        //               out.println(msgErr("[" + upFileName
        //                     + "] upload failed."));
        //               isResult = false;
        //            } else if (!upFileName.endsWith(ZIP_EXTENSION)
        //                  && !upFileName.endsWith(MSBK_EXTENSION)) {
        //               out.println(msgErr("please select ["
        //                     + UPLOAD_RECDATA_ZIP
        //                     + "] or ["
        //                     + UPLOAD_RECDATA_MSBK + "]."));
        //               up.deleteFile(upFileName);
        //               isResult = false;
        //            }
        //         } else {
        //            out.println(msgErr("server error."));
        //            isResult = false;
        //         }
        //         up.deleteFileItem();
        //         if (!isResult) {
        //            return;
        //         }

        // ---------------------------------------------
        // Archive extraction (disabled)
        // ---------------------------------------------
        //         final String upFilePath = (new File(tmpPath + File.separator
        //               + upFileName)).getPath();
        //         isResult = FileUtil.unZip(upFilePath, tmpPath);
        //         if (!isResult) {
        //            out.println(msgErr("["
        //                  + upFileName
        //                  + "]  extraction failed. possibility of time-out."));
        //            return;
        //         }

        // ---------------------------------------------
        // Resolve record paths
        // ---------------------------------------------
        final String recPath = (new File(dbRootPath + File.separator + selDbName)).getPath();
        File tmpRecDir = new File(recDataPath);
        if (!tmpRecDir.isDirectory()) {
            tmpRecDir.mkdirs();
        }

        // ---------------------------------------------
        // Record data directory (disabled)
        // ---------------------------------------------
        //         final String recDataPath = (new File(tmpPath + File.separator
        //               + RECDATA_DIR_NAME)).getPath()
        //               + File.separator;
        //
        //         if (!(new File(recDataPath)).isDirectory()) {
        //            if (upFileName.endsWith(ZIP_EXTENSION)) {
        //               out.println(msgErr("["
        //                     + RECDATA_DIR_NAME
        //                     + "]  directory is not included in the up-loading file."));
        //            } else if (upFileName.endsWith(MSBK_EXTENSION)) {
        //               out.println(msgErr("The uploaded file is not record data."));
        //            }
        //            return;
        //         }

        // ---------------------------------------------
        // Database connection (disabled)
        // ---------------------------------------------
        //         db = new DatabaseAccess(dbHostName, selDbName);
        //         isResult = db.open();
        //         if (!isResult) {
        //            db.close();
        //            out.println(msgErr("not connect to database."));
        //            return;
        //         }

        // ---------------------------------------------
        // Validate records
        // ---------------------------------------------
        TreeMap<String, String> resultMap = validationRecord(db, out, recDataPath, recPath, recVersion);
        if (resultMap.size() == 0) {
            return;
        }

        // ---------------------------------------------
        // Display results
        // ---------------------------------------------
        isResult = dispResult(out, resultMap);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (db != null) {
            db.close();
        }
        File tmpDir = new File(tmpPath);
        if (tmpDir.exists()) {
            FileUtil.removeDir(tmpDir.getPath());
        }
    }

}

From source file:DIA_Umpire_Quant.DIA_Umpire_IntLibSearch.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    System.out.println(
            "=================================================================================================");
    System.out.println("DIA-Umpire targeted re-extraction analysis using internal library (version: "
            + UmpireInfo.GetInstance().Version + ")");
    if (args.length != 1) {
        System.out.println(
                "command format error, the correct format should be : java -jar -Xmx10G DIA_Umpire_IntLibSearch.jar diaumpire_module.params");
        return;
    }
    try {
        ConsoleLogger.SetConsoleLogger(Level.INFO);
        ConsoleLogger.SetFileLogger(Level.DEBUG,
                FilenameUtils.getFullPath(args[0]) + "diaumpire_intlibsearch.log");
    } catch (Exception e) {
        // logging setup failure is non-fatal; continue with defaults
    }

    Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version);
    Logger.getRootLogger().info("Parameter file:" + args[0]);

    BufferedReader reader = new BufferedReader(new FileReader(args[0]));
    String line = "";
    String WorkFolder = "";
    int NoCPUs = 2;

    String InternalLibID = "";

    float ProbThreshold = 0.99f;
    float RTWindow_Int = -1f;
    float Freq = 0f;
    int TopNFrag = 6;

    TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600);
    HashMap<String, File> AssignFiles = new HashMap<>();

    //<editor-fold defaultstate="collapsed" desc="Reading parameter file">
    while ((line = reader.readLine()) != null) {
        line = line.trim();
        Logger.getRootLogger().info(line);
        if (!"".equals(line) && !line.startsWith("#")) {
            //System.out.println(line);
            if (line.equals("==File list begin")) {
                do {
                    line = reader.readLine();
                    line = line.trim();
                    if (line.equals("==File list end")) {
                        continue;
                    } else if (!"".equals(line)) {
                        File newfile = new File(line);
                        if (newfile.exists()) {
                            AssignFiles.put(newfile.getAbsolutePath(), newfile);
                        } else {
                            Logger.getRootLogger().info("File: " + newfile + " does not exist.");
                        }
                    }
                } while (!line.equals("==File list end"));
            }
            if (line.split("=").length < 2) {
                continue;
            }
            String type = line.split("=")[0].trim();
            String value = line.split("=")[1].trim();
            switch (type) {
            case "Path": {
                WorkFolder = value;
                break;
            }
            case "path": {
                WorkFolder = value;
                break;
            }
            case "Thread": {
                NoCPUs = Integer.parseInt(value);
                break;
            }

            case "InternalLibID": {
                InternalLibID = value;
                break;
            }

            case "RTWindow_Int": {
                RTWindow_Int = Float.parseFloat(value);
                break;
            }

            case "ProbThreshold": {
                ProbThreshold = Float.parseFloat(value);
                break;
            }
            case "TopNFrag": {
                TopNFrag = Integer.parseInt(value);
                break;
            }
            case "Freq": {
                Freq = Float.parseFloat(value);
                break;
            }
            case "Fasta": {
                tandemPara.FastaPath = value;
                break;
            }
            }
        }
    }
    //</editor-fold>

    //Initialize PTM manager using compomics library
    PTMManager.GetInstance();

    //Check if the fasta file can be found
    if (!new File(tandemPara.FastaPath).exists()) {
        Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath
                + " cannot be found, the process will be terminated, please check.");
        System.exit(1);
    }

    //Generate DIA file list
    ArrayList<DIAPack> FileList = new ArrayList<>();
    try {
        File folder = new File(WorkFolder);
        if (!folder.exists()) {
            Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found.");
            System.exit(1);
        }
        for (final File fileEntry : folder.listFiles()) {
            if (fileEntry.isFile()
                    && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                            | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry);
            }
            if (fileEntry.isDirectory()) {
                for (final File fileEntry2 : fileEntry.listFiles()) {
                    if (fileEntry2.isFile()
                            && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                                    | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                        AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2);
                    }
                }
            }
        }

        Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size());
        for (File fileEntry : AssignFiles.values()) {
            Logger.getRootLogger().info(fileEntry.getAbsolutePath());
        }
        for (File fileEntry : AssignFiles.values()) {
            String mzXMLFile = fileEntry.getAbsolutePath();
            if (mzXMLFile.toLowerCase().endsWith(".mzxml") || mzXMLFile.toLowerCase().endsWith(".mzml")) {
                DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs);
                Logger.getRootLogger().info(
                        "=================================================================================================");
                Logger.getRootLogger().info("Processing " + mzXMLFile);
                if (!DiaFile.LoadDIASetting()) {
                    Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete");
                    System.exit(1);
                }
                if (!DiaFile.LoadParams()) {
                    Logger.getRootLogger().info("Loading parameters failed, job is incomplete");
                    System.exit(1);
                }
                Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "....");

                //If the serialization file for ID file existed
                if (DiaFile.ReadSerializedLCMSID()) {
                    DiaFile.IDsummary.ReduceMemoryUsage();
                    DiaFile.IDsummary.FastaPath = tandemPara.FastaPath;
                    FileList.add(DiaFile);
                }
            }
        }

        //<editor-fold defaultstate="collapsed" desc="Targete re-extraction using internal library">            
        Logger.getRootLogger().info(
                "=================================================================================================");
        if (FileList.size() > 1) {
            Logger.getRootLogger().info("Targeted re-extraction using internal library");

            FragmentLibManager libManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder,
                    InternalLibID);
            if (libManager == null) {
                Logger.getRootLogger().info("Building internal spectral library");
                libManager = new FragmentLibManager(InternalLibID);
                ArrayList<LCMSID> LCMSIDList = new ArrayList<>();
                for (DIAPack dia : FileList) {
                    LCMSIDList.add(dia.IDsummary);
                }
                libManager.ImportFragLibTopFrag(LCMSIDList, Freq, TopNFrag);
                libManager.WriteFragmentLibSerialization(WorkFolder);
            }
            libManager.ReduceMemoryUsage();

            Logger.getRootLogger()
                    .info("Building retention time prediction model and generating candidate peptide list");
            for (int i = 0; i < FileList.size(); i++) {
                FileList.get(i).IDsummary.ClearMappedPep();
            }
            for (int i = 0; i < FileList.size(); i++) {
                for (int j = i + 1; j < FileList.size(); j++) {
                    RTAlignedPepIonMapping alignment = new RTAlignedPepIonMapping(WorkFolder,
                            FileList.get(i).GetParameter(), FileList.get(i).IDsummary,
                            FileList.get(j).IDsummary);
                    alignment.GenerateModel();
                    alignment.GenerateMappedPepIon();
                }
                FileList.get(i).ExportID();
                FileList.get(i).IDsummary = null;
            }

            Logger.getRootLogger().info("Targeted matching........");
            for (DIAPack diafile : FileList) {
                if (diafile.IDsummary == null) {
                    diafile.ReadSerializedLCMSID();
                }
                if (!diafile.IDsummary.GetMappedPepIonList().isEmpty()) {
                    diafile.UseMappedIon = true;
                    diafile.FilterMappedIonByProb = false;
                    diafile.BuildStructure();
                    diafile.MS1FeatureMap.ReadPeakCluster();
                    diafile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster();
                    diafile.GenerateMassCalibrationRTMap();
                    diafile.TargetedExtractionQuant(false, libManager, ProbThreshold, RTWindow_Int);
                    diafile.MS1FeatureMap.ClearAllPeaks();
                    diafile.IDsummary.ReduceMemoryUsage();
                    diafile.IDsummary.RemoveLowProbMappedIon(ProbThreshold);
                    diafile.ExportID();
                    Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size()
                            + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size());
                    diafile.ClearStructure();
                }
                diafile.IDsummary = null;
                System.gc();
            }
            Logger.getRootLogger().info(
                    "=================================================================================================");
        }
        //</editor-fold>

        Logger.getRootLogger().info("Job done");
        Logger.getRootLogger().info(
                "=================================================================================================");

    } catch (Exception e) {
        Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e));
        throw e;
    }
}

From source file:edu.nyu.vida.data_polygamy.relationship_computation.Relationship.java

/**
 * @param args
 * @throws ParseException 
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option scoreOption = new Option("sc", "score", true, "set threshold for relationship score");
    scoreOption.setRequired(false);
    scoreOption.setArgName("SCORE THRESHOLD");
    options.addOption(scoreOption);

    Option strengthOption = new Option("st", "strength", true, "set threshold for relationship strength");
    strengthOption.setRequired(false);
    strengthOption.setArgName("STRENGTH THRESHOLD");
    options.addOption(strengthOption);

    Option completeRandomizationOption = new Option("c", "complete-randomization", false,
            "use complete randomization when performing significance tests");
    completeRandomizationOption.setRequired(false);
    options.addOption(completeRandomizationOption);

    Option idOption = new Option("id", "ids", false, "output id instead of names for datasets and attributes");
    idOption.setRequired(false);
    options.addOption(idOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    Option removeOption = new Option("r", "remove-not-significant", false,
            "remove relationships that are not " + "significant from the final output");
    removeOption.setRequired(false);
    options.addOption(removeOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeNotSignificant = cmd.hasOption("r");
    boolean removeExistingFiles = cmd.hasOption("f");
    boolean completeRandomization = cmd.hasOption("c");
    boolean hasScoreThreshold = cmd.hasOption("sc");
    boolean hasStrengthThreshold = cmd.hasOption("st");
    boolean outputIds = cmd.hasOption("id");
    String scoreThreshold = hasScoreThreshold ? cmd.getOptionValue("sc") : "";
    String strengthThreshold = hasStrengthThreshold ? cmd.getOptionValue("st") : "";

    // all datasets
    ArrayList<String> all_datasets = new ArrayList<String>();
    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        all_datasets.add(line.split("\t")[0]);
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();
    String[] all_datasets_array = new String[all_datasets.size()];
    all_datasets.toArray(all_datasets_array);

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : all_datasets_array;
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("No indices from datasets in G1.");
        System.exit(0);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("No indices from datasets in G2.");
        System.exit(0);
    }

    // getting dataset ids

    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    String relationshipsDir = "";
    if (outputIds) {
        relationshipsDir = FrameworkUtils.relationshipsIdsDir;
    } else {
        relationshipsDir = FrameworkUtils.relationshipsDir;
    }

    FrameworkUtils.createDir(s3bucket + relationshipsDir, s3conf, s3);

    String random = completeRandomization ? "complete" : "restricted";

    String indexInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;
            String correlationOutputFileName = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2
                    + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        indexInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "relationship" + "-" + random;
    String relationshipOutputDir = s3bucket + relationshipsDir + "/tmp/";

    FrameworkUtils.removeFile(relationshipOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("complete-random", String.valueOf(completeRandomization));
    conf.set("output-ids", String.valueOf(outputIds));
    conf.set("complete-random-str", random);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    conf.set("remove-not-significant", String.valueOf(removeNotSignificant));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }
    if (hasScoreThreshold) {
        conf.set("score-threshold", scoreThreshold);
    }
    if (hasStrengthThreshold) {
        conf.set("strength-threshold", strengthThreshold);
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(TopologyTimeSeriesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationMapper.class);
    job.setReducerClass(CorrelationReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, indexInputDirs.substring(0, indexInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(relationshipOutputDir));

    job.setJarByClass(Relationship.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to the right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + relationshipsDir + "/tmp/" + dataset1 + "-" + dataset2 + "/";
            String to = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
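
A minimal, self-contained sketch of the pairing pattern above: ArrayList.get(int) pulls one dataset name from each group by index, and each pair is ordered by its numeric id before the output directory name is derived. The class name and sample data are hypothetical, not part of the framework.

import java.util.ArrayList;
import java.util.HashMap;

public class PairByIdSketch {
    public static void main(String[] args) {
        ArrayList<String> firstGroup = new ArrayList<String>();
        ArrayList<String> secondGroup = new ArrayList<String>();
        firstGroup.add("taxi");
        secondGroup.add("weather");
        secondGroup.add("taxi");

        HashMap<String, String> datasetId = new HashMap<String, String>();
        datasetId.put("taxi", "1");
        datasetId.put("weather", "2");

        for (int i = 0; i < firstGroup.size(); i++) {
            for (int j = 0; j < secondGroup.size(); j++) {
                // ArrayList.get(index) returns the element at that position.
                String dataset1 = firstGroup.get(i);
                String dataset2 = secondGroup.get(j);
                if (dataset1.equals(dataset2))
                    continue; // skip self-pairs
                // Order by id so "taxi-weather" and "weather-taxi" map to
                // the same relationship directory.
                if (Integer.parseInt(datasetId.get(dataset1)) > Integer.parseInt(datasetId.get(dataset2))) {
                    String tmp = dataset1;
                    dataset1 = dataset2;
                    dataset2 = tmp;
                }
                System.out.println(dataset1 + "-" + dataset2);
            }
        }
    }
}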

From source file:net.java.sen.tools.MkSenDic.java

/**
 * Build sen dictionary.
 * 
 * @param args
 *            custom dictionary files; see dic/build.xml.
 */
public static void main(String args[]) {
    ResourceBundle rb = ResourceBundle.getBundle("dictionary");
    DictionaryMaker dm1 = new DictionaryMaker();
    DictionaryMaker dm2 = new DictionaryMaker();
    DictionaryMaker dm3 = new DictionaryMaker();

    // 1st field information of connect file.
    Vector rule1 = new Vector();

    // 2nd field information of connect file.
    Vector rule2 = new Vector();

    // 3rd field information of connect file.
    Vector rule3 = new Vector();

    // 4th field information of connect file.
    // This field holds the cost of a morpheme connection; it is stored
    // at the flattened index [size3 * (size2 * attr1 + attr2) + attr3]
    // (see the connection-matrix construction in Step3 below).
    short score[] = new short[20131];

    long start = System.currentTimeMillis();

    // /////////////////////////////////////////
    //
    // Step1. Loading connection file.
    //
    log.info("(1/7): reading connection matrix ... ");
    try {
        log.info("connection file = " + rb.getString("text_connection_file"));
        log.info("charset = " + rb.getString("dic.charset"));
        CSVParser csvparser = new CSVParser(new FileInputStream(rb.getString("text_connection_file")),
                rb.getString("dic.charset"));
        String t[];
        int line = 0;
        while ((t = csvparser.nextTokens()) != null) {
            if (t.length < 4) {
                log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line);
                log.warn(rb.getString("text_connection_file") + "may be broken.");
                break;
            }
            dm1.add(t[0]);
            rule1.add(t[0]);

            dm2.add(t[1]);
            rule2.add(t[1]);

            dm3.add(t[2]);
            rule3.add(t[2]);

            if (line == score.length) {
                score = resize(score);
            }

            score[line++] = (short) Integer.parseInt(t[3]);
        }

        // /////////////////////////////////////////
        //
        // Step2. Building internal dictionary
        //
        log.info("(2/7): building type dictionary ... ");
        dm1.build();
        dm2.build();
        dm3.build();

        // To check a specific morpheme, uncomment and modify the
        // following lines:
        /*
         * System.out.print("22="); dm3.getById(22);
         * System.out.print("368="); dm3.getById(368);
         * 
         * System.out.println(dm3.getDicId("?????*,*,*,*,?"));
         * DictionaryMaker.debug = true;
         * System.out.println(dm3.getDicId("?????*,*,*,*,?"));
         * System.out.println(dm3.getDicIdNoCache("?????*,*,*,*,?"));
         */

    } catch (IOException e) {
        e.printStackTrace();
        System.exit(0);
    }

    // -------------------------------------------------

    int size1 = dm1.size();
    int size2 = dm2.size();
    int size3 = dm3.size();
    int ruleSize = rule1.size();
    short matrix[] = new short[size1 * size2 * size3];
    short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost"));

    // /////////////////////////////////////////
    //
    // Step3. Writing Connection Matrix
    //
    log.info("(3/7): writing conection matrix (" + size1 + " x " + size2 + " x " + size3 + " = "
            + size1 * size2 * size3 + ") ...");

    for (int i = 0; i < (int) (size1 * size2 * size3); i++)
        matrix[i] = default_cost;

    for (int i = 0; i < ruleSize; i++) {
        Vector r1 = dm1.getRuleIdList((String) rule1.get(i));
        Vector r2 = dm2.getRuleIdList((String) rule2.get(i));
        Vector r3 = dm3.getRuleIdList((String) rule3.get(i));

        for (Iterator i1 = r1.iterator(); i1.hasNext();) {
            int ii1 = ((Integer) i1.next()).intValue();
            for (Iterator i2 = r2.iterator(); i2.hasNext();) {
                int ii2 = ((Integer) i2.next()).intValue();
                for (Iterator i3 = r3.iterator(); i3.hasNext();) {
                    int ii3 = ((Integer) i3.next()).intValue();
                    int pos = size3 * (size2 * ii1 + ii2) + ii3;
                    matrix[pos] = score[i];
                }
            }
        }
    }

    try {
        DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file"))));
        out.writeShort(size1);
        out.writeShort(size2);
        out.writeShort(size3);
        for (int i1 = 0; i1 < size1; i1++)
            for (int i2 = 0; i2 < size2; i2++)
                for (int i3 = 0; i3 < size3; i3++) {
                    out.writeShort(matrix[size3 * (size2 * i1 + i2) + i3]);
                    // if (matrix[size3 * (size2 * i1 + i2) + i3] !=
                    // default_cost) {
                    // }
                }
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(0);
    }

    matrix = null;
    score = null;

    // -------------------------------------------------

    int pos_start = Integer.parseInt(rb.getString("pos_start"));
    int pos_size = Integer.parseInt(rb.getString("pos_size"));

    int di = 0;
    int offset = 0;
    ArrayList dicList = new ArrayList();

    // /////////////////////////////////////////
    //
    // Step4. Reading Morpheme Information
    //
    log.info("(4/7): reading morpheme information ... ");
    String t = null;
    String[] csv = null;
    try {
        // writer for feature file.
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset")));

        log.info("load dic: " + rb.getString("text_dic_file"));
        BufferedReader dicStream = null;
        int custom_dic = -1;
        if (args.length == 0) {
            dicStream = new BufferedReader(new InputStreamReader(
                    new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset")));
        } else {
            custom_dic = 0;
            dicStream = new BufferedReader(
                    new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset")));
        }

        int line = 0;

        CSVData key_b = new CSVData();
        CSVData pos_b = new CSVData();

        while (true) {
            t = dicStream.readLine();
            if (t == null) {
                dicStream.close();
                custom_dic++;
                if (args.length == custom_dic) {
                    break;
                } else {
                    // read custom dictionary
                    log.info("load dic: " + args[custom_dic]);
                    dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]),
                            rb.getString("dic.charset")));
                }
                continue;
            }

            CSVParser parser = new CSVParser(t);
            csv = parser.nextTokens();
            if (csv.length < (pos_size + pos_start)) {
                throw new RuntimeException("format error:" + t);
            }

            key_b.clear();
            pos_b.clear();
            for (int i = pos_start; i < (pos_start + pos_size - 1); i++) {
                key_b.append(csv[i]);
                pos_b.append(csv[i]);
            }

            key_b.append(csv[pos_start + pos_size - 1]);
            pos_b.append(csv[pos_start + pos_size - 1]);

            for (int i = pos_start + pos_size; i < (csv.length - 1); i++) {
                pos_b.append(csv[i]);
            }
            pos_b.append(csv[csv.length - 1]);

            CToken token = new CToken();

            token.rcAttr2 = (short) dm1.getDicId(key_b.toString());
            token.rcAttr1 = (short) dm2.getDicId(key_b.toString());
            token.lcAttr = (short) dm3.getDicId(key_b.toString());
            token.posid = 0;
            token.posID = offset;
            token.length = (short) csv[0].length();
            token.cost = (short) Integer.parseInt(csv[1]);

            dicList.add(new PairObject(csv[0], token));

            byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset"));
            offset += (b.length + 1);
            String pos_b_str = pos_b.toString();
            bw.write(pos_b_str, 0, pos_b_str.length());
            // bw.write(b, 0, b.length);
            bw.write(0);
            if (++di % 50000 == 0)
                log.info("" + di + "... ");
        }
        bw.close();
        // ----end of writing feature.cha ----
    } catch (Exception e) {
        log.error("Error: " + t);
        e.printStackTrace();
        System.exit(1);
    }

    rule1 = null;
    rule2 = null;
    rule3 = null;

    // /////////////////////////////////////////
    //
    // Step5. Sort lexs and write to file
    //
    log.info("(5/7): sorting lex... ");

    int value[] = new int[dicList.size()];
    char key[][] = new char[dicList.size()][];
    int spos = 0;
    int dsize = 0;
    int bsize = 0;
    String prev = "";
    Collections.sort(dicList);

    // /////////////////////////////////////////
    //
    // Step6. Writing Token Information
    //
    log.info("(6/7): writing token... ");
    try {
        // writer for token file.
        DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(rb.getString("token_file"))));

        // writing the 'bos', 'eos', and 'unknown' tokens.
        CToken token = new CToken();
        token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos"));
        token.write(out);

        token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos"));
        token.write(out);

        token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos"));
        token.posID = -1;
        token.write(out);
        log.info("key size = " + key.length);
        for (int i = 0; i < key.length; i++) {
            String k = (String) ((PairObject) dicList.get(i)).key;
            if (!prev.equals(k) && i != 0) {
                key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray();
                value[dsize] = bsize + (spos << 8);
                dsize++;
                bsize = 1;
                spos = i;
            } else {
                bsize++;
            }
            prev = (String) ((PairObject) dicList.get(i)).key;
            ((CToken) (((PairObject) dicList.get(i)).value)).write(out);
        }
        out.flush();
        out.close();
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray();

    value[dsize] = bsize + (spos << 8);
    dsize++;

    dm1 = null;
    dm2 = null;
    dm3 = null;
    dicList = null;

    // /////////////////////////////////////////
    //
    // Step7. Build Double Array
    //
    log.info("(7/7): building Double-Array (size = " + dsize + ") ...");

    DoubleArrayTrie da = new DoubleArrayTrie();

    da.build(key, null, value, dsize);
    try {
        da.save(rb.getString("double_array_file"));
    } catch (Exception e) {
        e.printStackTrace();
    }

    log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[ms]");
}
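
The connection matrix above packs a three-dimensional cost table into a single short[] via the flattened index size3 * (size2 * i1 + i2) + i3. A standalone sketch of that indexing with hypothetical sizes and rules; ArrayList.get(i) retrieves the i-th rule, in place of the Vector lookups used above.

import java.util.ArrayList;

public class FlattenIndexSketch {
    public static void main(String[] args) {
        int size1 = 2, size2 = 3, size3 = 4; // hypothetical dictionary sizes
        short[] matrix = new short[size1 * size2 * size3];

        // Each rule is (i1, i2, i3, cost), mirroring rule1/rule2/rule3 and score.
        ArrayList<int[]> rules = new ArrayList<int[]>();
        rules.add(new int[] { 0, 1, 2, 10 });
        rules.add(new int[] { 1, 2, 3, -5 });

        for (int i = 0; i < rules.size(); i++) {
            int[] r = rules.get(i); // ArrayList.get(i): the i-th rule
            // Row-major flattening, as in Step3 above.
            int pos = size3 * (size2 * r[0] + r[1]) + r[2];
            matrix[pos] = (short) r[3];
        }
        System.out.println(matrix[size3 * (size2 * 0 + 1) + 2]); // prints 10
    }
}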

From source file:ServerStatus.java


/**
 * @param args the command line arguments
 */
public static void main(String[] args)
        throws InterruptedException, FileNotFoundException, IOException, ParseException {
    FileReader reader = null;
    ArrayList<BankInfo2> BankArray = new ArrayList<BankInfo2>();
    reader = new FileReader(args[0]);
    JSONParser jp = new JSONParser();
    JSONObject doc = (JSONObject) jp.parse(reader);
    JSONObject banks = (JSONObject) doc.get("banks");
    //Set bankKeys = banks.keySet();
    //Object [] bankNames = bankKeys.toArray();
    Object[] bankNames = banks.keySet().toArray();
    for (int i = 0; i < bankNames.length; i++) {

        //System.out.println(bankNames[i]);
        String bname = (String) bankNames[i];
        BankInfo2 binfo = new BankInfo2(bname);
        JSONObject banki = (JSONObject) banks.get(bname);
        JSONArray chain = (JSONArray) banki.get("chain");
        int chainLength = chain.size();
        //System.out.println(chainLength);
        for (Object chain1 : chain) {

            JSONObject serv = (JSONObject) chain1;
            ServerInfo sinfo = new ServerInfo((String) serv.get("ip"), serv.get("port").toString(),
                    serv.get("start_delay").toString(), serv.get("lifetime").toString(),
                    serv.get("receive").toString(), serv.get("send").toString());
            binfo.servers.add(sinfo);
            //System.out.println(serv.get("ip") + ":" + serv.get("port"));
        }
        BankArray.add(binfo);
    }
    //System.out.println("Done Processing Servers");
    JSONArray clients = (JSONArray) doc.get("clients");
    ArrayList<ClientInfo> clientsList = new ArrayList<ClientInfo>();
    for (int i = 0; i < clients.size(); i++) {
        JSONObject client_i = (JSONObject) clients.get(i);
        //This is for hard coded requests in the json file
        //System.out.println(client_i);
        //System.out.println(client_i.getClass());
        String typeOfClient = client_i.get("requests").getClass().toString();

        //This is for a client that has hardCoded requests
        if (typeOfClient.equals("class org.json.simple.JSONArray")) {
            //System.out.println("JSONArray");
            JSONArray requests = (JSONArray) client_i.get("requests");
            ClientInfo c = new ClientInfo(client_i.get("reply_timeout").toString(),
                    client_i.get("request_retries").toString(), client_i.get("resend_head").toString());
            c.prob_failure = client_i.get("prob_failure").toString();
            c.msg_send_delay = client_i.get("msg_delay").toString();
            System.out.println(
                    "Successfully added prob failure and msg_send " + c.prob_failure + "," + c.msg_send_delay);
            ArrayList<RequestInfo> req_list = new ArrayList<RequestInfo>();
            for (int j = 0; j < requests.size(); j++) {
                JSONObject request_j = (JSONObject) requests.get(j);
                String req = request_j.get("request").toString();
                String bank = request_j.get("" + "bank").toString();
                String acc = request_j.get("account").toString();
                String seq = request_j.get("seq_num").toString();
                String amt = null;
                try {
                    amt = request_j.get("amount").toString();
                } catch (NullPointerException e) {
                    //System.out.println("Amount not specified.");
                }
                RequestInfo r;
                if (amt == null) {
                    r = new RequestInfo(req, bank, acc, seq);
                } else {
                    r = new RequestInfo(req, bank, acc, amt, seq);
                }
                //RequestInfo r = new RequestInfo(request_j.get("request").toString(), request_j.get("bank").toString(), request_j.get("account").toString(), request_j.get("amount").toString());
                req_list.add(r);
            }
            c.requests = req_list;
            c.PortNumber = 60000 + i;
            clientsList.add(c);
            //System.out.println(client_i);
        }
        //This is for Random client requests
        else if (typeOfClient.equals("class org.json.simple.JSONObject")) {
            JSONObject randomReq = (JSONObject) client_i.get("requests");
            String seed = randomReq.get("seed").toString();
            String num_requests = randomReq.get("num_requests").toString();
            String prob_balance = randomReq.get("prob_balance").toString();
            String prob_deposit = randomReq.get("prob_deposit").toString();
            String prob_withdraw = randomReq.get("prob_withdrawal").toString();
            String prob_transfer = randomReq.get("prob_transfer").toString();
            //ClientInfo c = new ClientInfo(true, seed, num_requests, prob_balance, prob_deposit, prob_withdraw, prob_transfer);
            ClientInfo c = new ClientInfo(client_i.get("reply_timeout").toString(),
                    client_i.get("request_retries").toString(), client_i.get("resend_head").toString(), seed,
                    num_requests, prob_balance, prob_deposit, prob_withdraw, prob_transfer);
            c.PortNumber = 60000 + i;
            clientsList.add(c);
        }
    }
    //System.out.println(clients.size());
    double lowerPercent = 0.0;
    double upperPercent = 1.0;
    double result;
    String bankChainInfoMaster = "";
    for (int x = 0; x < BankArray.size(); x++) {
        BankInfo2 analyze = BankArray.get(x);
        String chain = analyze.bank_name + "#";
        //analyze.servers
        for (int j = 0; j < analyze.servers.size(); j++) {
            if (analyze.servers.get(j).Start_delay.equals("0")) {
                if (j == 0) {
                    chain += analyze.servers.get(j).Port;
                } else {
                    chain += "#" + analyze.servers.get(j).Port;
                }
            }
        }
        if (x == 0) {
            bankChainInfoMaster += chain;
        } else {
            bankChainInfoMaster += "@" + chain;
        }
    }
    //System.out.println("CHAIN: "+ bankChainInfoMaster);

    String clientInfoMaster = "";
    for (int x = 0; x < clientsList.size(); x++) {
        ClientInfo analyze = clientsList.get(x);
        if (x == 0) {
            clientInfoMaster += analyze.PortNumber;
        } else {
            clientInfoMaster += "#" + analyze.PortNumber;
        }

    }
    //System.out.println("Clients: "+ clientInfoMaster);

    //RUN MASTER HERE 
    String MasterPort = "49999";
    String masterExec = "java Master " + MasterPort + " " + clientInfoMaster + " " + bankChainInfoMaster;
    Process masterProcess = Runtime.getRuntime().exec(masterExec);
    System.out.println(masterExec);
    ArrayList<ServerInfoForClient> servInfoCli = new ArrayList<ServerInfoForClient>();

    // List of all servers is saved so that we can wait for them to exit.
    ArrayList<Process> serverPros = new ArrayList<Process>();
    //ArrayList<String> execServs = new ArrayList<String>();
    for (int i = 0; i < BankArray.size(); i++) {
        BankInfo2 analyze = BankArray.get(i);
        //System.out.println(analyze.bank_name);
        //One server in the chain
        String execCmd = "java Server ";
        String hIP = "", hPort = "", tIP = "", tPort = "", bn = "";
        bn = analyze.bank_name;
        boolean joinFlag = false;
        if (analyze.servers.size() == 2 && analyze.servers.get(1).Start_delay.equals("0")) {
            joinFlag = false;
        } else {
            joinFlag = true;
        }

        if (analyze.servers.size() == 1 && joinFlag == false) {
            //if(analyze.servers.size() == 1){
            ServerInfo si = analyze.servers.get(0);
            execCmd += "HEAD_TAIL " + si.IP + ":" + si.Port;
            execCmd += " localhost:0 localhost:0 localhost:" + MasterPort + " " + si.Start_delay + " "
                    + si.Lifetime + " " + si.Receive + " " + si.Send + " " + analyze.bank_name;
            hIP = si.IP;
            hPort = si.Port;
            tIP = si.IP;
            tPort = si.Port;
            System.out.println(execCmd);
            Thread.sleep(500);
            Process pro = Runtime.getRuntime().exec(execCmd);
            serverPros.add(pro);
            //}
        } else if (analyze.servers.size() == 2 && joinFlag == true) {
            ServerInfo si = analyze.servers.get(0);
            execCmd += "HEAD_TAIL " + si.IP + ":" + si.Port;
            execCmd += " localhost:0 localhost:0 localhost:" + MasterPort + " " + si.Start_delay + " "
                    + si.Lifetime + " " + si.Receive + " " + si.Send + " " + analyze.bank_name;
            hIP = si.IP;
            hPort = si.Port;
            tIP = si.IP;
            tPort = si.Port;
            System.out.println(execCmd);
            Thread.sleep(500);
            Process pro = Runtime.getRuntime().exec(execCmd);
            serverPros.add(pro);

            execCmd = "java Server ";
            ServerInfo si2 = analyze.servers.get(1);
            execCmd += "TAIL " + si2.IP + ":" + si2.Port;
            execCmd += " localhost:0 localhost:0 localhost:" + MasterPort + " " + si2.Start_delay + " "
                    + si2.Lifetime + " " + si2.Receive + " " + si2.Send + " " + analyze.bank_name;
            // the tail of this two-server chain is the second server
            tIP = si2.IP;
            tPort = si2.Port;
            System.out.println(execCmd);
            Thread.sleep(500);
            Process pro2 = Runtime.getRuntime().exec(execCmd);
            serverPros.add(pro2);
        } else {
            int icount = 0;
            for (int x = 0; x < analyze.servers.size(); x++) {
                ServerInfo si = analyze.servers.get(x);
                if (si.Start_delay.equals("0")) {
                    icount++;
                }
            }
            System.out.println("icount:" + icount);
            for (int j = 0; j < icount; j++) {
                //for(int j = 0; j < analyze.servers.size(); j++){
                execCmd = "java Server ";
                ServerInfo si = analyze.servers.get(j);
                //Head server
                if (j == 0) {
                    ServerInfo siSucc = analyze.servers.get(j + 1);
                    execCmd += "HEAD " + si.IP + ":" + si.Port + " ";
                    execCmd += "localhost:0 " + siSucc.IP + ":" + siSucc.Port + " localhost:" + MasterPort;
                    execCmd += " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " "
                            + analyze.bank_name;
                    System.out.println(execCmd);
                    hIP = si.IP;
                    hPort = si.Port;

                }
                //Tail Server
                else if (j == (icount - 1)) { // tail of the initially-running servers
                    ServerInfo siPred = analyze.servers.get(j - 1);
                    execCmd += "TAIL " + si.IP + ":" + si.Port + " ";
                    execCmd += siPred.IP + ":" + siPred.Port + " localhost:0 localhost:" + MasterPort;
                    execCmd += " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " "
                            + analyze.bank_name;
                    tIP = si.IP;
                    tPort = si.Port;
                    System.out.println(execCmd);
                }
                //Middle Server
                else {
                    ServerInfo siSucc = analyze.servers.get(j + 1);
                    ServerInfo siPred = analyze.servers.get(j - 1);
                    execCmd += "MIDDLE " + si.IP + ":" + si.Port + " ";
                    execCmd += siPred.IP + ":" + siPred.Port + " " + siSucc.IP + ":" + siSucc.Port
                            + " localhost:" + MasterPort;
                    execCmd += " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " "
                            + analyze.bank_name;
                    System.out.println(execCmd);
                }
                Thread.sleep(500);
                Process pro = Runtime.getRuntime().exec(execCmd);
                serverPros.add(pro);
            }
            for (int j = icount; j < analyze.servers.size(); j++) {
                execCmd = "java Server ";
                ServerInfo si = analyze.servers.get(j);
                ServerInfo siPred = analyze.servers.get(j - 1);
                execCmd += "TAIL " + si.IP + ":" + si.Port + " ";
                execCmd += siPred.IP + ":" + siPred.Port + " localhost:0 localhost:" + MasterPort;
                execCmd += " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " "
                        + analyze.bank_name;
                tIP = si.IP;
                tPort = si.Port;
                System.out.println(execCmd);
                Thread.sleep(500);
                Process pro = Runtime.getRuntime().exec(execCmd);
                serverPros.add(pro);
            }
        }
        ServerInfoForClient newServInfoForCli = new ServerInfoForClient(hPort, hIP, tPort, tIP, bn);
        servInfoCli.add(newServInfoForCli);
    }
    String banksCliParam = "";
    for (int i = 0; i < servInfoCli.size(); i++) {
        ServerInfoForClient temp = servInfoCli.get(i);
        String add = "@" + temp.bank_name + "#" + temp.HeadIP + ":" + temp.HeadPort + "#" + temp.TailIP + ":"
                + temp.TailPort;
        banksCliParam += add;
    }
    banksCliParam = banksCliParam.replaceFirst("@", "");
    //System.out.println(banksCliParam);

    // List of clients is saved so that we can wait for them to exit.
    ArrayList<Process> clientPros = new ArrayList<Process>();
    for (int i = 0; i < clientsList.size(); i++) {
        ClientInfo analyze = clientsList.get(i);
        String requestsString = "";
        if (analyze.isRandom) {
            double balance = Double.parseDouble(analyze.prob_balance);
            //System.out.println(analyze.prob_balance);
            double deposit = Double.parseDouble(analyze.prob_deposit);
            double withdraw = Double.parseDouble(analyze.prob_withdraw);
            int numRequests = Integer.parseInt(analyze.num_requests);
            for (int j = 0; j < numRequests; j++) {
                result = Math.random() * (1.0 - 0.0) + 0.0;
                int randAccount = (int) (Math.random() * (10001 - 0) + 0);
                double randAmount = Math.random() * (10001.00 - 0.0) + 0;
                int adjustMoney = (int) randAmount * 100;
                randAmount = (double) adjustMoney / 100.00;
                int randBank = (int) (Math.random() * (bankNames.length - 0) + 0);
                if (result < balance) {
                    //withdrawal#clientIPPORT%bank_name%accountnum%seq#amount
                    requestsString += "@balance#localhost:" + analyze.PortNumber + "%" + bankNames[randBank]
                            + "%" + randAccount + "%" + j;
                } else if (result < (deposit + balance)) {
                    requestsString += "@deposit#localhost:" + analyze.PortNumber + "%" + bankNames[randBank]
                            + "%" + randAccount + "%" + j + "#" + randAmount;
                } else {
                    requestsString += "@withdrawal#localhost:" + analyze.PortNumber + "%" + bankNames[randBank]
                            + "%" + randAccount + "%" + j + "#" + randAmount;
                }
            }

        } else {
            for (int j = 0; j < analyze.requests.size(); j++) {

                RequestInfo req = analyze.requests.get(j);
                //System.out.println("Sequence ###" + req.sequenceNum);
                if (req.request.equals("balance")) {
                    requestsString += "@" + req.request + "#localhost:" + analyze.PortNumber + "%"
                            + req.bankName + "%" + req.accountNum + "%" + req.sequenceNum;
                } else {
                    requestsString += "@" + req.request + "#localhost:" + analyze.PortNumber + "%"
                            + req.bankName + "%" + req.accountNum + "%" + req.sequenceNum + "#" + req.amount;
                }

            }
        }
        requestsString = requestsString.replaceFirst("@", "");
        String execCommand;
        int p = 60000 + i;
        if (analyze.isRandom) {
            execCommand = "java Client localhost:" + p + " " + banksCliParam + " " + requestsString + " "
                    + analyze.reply_timeout + " " + analyze.request_retries + " " + analyze.resend_head + " "
                    + analyze.prob_failure + " " + analyze.msg_send_delay + " " + analyze.prob_balance + ","
                    + analyze.prob_deposit + "," + analyze.prob_withdraw + "," + analyze.prob_transfer;
        } else {
            execCommand = "java Client localhost:" + p + " " + banksCliParam + " " + requestsString + " "
                    + analyze.reply_timeout + " " + analyze.request_retries + " " + analyze.resend_head + " "
                    + analyze.prob_failure + " " + analyze.msg_send_delay;

        }
        Thread.sleep(500);
        System.out.println(execCommand);
        System.out.println("Client " + (i + 1) + " started");
        Process cliPro = Runtime.getRuntime().exec(execCommand);
        clientPros.add(cliPro);
        //System.out.println(requestsString);
    }
    // Wait for all the clients to terminate
    for (Process clientPro : clientPros) {
        try {
            clientPro.waitFor();
            System.out.println("Client process finished.");
        } catch (InterruptedException e) {
            System.out.println("Interrupted while waiting for client.");
        }
    }
    // Sleep for two seconds
    Thread.sleep(2000);
    // Force termination of the servers
    for (Process serverPro : serverPros) {
        serverPro.destroy();
        System.out.println("Killed server.");
    }
    masterProcess.destroy();
    System.out.println("Killed Master");
    //System.out.println("asdf");
}
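
The driver above keeps every launched Process in an ArrayList so it can later block on the clients and tear down the servers. A compact sketch of that lifecycle; the "sleep" command stands in for the real "java Client ..." invocations and assumes a Unix-like host.

import java.io.IOException;
import java.util.ArrayList;

public class ProcessLifecycleSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        ArrayList<Process> clientPros = new ArrayList<Process>();
        for (int i = 0; i < 3; i++) {
            // Stand-in for Runtime.getRuntime().exec(execCommand) above;
            // assumes "sleep" is available on the PATH.
            clientPros.add(new ProcessBuilder("sleep", "1").start());
        }
        for (int i = 0; i < clientPros.size(); i++) {
            clientPros.get(i).waitFor(); // ArrayList.get(i): the i-th process
            System.out.println("Client " + (i + 1) + " finished.");
        }
    }
}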

From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the aggregate functions " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions"
            + " will be computed, followed by their temporal and spatial attribute indices");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true,
            "aws secrect access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";
    String preProcessingDatasets = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetAggregation = new ArrayList<String>();
    HashMap<String, String> datasetTempAtt = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>();
    HashMap<String, String> preProcessingDataset = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");
    String[] datasetArgs = cmd.getOptionValues("g");

    for (int i = 0; i < datasetArgs.length; i += 3) {
        String dataset = datasetArgs[i];

        // getting pre-processing
        String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3);
        if (tempPreProcessing == null) {
            System.out.println("No pre-processing available for " + dataset);
            continue;
        }
        preProcessingDataset.put(dataset, tempPreProcessing);

        shortDataset.add(dataset);
        datasetTempAtt.put(dataset, ("null".equals(datasetArgs[i + 1]) ? null : datasetArgs[i + 1]));
        datasetSpatialAtt.put(dataset, ("null".equals(datasetArgs[i + 2]) ? null : datasetArgs[i + 2]));

        datasetId.put(dataset, null);
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3);

    // getting smallest resolution

    HashMap<String, String> tempResMap = new HashMap<String, String>();
    HashMap<String, String> spatialResMap = new HashMap<String, String>();

    HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>();

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String[] datasetArray = preProcessingDataset.get(dataset).split("-");

        String datasetTemporalStr = datasetArray[datasetArray.length - 2];
        int datasetTemporal = utils.temporalResolution(datasetTemporalStr);

        String datasetSpatialStr = datasetArray[datasetArray.length - 1];
        int datasetSpatial = utils.spatialResolution(datasetSpatialStr);

        // finding all possible resolutions

        String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal);
        String[] spatialResolutions = FrameworkUtils.getAggSpatialResolutions(datasetSpatial);

        String temporalResolution = "";
        String spatialResolution = "";

        String tempRes = "";
        String spatialRes = "";

        boolean dataAdded = false;

        for (int i = 0; i < temporalResolutions.length; i++) {
            for (int j = 0; j < spatialResolutions.length; j++) {

                temporalResolution = temporalResolutions[i];
                spatialResolution = spatialResolutions[j];

                String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";

                if (removeExistingFiles) {
                    FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3);
                }

                if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) {

                    dataAdded = true;

                    tempRes += temporalResolution + "-";
                    spatialRes += spatialResolution + "-";
                }
            }
        }

        if (dataAdded) {
            input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
            shortDatasetAggregation.add(dataset);

            tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1));
            spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1));

            datasetTemporalStrMap.put(dataset, datasetTemporalStr);
            datasetSpatialStrMap.put(dataset, datasetSpatialStr);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have aggregates.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    it = input.iterator();
    while (it.hasNext()) {
        preProcessingDatasets += it.next() + ",";
    }

    Job aggJob = null;
    String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/";
    String jobName = "aggregates";

    FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3);

    Configuration aggConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    aggConf.set("dataset-name", datasetNames);
    aggConf.set("dataset-id", datasetIds);

    for (int i = 0; i < shortDatasetAggregation.size(); i++) {
        String dataset = shortDatasetAggregation.get(i);
        String id = datasetId.get(dataset);
        aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset));
        aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset));
        aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset));

        if (s3)
            aggConf.set("dataset-" + id,
                    s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
        else
            aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/"
                    + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
    }

    aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    aggConf.set("mapreduce.task.io.sort.mb", "200");
    aggConf.set("mapreduce.task.io.sort.factor", "100");
    machineConf.setMachineConfiguration(aggConf);

    if (s3) {
        machineConf.setMachineConfiguration(aggConf);
        aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    if (snappyCompression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    aggJob = new Job(aggConf);
    aggJob.setJobName(jobName);

    aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setMapOutputValueClass(AggregationArrayWritable.class);
    aggJob.setOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setOutputValueClass(FloatArrayWritable.class);
    //aggJob.setOutputKeyClass(Text.class);
    //aggJob.setOutputValueClass(Text.class);

    aggJob.setMapperClass(AggregationMapper.class);
    aggJob.setCombinerClass(AggregationCombiner.class);
    aggJob.setReducerClass(AggregationReducer.class);
    aggJob.setNumReduceTasks(machineConf.getNumberReduces());

    aggJob.setInputFormatClass(SequenceFileInputFormat.class);
    //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(aggJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(aggJob, true);
    FileInputFormat.setInputPaths(aggJob,
            preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1));
    FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir));

    aggJob.setJarByClass(Aggregation.class);

    long start = System.currentTimeMillis();
    aggJob.submit();
    aggJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to the right place
    for (String dataset : shortDatasetAggregation) {
        String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}
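
Several of these drivers build comma-separated configuration strings by appending "value," in a loop and trimming the trailing comma with substring. A small sketch of the same join over an ArrayList using get(int); on Java 8+, String.join is an equivalent one-liner.

import java.util.ArrayList;

public class JoinSketch {
    public static void main(String[] args) {
        ArrayList<String> datasetIds = new ArrayList<String>();
        datasetIds.add("1");
        datasetIds.add("4");
        datasetIds.add("7");

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < datasetIds.size(); i++) {
            if (i > 0)
                sb.append(','); // separator only between elements
            sb.append(datasetIds.get(i)); // ArrayList.get(i): the i-th id
        }
        System.out.println(sb); // 1,4,7
        System.out.println(String.join(",", datasetIds)); // Java 8+ equivalent
    }
}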

From source file:DIA_Umpire_Quant.DIA_Umpire_Quant.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    System.out.println(
            "=================================================================================================");
    System.out.println("DIA-Umpire quantitation with targeted re-extraction analysis (version: "
            + UmpireInfo.GetInstance().Version + ")");
    if (args.length != 1) {
        System.out.println(
                "command format error, it should be like: java -jar -Xmx10G DIA_Umpire_Quant.jar diaumpire_quant.params");
        return;
    }
    try {
        ConsoleLogger.SetConsoleLogger(Level.INFO);
        ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_quant.log");
    } catch (Exception e) {
    }

    try {

        Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version);
        Logger.getRootLogger().info("Parameter file:" + args[0]);

        BufferedReader reader = new BufferedReader(new FileReader(args[0]));
        String line = "";
        String WorkFolder = "";
        int NoCPUs = 2;

        String UserMod = "";
        String Combined_Prot = "";
        String InternalLibID = "";
        String ExternalLibPath = "";
        String ExternalLibDecoyTag = "DECOY";
        boolean DefaultProtFiltering = true;
        boolean DataSetLevelPepFDR = false;
        float ProbThreshold = 0.99f;
        float ExtProbThreshold = 0.99f;
        float Freq = 0f;
        int TopNPep = 6;
        int TopNFrag = 6;
        float MinFragMz = 200f;
        String FilterWeight = "GW";
        float MinWeight = 0.9f;
        float RTWindow_Int = -1f;
        float RTWindow_Ext = -1f;

        TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600);
        HashMap<String, File> AssignFiles = new HashMap<>();
        boolean InternalLibSearch = false;
        boolean ExternalLibSearch = false;

        boolean ExportSaint = false;
        boolean SAINT_MS1 = false;
        boolean SAINT_MS2 = true;

        HashMap<String, String[]> BaitList = new HashMap<>();
        HashMap<String, String> BaitName = new HashMap<>();
        HashMap<String, String[]> ControlList = new HashMap<>();
        HashMap<String, String> ControlName = new HashMap<>();

        //<editor-fold defaultstate="collapsed" desc="Reading parameter file">
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            Logger.getRootLogger().info(line);
            if (!"".equals(line) && !line.startsWith("#")) {
                //System.out.println(line);
                if (line.equals("==File list begin")) {
                    do {
                        line = reader.readLine();
                        line = line.trim();
                        if (line.equals("==File list end")) {
                            continue;
                        } else if (!"".equals(line)) {
                            File newfile = new File(line);
                            if (newfile.exists()) {
                                AssignFiles.put(newfile.getAbsolutePath(), newfile);
                            } else {
                                Logger.getRootLogger().info("File: " + newfile + " does not exist.");
                            }
                        }
                    } while (!line.equals("==File list end"));
                }
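                //Remaining non-empty, non-comment lines are "key=value" parameters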
                if (line == null || line.split("=").length < 2) {
                    continue;
                }
                String type = line.split("=")[0].trim();
                String value = line.split("=")[1].trim();
                switch (type) {
                case "TargetedExtraction": {
                    InternalLibSearch = Boolean.parseBoolean(value);
                    break;
                }
                case "InternalLibSearch": {
                    InternalLibSearch = Boolean.parseBoolean(value);
                    break;
                }
                case "ExternalLibSearch": {
                    ExternalLibSearch = Boolean.parseBoolean(value);
                    break;
                }

                case "Path": {
                    WorkFolder = value;
                    break;
                }
                case "path": {
                    WorkFolder = value;
                    break;
                }
                case "Thread": {
                    NoCPUs = Integer.parseInt(value);
                    break;
                }
                case "Fasta": {
                    tandemPara.FastaPath = value;
                    break;
                }
                case "Combined_Prot": {
                    Combined_Prot = value;
                    break;
                }
                case "DefaultProtFiltering": {
                    DefaultProtFiltering = Boolean.parseBoolean(value);
                    break;
                }
                case "DecoyPrefix": {
                    if (!"".equals(value)) {
                        tandemPara.DecoyPrefix = value;
                    }
                    break;
                }
                case "UserMod": {
                    UserMod = value;
                    break;
                }
                case "ProteinFDR": {
                    tandemPara.ProtFDR = Float.parseFloat(value);
                    break;
                }
                case "PeptideFDR": {
                    tandemPara.PepFDR = Float.parseFloat(value);
                    break;
                }
                case "DataSetLevelPepFDR": {
                    DataSetLevelPepFDR = Boolean.parseBoolean(value);
                    break;
                }
                case "InternalLibID": {
                    InternalLibID = value;
                    break;
                }
                case "ExternalLibPath": {
                    ExternalLibPath = value;
                    break;
                }
                case "ExtProbThreshold": {
                    ExtProbThreshold = Float.parseFloat(value);
                    break;
                }
                case "RTWindow_Int": {
                    RTWindow_Int = Float.parseFloat(value);
                    break;
                }
                case "RTWindow_Ext": {
                    RTWindow_Ext = Float.parseFloat(value);
                    break;
                }
                case "ExternalLibDecoyTag": {
                    ExternalLibDecoyTag = value;
                    if (ExternalLibDecoyTag.endsWith("_")) {
                        ExternalLibDecoyTag = ExternalLibDecoyTag.substring(0,
                                ExternalLibDecoyTag.length() - 1);
                    }
                    break;
                }
                case "ProbThreshold": {
                    ProbThreshold = Float.parseFloat(value);
                    break;
                }
                case "ReSearchProb": {
                    //ReSearchProb = Float.parseFloat(value);
                    break;
                }
                case "FilterWeight": {
                    FilterWeight = value;
                    break;
                }
                case "MinWeight": {
                    MinWeight = Float.parseFloat(value);
                    break;
                }
                case "TopNFrag": {
                    TopNFrag = Integer.parseInt(value);
                    break;
                }
                case "TopNPep": {
                    TopNPep = Integer.parseInt(value);
                    break;
                }
                case "Freq": {
                    Freq = Float.parseFloat(value);
                    break;
                }
                case "MinFragMz": {
                    MinFragMz = Float.parseFloat(value);
                    break;
                }

                //<editor-fold defaultstate="collapsed" desc="SaintOutput">
                case "ExportSaintInput": {
                    ExportSaint = Boolean.parseBoolean(value);
                    break;
                }
                case "QuantitationType": {
                    switch (value) {
                    case "MS1": {
                        SAINT_MS1 = true;
                        SAINT_MS2 = false;
                        break;
                    }
                    case "MS2": {
                        SAINT_MS1 = false;
                        SAINT_MS2 = true;
                        break;
                    }
                    case "BOTH": {
                        SAINT_MS1 = true;
                        SAINT_MS2 = true;
                        break;
                    }
                    }
                    break;
                }
                //                    case "BaitInputFile": {
                //                        SaintBaitFile = value;
                //                        break;
                //                    }
                //                    case "PreyInputFile": {
                //                        SaintPreyFile = value;
                //                        break;
                //                    }
                //                    case "InterationInputFile": {
                //                        SaintInteractionFile = value;
                //                        break;
                //                    }
                default: {
                    if (type.startsWith("BaitName_")) {
                        BaitName.put(type.substring(9), value);
                    }
                    if (type.startsWith("BaitFile_")) {
                        BaitList.put(type.substring(9), value.split("\t"));
                    }
                    if (type.startsWith("ControlName_")) {
                        ControlName.put(type.substring(12), value);
                    }
                    if (type.startsWith("ControlFile_")) {
                        ControlList.put(type.substring(12), value.split("\t"));
                    }
                    break;
                }
                //</editor-fold>                    
                }
            }
        }
        //</editor-fold>

        //Initialize PTM manager using compomics library
        PTMManager.GetInstance();
        if (!UserMod.equals("")) {
            PTMManager.GetInstance().ImportUserMod(UserMod);
        }

        //Check if the fasta file can be found
        if (!new File(tandemPara.FastaPath).exists()) {
            Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath
                    + " cannot be found, the process will be terminated, please check.");
            System.exit(1);
        }

        //Check if the prot.xml file can be found
        if (!new File(Combined_Prot).exists()) {
            Logger.getRootLogger().info("ProtXML file: " + Combined_Prot
                    + " cannot be found, the export protein summary table will be empty.");
        }

        LCMSID protID = null;

        //Parse prot.xml and generate protein master list given an FDR 
        if (Combined_Prot != null && !Combined_Prot.equals("")) {
            protID = LCMSID.ReadLCMSIDSerialization(Combined_Prot);
            if (!"".equals(Combined_Prot) && protID == null) {
                protID = new LCMSID(Combined_Prot, tandemPara.DecoyPrefix, tandemPara.FastaPath);
                ProtXMLParser protxmlparser = new ProtXMLParser(protID, Combined_Prot, 0f);
                //Use DIA-Umpire default protein FDR calculation
                if (DefaultProtFiltering) {
                    protID.RemoveLowLocalPWProtein(0.8f);
                    protID.RemoveLowMaxIniProbProtein(0.9f);
                    protID.FilterByProteinDecoyFDRUsingMaxIniProb(tandemPara.DecoyPrefix, tandemPara.ProtFDR);
                } //Get protein FDR calculation without other filtering
                else {
                    protID.FilterByProteinDecoyFDRUsingLocalPW(tandemPara.DecoyPrefix, tandemPara.ProtFDR);
                }
                protID.LoadSequence();
                protID.WriteLCMSIDSerialization(Combined_Prot);
            }
            Logger.getRootLogger().info("Protein No.:" + protID.ProteinList.size());
        }
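        //Fragment peak summaries per file, keyed by mzXML/mzML base name; passed to the export step below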
        HashMap<String, HashMap<String, FragmentPeak>> IDSummaryFragments = new HashMap<>();

        //Generate DIA file list
        ArrayList<DIAPack> FileList = new ArrayList<>();

        File folder = new File(WorkFolder);
        if (!folder.exists()) {
            Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found.");
            System.exit(1);
        }
        for (final File fileEntry : folder.listFiles()) {
            if (fileEntry.isFile()
                    && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                            || fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry);
            }
            if (fileEntry.isDirectory()) {
                for (final File fileEntry2 : fileEntry.listFiles()) {
                    if (fileEntry2.isFile()
                            && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                                    || fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                        AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2);
                    }
                }
            }
        }

        Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size());
        for (File fileEntry : AssignFiles.values()) {
            Logger.getRootLogger().info(fileEntry.getAbsolutePath());
            String mzXMLFile = fileEntry.getAbsolutePath();
            if (mzXMLFile.toLowerCase().endsWith(".mzxml") || mzXMLFile.toLowerCase().endsWith(".mzml")) {
                DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs);
                FileList.add(DiaFile);
                HashMap<String, FragmentPeak> FragMap = new HashMap<>();
                IDSummaryFragments.put(FilenameUtils.getBaseName(mzXMLFile), FragMap);
                Logger.getRootLogger().info(
                        "=================================================================================================");
                Logger.getRootLogger().info("Processing " + mzXMLFile);
                if (!DiaFile.LoadDIASetting()) {
                    Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete");
                    System.exit(1);
                }
                if (!DiaFile.LoadParams()) {
                    Logger.getRootLogger().info("Loading parameters failed, job is incomplete");
                    System.exit(1);
                }
            }
        }

        LCMSID combinePepID = null;
        if (DataSetLevelPepFDR) {
            combinePepID = LCMSID.ReadLCMSIDSerialization(WorkFolder + "combinePepID.SerFS");
            if (combinePepID == null) {
                FDR_DataSetLevel fdr = new FDR_DataSetLevel();
                fdr.GeneratePepIonList(FileList, tandemPara, WorkFolder + "combinePepID.SerFS");
                combinePepID = fdr.combineID;
                combinePepID.WriteLCMSIDSerialization(WorkFolder + "combinePepID.SerFS");
            }
        }

        //process each DIA file for quantification based on untargeted identifications
        for (DIAPack DiaFile : FileList) {
            long time = System.currentTimeMillis();
            Logger.getRootLogger().info("Loading identification results " + DiaFile.Filename + "....");

            //If the LCMSID serialization is found
            if (!DiaFile.ReadSerializedLCMSID()) {
                DiaFile.ParsePepXML(tandemPara, combinePepID);
                DiaFile.BuildStructure();
                if (!DiaFile.MS1FeatureMap.ReadPeakCluster()) {
                    Logger.getRootLogger().info("Loading peak and structure failed, job is incomplete");
                    System.exit(1);
                }
                DiaFile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster();
                //Generate mapping between index of precursor feature and pseudo MS/MS scan index 
                DiaFile.GenerateClusterScanNomapping();
                //Doing quantification
                DiaFile.AssignQuant();
                DiaFile.ClearStructure();
            }
            DiaFile.IDsummary.ReduceMemoryUsage();
            time = System.currentTimeMillis() - time;
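            //Format elapsed milliseconds as "H hour, M min, S sec" using TimeUnit conversions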
            Logger.getRootLogger().info(DiaFile.Filename + " processed time:"
                    + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time),
                            TimeUnit.MILLISECONDS.toMinutes(time)
                                    - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)),
                            TimeUnit.MILLISECONDS.toSeconds(time)
                                    - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time))));
        }

        //<editor-fold defaultstate="collapsed" desc="Targeted re-extraction using internal library">
        Logger.getRootLogger().info(
                "=================================================================================================");
        if (InternalLibSearch && FileList.size() > 1) {
            Logger.getRootLogger().info("Module C: Targeted extraction using internal library");

            FragmentLibManager libManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder,
                    InternalLibID);
            if (libManager == null) {
                Logger.getRootLogger().info("Building internal spectral library");
                libManager = new FragmentLibManager(InternalLibID);
                ArrayList<LCMSID> LCMSIDList = new ArrayList<>();
                for (DIAPack dia : FileList) {
                    LCMSIDList.add(dia.IDsummary);
                }
                libManager.ImportFragLibTopFrag(LCMSIDList, Freq, TopNFrag);
                libManager.WriteFragmentLibSerialization(WorkFolder);
            }
            libManager.ReduceMemoryUsage();

            Logger.getRootLogger()
                    .info("Building retention time prediction model and generate candidate peptide list");
            for (int i = 0; i < FileList.size(); i++) {
                FileList.get(i).IDsummary.ClearMappedPep();
            }
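            //Nested get(i)/get(j) loop (j = i + 1) aligns each pair of runs exactly once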
            for (int i = 0; i < FileList.size(); i++) {
                for (int j = i + 1; j < FileList.size(); j++) {
                    RTAlignedPepIonMapping alignment = new RTAlignedPepIonMapping(WorkFolder,
                            FileList.get(i).GetParameter(), FileList.get(i).IDsummary,
                            FileList.get(j).IDsummary);
                    alignment.GenerateModel();
                    alignment.GenerateMappedPepIon();
                }
                FileList.get(i).ExportID();
                FileList.get(i).IDsummary = null;
            }

            Logger.getRootLogger().info("Targeted matching........");
            for (DIAPack diafile : FileList) {
                if (diafile.IDsummary == null) {
                    diafile.ReadSerializedLCMSID();
                }
                if (!diafile.IDsummary.GetMappedPepIonList().isEmpty()) {
                    diafile.UseMappedIon = true;
                    diafile.FilterMappedIonByProb = false;
                    diafile.BuildStructure();
                    diafile.MS1FeatureMap.ReadPeakCluster();
                    diafile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster();
                    diafile.GenerateMassCalibrationRTMap();
                    diafile.TargetedExtractionQuant(false, libManager, 1.1f, RTWindow_Int);
                    diafile.MS1FeatureMap.ClearAllPeaks();
                    diafile.IDsummary.ReduceMemoryUsage();
                    diafile.IDsummary.RemoveLowProbMappedIon(ProbThreshold);
                    diafile.ExportID();
                    Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size()
                            + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size());
                    diafile.ClearStructure();
                }
                diafile.IDsummary = null;
                System.gc();
            }
            Logger.getRootLogger().info(
                    "=================================================================================================");
        }
        //</editor-fold>

        //<editor-fold defaultstate="collapsed" desc="Targeted re-extraction using external library">
        //External library search
        if (ExternalLibSearch) {
            Logger.getRootLogger().info("Module C: Targeted extraction using external library");

            //Read external library
            FragmentLibManager ExlibManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder,
                    FilenameUtils.getBaseName(ExternalLibPath));
            if (ExlibManager == null) {
                ExlibManager = new FragmentLibManager(FilenameUtils.getBaseName(ExternalLibPath));

                //Import traML file
                ExlibManager.ImportFragLibByTraML(ExternalLibPath, ExternalLibDecoyTag);
                //Check if there are decoy spectra
                ExlibManager.CheckDecoys();
                //ExlibManager.ImportFragLibBySPTXT(ExternalLibPath);
                ExlibManager.WriteFragmentLibSerialization(WorkFolder);
            }
            Logger.getRootLogger()
                    .info("No. of peptide ions in external lib:" + ExlibManager.PeptideFragmentLib.size());
            for (DIAPack diafile : FileList) {
                if (diafile.IDsummary == null) {
                    diafile.ReadSerializedLCMSID();
                }
                //Generate RT mapping
                RTMappingExtLib RTmap = new RTMappingExtLib(diafile.IDsummary, ExlibManager,
                        diafile.GetParameter());
                RTmap.GenerateModel();
                RTmap.GenerateMappedPepIon();

                diafile.BuildStructure();
                diafile.MS1FeatureMap.ReadPeakCluster();
                diafile.GenerateMassCalibrationRTMap();
                //Perform targeted re-extraction
                diafile.TargetedExtractionQuant(false, ExlibManager, ProbThreshold, RTWindow_Ext);
                diafile.MS1FeatureMap.ClearAllPeaks();
                diafile.IDsummary.ReduceMemoryUsage();
                //Remove target IDs below the defined probability threshold
                diafile.IDsummary.RemoveLowProbMappedIon(ExtProbThreshold);
                diafile.ExportID();
                diafile.ClearStructure();
                Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size()
                        + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size());
            }
        }
        //</editor-fold>

        //<editor-fold defaultstate="collapsed" desc="Peptide and fragment selection">
        Logger.getRootLogger().info("Peptide and fragment selection across the whole dataset");
        ArrayList<LCMSID> SummaryList = new ArrayList<>();
        for (DIAPack diafile : FileList) {
            if (diafile.IDsummary == null) {
                diafile.ReadSerializedLCMSID();
                diafile.IDsummary.ClearAssignPeakCluster();
                //diafile.IDsummary.ClearPSMs();                    
            }
            if (protID != null) {
                //Generate protein list according to mapping of peptide ions for each DIA file to the master protein list
                diafile.IDsummary.GenerateProteinByRefIDByPepSeq(protID, true);
                diafile.IDsummary.ReMapProPep();
            }
            if ("GW".equals(FilterWeight)) {
                diafile.IDsummary.SetFilterByGroupWeight();
            } else if ("PepW".equals(FilterWeight)) {
                diafile.IDsummary.SetFilterByWeight();
            }
            SummaryList.add(diafile.IDsummary);
        }
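        //Select top-N fragments per peptide and top-N peptides per protein across the whole dataset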
        FragmentSelection fragselection = new FragmentSelection(SummaryList);
        fragselection.freqPercent = Freq;
        fragselection.MinFragMZ = MinFragMz;
        fragselection.GeneratePepFragScoreMap();
        fragselection.GenerateTopFragMap(TopNFrag);
        fragselection.GenerateProtPepScoreMap(MinWeight);
        fragselection.GenerateTopPepMap(TopNPep);
        //</editor-fold>

        //<editor-fold defaultstate="collapsed" desc="Writing general reports">                 
        ExportTable export = new ExportTable(WorkFolder, SummaryList, IDSummaryFragments, protID,
                fragselection);
        export.Export(TopNPep, TopNFrag, Freq);
        //</editor-fold>

        //<editor-fold defaultstate="collapsed" desc="Generate SAINT input files">
        if (ExportSaint && protID != null) {
            HashMap<String, DIAPack> Filemap = new HashMap<>();
            for (DIAPack DIAfile : FileList) {
                Filemap.put(DIAfile.GetBaseName(), DIAfile);
            }

            FileWriter baitfile = new FileWriter(WorkFolder + "SAINT_Bait_" + DateTimeTag.GetTag() + ".txt");
            FileWriter preyfile = new FileWriter(WorkFolder + "SAINT_Prey_" + DateTimeTag.GetTag() + ".txt");
            FileWriter interactionfileMS1 = null;
            FileWriter interactionfileMS2 = null;
            if (SAINT_MS1) {
                interactionfileMS1 = new FileWriter(
                        WorkFolder + "SAINT_Interaction_MS1_" + DateTimeTag.GetTag() + ".txt");
            }
            if (SAINT_MS2) {
                interactionfileMS2 = new FileWriter(
                        WorkFolder + "SAINT_Interaction_MS2_" + DateTimeTag.GetTag() + ".txt");
            }
            HashMap<String, String> PreyID = new HashMap<>();
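            //Prey entries collected by SaintOutput calls below; written to the prey file once all interactions are exported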

            for (String samplekey : ControlName.keySet()) {
                String name = ControlName.get(samplekey);
                for (String file : ControlList.get(samplekey)) {
                    baitfile.write(FilenameUtils.getBaseName(file) + "\t" + name + "\t" + "C\n");
                    LCMSID IDsummary = Filemap.get(FilenameUtils.getBaseName(file)).IDsummary;
                    if (SAINT_MS1) {
                        SaintOutput(protID, IDsummary, fragselection, interactionfileMS1, file, name, PreyID,
                                1);
                    }
                    if (SAINT_MS2) {
                        SaintOutput(protID, IDsummary, fragselection, interactionfileMS2, file, name, PreyID,
                                2);
                    }
                }
            }
            for (String samplekey : BaitName.keySet()) {
                String name = BaitName.get(samplekey);
                for (String file : BaitList.get(samplekey)) {
                    baitfile.write(FilenameUtils.getBaseName(file) + "\t" + name + "\t" + "T\n");
                    LCMSID IDsummary = Filemap.get(FilenameUtils.getBaseName(file)).IDsummary;
                    if (SAINT_MS1) {
                        SaintOutput(protID, IDsummary, fragselection, interactionfileMS1, file, name, PreyID,
                                1);
                    }
                    if (SAINT_MS2) {
                        SaintOutput(protID, IDsummary, fragselection, interactionfileMS2, file, name, PreyID,
                                2);
                    }
                }
            }
            baitfile.close();
            if (SAINT_MS1) {
                interactionfileMS1.close();
            }
            if (SAINT_MS2) {
                interactionfileMS2.close();
            }
            for (String AccNo : PreyID.keySet()) {
                preyfile.write(AccNo + "\t" + PreyID.get(AccNo) + "\n");
            }
            preyfile.close();
        }

        //</editor-fold>

        Logger.getRootLogger().info("Job done");
        Logger.getRootLogger().info(
                "=================================================================================================");

    } catch (Exception e) {
        Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e));
        throw e;
    }
}
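
The pairwise alignment loop above is a common ArrayList.get idiom: nested indices i and j = i + 1 visit every unordered pair of elements exactly once. A minimal, self-contained sketch of the same pattern (the class name and run labels are hypothetical, for illustration only):

import java.util.ArrayList;

public class PairwiseGetDemo {
    public static void main(String[] args) {
        ArrayList<String> runs = new ArrayList<>();
        runs.add("run_A");
        runs.add("run_B");
        runs.add("run_C");
        //Starting j at i + 1 visits each unordered pair once,
        //mirroring the RT-alignment loop in the example above.
        for (int i = 0; i < runs.size(); i++) {
            for (int j = i + 1; j < runs.size(); j++) {
                System.out.println("align " + runs.get(i) + " <-> " + runs.get(j));
            }
        }
    }
}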