List of usage examples for java.util.ArrayList.get
public E get(int index)
From source file:TestBufferStreamGenomicsDBImporter.java
/** * Sample driver code for testing Java VariantContext write API for GenomicsDB * The code shows two ways of using the API * (a) Iterator<VariantContext>// w w w . jav a 2 s. co m * (b) Directly adding VariantContext objects * If "-iterators" is passed as the second argument, method (a) is used. */ public static void main(final String[] args) throws IOException, GenomicsDBException, ParseException { if (args.length < 2) { System.err.println("For loading: [-iterators] <loader.json> " + "<stream_name_to_file.json> [bufferCapacity rank lbRowIdx ubRowIdx useMultiChromosomeIterator]"); System.exit(-1); } int argsLoaderFileIdx = 0; if (args[0].equals("-iterators")) argsLoaderFileIdx = 1; //Buffer capacity long bufferCapacity = (args.length >= argsLoaderFileIdx + 3) ? Integer.parseInt(args[argsLoaderFileIdx + 2]) : 1024; //Specify rank (or partition idx) of this process int rank = (args.length >= argsLoaderFileIdx + 4) ? Integer.parseInt(args[argsLoaderFileIdx + 3]) : 0; //Specify smallest row idx from which to start loading. // This is useful for incremental loading into existing array long lbRowIdx = (args.length >= argsLoaderFileIdx + 5) ? Long.parseLong(args[argsLoaderFileIdx + 4]) : 0; //Specify largest row idx up to which loading should be performed - for completeness long ubRowIdx = (args.length >= argsLoaderFileIdx + 6) ? Long.parseLong(args[argsLoaderFileIdx + 5]) : Long.MAX_VALUE - 1; //Boolean to use MultipleChromosomeIterator boolean useMultiChromosomeIterator = (args.length >= argsLoaderFileIdx + 7) ? 
Boolean.parseBoolean(args[argsLoaderFileIdx + 6]) : false; //<loader.json> first arg String loaderJSONFile = args[argsLoaderFileIdx]; GenomicsDBImporter loader = new GenomicsDBImporter(loaderJSONFile, rank, lbRowIdx, ubRowIdx); //<stream_name_to_file.json> - useful for the driver only //JSON file that contains "stream_name": "vcf_file_path" entries FileReader mappingReader = new FileReader(args[argsLoaderFileIdx + 1]); JSONParser parser = new JSONParser(); LinkedHashMap streamNameToFileName = (LinkedHashMap) parser.parse(mappingReader, new LinkedHashFactory()); ArrayList<VCFFileStreamInfo> streamInfoVec = new ArrayList<VCFFileStreamInfo>(); long rowIdx = 0; for (Object currObj : streamNameToFileName.entrySet()) { Map.Entry<String, String> entry = (Map.Entry<String, String>) currObj; VCFFileStreamInfo currInfo = new VCFFileStreamInfo(entry.getValue(), loaderJSONFile, rank, useMultiChromosomeIterator); /** The following 2 lines are not mandatory - use initializeSampleInfoMapFromHeader() * iff you know for sure that sample names in the VCF header are globally unique * across all streams/files. 
If not, you have 2 options: * (a) specify your own mapping from sample index in the header to SampleInfo object * (unique_name, rowIdx) OR * (b) specify the mapping in the callset_mapping_file (JSON) and pass null to * addSortedVariantContextIterator() */ LinkedHashMap<Integer, GenomicsDBImporter.SampleInfo> sampleIndexToInfo = new LinkedHashMap<Integer, GenomicsDBImporter.SampleInfo>(); rowIdx = GenomicsDBImporter.initializeSampleInfoMapFromHeader(sampleIndexToInfo, currInfo.mVCFHeader, rowIdx); int streamIdx = -1; if (args[0].equals("-iterators")) streamIdx = loader.addSortedVariantContextIterator(entry.getKey(), currInfo.mVCFHeader, currInfo.mIterator, bufferCapacity, VariantContextWriterBuilder.OutputType.BCF_STREAM, sampleIndexToInfo); //pass sorted VC iterators else //use buffers - VCs will be provided by caller streamIdx = loader.addBufferStream(entry.getKey(), currInfo.mVCFHeader, bufferCapacity, VariantContextWriterBuilder.OutputType.BCF_STREAM, sampleIndexToInfo); currInfo.mStreamIdx = streamIdx; streamInfoVec.add(currInfo); } if (args[0].equals("-iterators")) { //Much simpler interface if using Iterator<VariantContext> loader.importBatch(); assert loader.isDone(); } else { //Must be called after all iterators/streams added - no more iterators/streams // can be added once this function is called loader.setupGenomicsDBImporter(); //Counts and tracks buffer streams for which new data must be supplied //Initialized to all the buffer streams int numExhaustedBufferStreams = streamInfoVec.size(); int[] exhaustedBufferStreamIdxs = new int[numExhaustedBufferStreams]; for (int i = 0; i < numExhaustedBufferStreams; ++i) exhaustedBufferStreamIdxs[i] = i; while (!loader.isDone()) { //Add data for streams that were exhausted in the previous round for (int i = 0; i < numExhaustedBufferStreams; ++i) { VCFFileStreamInfo currInfo = streamInfoVec.get(exhaustedBufferStreamIdxs[i]); boolean added = true; while (added && (currInfo.mIterator.hasNext() || currInfo.mNextVC != 
null)) { if (currInfo.mNextVC != null) added = loader.add(currInfo.mNextVC, currInfo.mStreamIdx); if (added) if (currInfo.mIterator.hasNext()) currInfo.mNextVC = currInfo.mIterator.next(); else currInfo.mNextVC = null; } } loader.importBatch(); numExhaustedBufferStreams = (int) loader.getNumExhaustedBufferStreams(); for (int i = 0; i < numExhaustedBufferStreams; ++i) exhaustedBufferStreamIdxs[i] = loader.getExhaustedBufferStreamIndex(i); } } }
From source file:guardar.en.base.de.datos.MainServidor.java
public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException, ClassNotFoundException { Mongo mongo = new Mongo("localhost", 27017); // nombre de la base de datos DB database = mongo.getDB("paginas"); // coleccion de la db DBCollection collection = database.getCollection("indice"); DBCollection collection_textos = database.getCollection("tabla"); ArrayList<String> lista_textos = new ArrayList(); try {/*from w w w .jav a 2 s .c o m*/ ServerSocket servidor = new ServerSocket(4545); // Crear un servidor en pausa hasta que un cliente llegue. while (true) { String aux = new String(); lista_textos.clear(); Socket clienteNuevo = servidor.accept();// Si llega se acepta. // Queda en pausa otra vez hasta que un objeto llegue. ObjectInputStream entrada = new ObjectInputStream(clienteNuevo.getInputStream()); JSONObject request = (JSONObject) entrada.readObject(); String b = (String) request.get("id"); //hacer una query a la base de datos con la palabra que se quiere obtener BasicDBObject query = new BasicDBObject("palabra", b); DBCursor cursor = collection.find(query); ArrayList<DocumentosDB> lista_doc = new ArrayList<>(); // de la query tomo el campo documentos y los agrego a una lista try { while (cursor.hasNext()) { //System.out.println(cursor.next()); BasicDBList campo_documentos = (BasicDBList) cursor.next().get("documentos"); // en el for voy tomando uno por uno los elementos en el campo documentos for (Iterator<Object> it = campo_documentos.iterator(); it.hasNext();) { BasicDBObject dbo = (BasicDBObject) it.next(); //DOC tiene id y frecuencia DocumentosDB doc = new DocumentosDB(); doc.makefn2(dbo); //int id = (int)doc.getId_documento(); //int f = (int)doc.getFrecuencia(); lista_doc.add(doc); //******************************************* //******************************************** //QUERY A LA COLECCION DE TEXTOS /* BasicDBObject query_textos = new BasicDBObject("id", doc.getId_documento());//query DBCursor cursor_textos = 
collection_textos.find(query_textos); try { while (cursor_textos.hasNext()) { DBObject obj = cursor_textos.next(); String titulo = (String) obj.get("titulo"); titulo = titulo + "\n\n"; String texto = (String) obj.get("texto"); String texto_final = titulo + texto; aux = texto_final; lista_textos.add(texto_final); } } finally { cursor_textos.close(); }*/ //System.out.println(doc.getId_documento()); //System.out.println(doc.getFrecuencia()); } // end for } //end while query } finally { cursor.close(); } // ordeno la lista de menor a mayor Collections.sort(lista_doc, new Comparator<DocumentosDB>() { @Override public int compare(DocumentosDB o1, DocumentosDB o2) { //throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. return o1.getFrecuencia().compareTo(o2.getFrecuencia()); } }); int tam = lista_doc.size() - 1; for (int j = tam; j >= 0; j--) { BasicDBObject query_textos = new BasicDBObject("id", (int) lista_doc.get(j).getId_documento().intValue());//query DBCursor cursor_textos = collection_textos.find(query_textos);// lo busco try { while (cursor_textos.hasNext()) { DBObject obj = cursor_textos.next(); String titulo = "*******************************"; titulo += (String) obj.get("titulo"); int f = (int) lista_doc.get(j).getFrecuencia().intValue(); String strinf = Integer.toString(f); titulo += "******************************* frecuencia:" + strinf; titulo = titulo + "\n\n"; String texto = (String) obj.get("texto"); String texto_final = titulo + texto + "\n\n"; aux = aux + texto_final; //lista_textos.add(texto_final); } } finally { cursor_textos.close(); } } //actualizar el cache try { Socket cliente_cache = new Socket("localhost", 4500); // nos conectamos con el servidor ObjectOutputStream mensaje_cache = new ObjectOutputStream(cliente_cache.getOutputStream()); // get al output del servidor, que es cliente : socket del cliente q se conecto al server JSONObject actualizacion_cache = new 
JSONObject(); actualizacion_cache.put("actualizacion", 1); actualizacion_cache.put("busqueda", b); actualizacion_cache.put("respuesta", aux); mensaje_cache.writeObject(actualizacion_cache); // envio el msj al servidor } catch (Exception ex) { } //RESPONDER DESDE EL SERVIDORIndex al FRONT ObjectOutputStream resp = new ObjectOutputStream(clienteNuevo.getOutputStream());// obtengo el output del cliente para mandarle un msj resp.writeObject(aux); System.out.println("msj enviado desde el servidor"); } } catch (IOException ex) { Logger.getLogger(MainServidor.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
/** * @param args/*from w ww . j a va 2 s . c om*/ * @throws ParseException */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the relationship " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option g1Option = new Option("g1", "first-group", true, "set first group of datasets"); g1Option.setRequired(true); g1Option.setArgName("FIRST GROUP"); g1Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g1Option); Option g2Option = new Option("g2", "second-group", true, "set second group of datasets"); g2Option.setRequired(false); g2Option.setArgName("SECOND GROUP"); g2Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g2Option); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); 
awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } 
Path path = null; FileSystem fs = FileSystem.get(new Configuration()); ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> firstGroup = new ArrayList<String>(); ArrayList<String> secondGroup = new ArrayList<String>(); HashMap<String, String> datasetAgg = new HashMap<String, String>(); boolean removeExistingFiles = cmd.hasOption("f"); String[] firstGroupCmd = cmd.getOptionValues("g1"); String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0]; addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } if (firstGroup.isEmpty()) { System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2."); firstGroup.addAll(secondGroup); } if (secondGroup.isEmpty()) { System.out.println("Second group of datasets (G2) is empty. 
" + "Doing G2 = G1."); secondGroup.addAll(firstGroup); } // getting dataset ids String datasetNames = ""; String datasetIds = ""; HashMap<String, String> datasetId = new HashMap<String, String>(); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { datasetId.put(it.next(), null); } if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } String firstGroupStr = ""; String secondGroupStr = ""; for (String dataset : firstGroup) { firstGroupStr += datasetId.get(dataset) + ","; } for (String dataset : secondGroup) { secondGroupStr += datasetId.get(dataset) + ","; } firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1); secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1); FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3); String dataAttributesInputDirs = ""; String noRelationship = ""; HashSet<String> dirs = new HashSet<String>(); String dataset1; String dataset2; String datasetId1; String datasetId2; for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer 
.parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } datasetId1 = datasetId.get(dataset1); datasetId2 = datasetId.get(dataset2); if (dataset1.equals(dataset2)) continue; String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2 + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) { dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1); dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2); } else { noRelationship += datasetId1 + "-" + datasetId2 + ","; } } } if (dirs.isEmpty()) { System.out.println("All the relationships were already computed."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } for (String dir : dirs) { dataAttributesInputDirs += dir + ","; } Configuration conf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String jobName = "correlation"; String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/"; FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3); for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i))); } for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size", Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length)); } conf.set("dataset-keys", datasetIds); conf.set("dataset-names", datasetNames); conf.set("first-group", firstGroupStr); conf.set("second-group", secondGroupStr); conf.set("main-dataset-id", datasetId.get(shortDataset.get(0))); if (noRelationship.length() > 0) { conf.set("no-relationship", 
noRelationship.substring(0, noRelationship.length() - 1)); } conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); conf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); conf.set("mapreduce.input.fileinputformat.split.minsize", "0"); conf.set("mapreduce.task.io.sort.mb", "200"); conf.set("mapreduce.task.io.sort.factor", "100"); conf.set("mapreduce.task.timeout", "2400000"); if (s3) { machineConf.setMachineConfiguration(conf); conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); conf.set("bucket", s3bucket); } if (snappyCompression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } Job job = new Job(conf); job.setJobName(jobName); job.setMapOutputKeyClass(PairAttributeWritable.class); job.setMapOutputValueClass(SpatioTemporalValueWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CorrelationTechniquesMapper.class); job.setReducerClass(CorrelationTechniquesReducer.class); job.setNumReduceTasks(machineConf.getNumberReduces()); job.setInputFormatClass(SequenceFileInputFormat.class); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, dataAttributesInputDirs.substring(0, 
dataAttributesInputDirs.length() - 1)); FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir)); job.setJarByClass(CorrelationTechniques.class); long start = System.currentTimeMillis(); job.submit(); job.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } if (dataset1.equals(dataset2)) continue; String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-" + dataset2 + "/"; String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2 + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } } }
From source file:net.massbank.validator.RecordValidator.java
public static void main(String[] args) { RequestDummy request;/*w w w . j a v a 2 s . c o m*/ PrintStream out = System.out; Options lvOptions = new Options(); lvOptions.addOption("h", "help", false, "show this help."); lvOptions.addOption("r", "recdata", true, "points to the recdata directory containing massbank records. Reads all *.txt files in there."); CommandLineParser lvParser = new BasicParser(); CommandLine lvCmd = null; try { lvCmd = lvParser.parse(lvOptions, args); if (lvCmd.hasOption('h')) { printHelp(lvOptions); return; } } catch (org.apache.commons.cli.ParseException pvException) { System.out.println(pvException.getMessage()); } String recDataPath = lvCmd.getOptionValue("recdata"); // --------------------------------------------- // ???? // --------------------------------------------- final String baseUrl = MassBankEnv.get(MassBankEnv.KEY_BASE_URL); final String dbRootPath = "./"; final String dbHostName = MassBankEnv.get(MassBankEnv.KEY_DB_HOST_NAME); final String tomcatTmpPath = "."; final String tmpPath = (new File(tomcatTmpPath + sdf.format(new Date()))).getPath() + File.separator; GetConfig conf = new GetConfig(baseUrl); int recVersion = 2; String selDbName = ""; Object up = null; // Was: file Upload boolean isResult = true; String upFileName = ""; boolean upResult = false; DatabaseAccess db = null; try { // ---------------------------------------------------- // ??? // ---------------------------------------------------- // if (FileUpload.isMultipartContent(request)) { // (new File(tmpPath)).mkdir(); // String os = System.getProperty("os.name"); // if (os.indexOf("Windows") == -1) { // isResult = FileUtil.changeMode("777", tmpPath); // if (!isResult) { // out.println(msgErr("[" + tmpPath // + "] chmod failed.")); // return; // } // } // up = new FileUpload(request, tmpPath); // } // ---------------------------------------------------- // ?DB???? 
// ---------------------------------------------------- List<String> dbNameList = Arrays.asList(conf.getDbName()); ArrayList<String> dbNames = new ArrayList<String>(); dbNames.add(""); File[] dbDirs = (new File(dbRootPath)).listFiles(); if (dbDirs != null) { for (File dbDir : dbDirs) { if (dbDir.isDirectory()) { int pos = dbDir.getName().lastIndexOf("\\"); String dbDirName = dbDir.getName().substring(pos + 1); pos = dbDirName.lastIndexOf("/"); dbDirName = dbDirName.substring(pos + 1); if (dbNameList.contains(dbDirName)) { // DB???massbank.conf???DB???? dbNames.add(dbDirName); } } } } if (dbDirs == null || dbNames.size() == 0) { out.println(msgErr("[" + dbRootPath + "] directory not exist.")); return; } Collections.sort(dbNames); // ---------------------------------------------------- // ? // ---------------------------------------------------- // if (FileUpload.isMultipartContent(request)) { // HashMap<String, String[]> reqParamMap = new HashMap<String, // String[]>(); // reqParamMap = up.getRequestParam(); // if (reqParamMap != null) { // for (Map.Entry<String, String[]> req : reqParamMap // .entrySet()) { // if (req.getKey().equals("ver")) { // try { // recVersion = Integer // .parseInt(req.getValue()[0]); // } catch (NumberFormatException nfe) { // } // } else if (req.getKey().equals("db")) { // selDbName = req.getValue()[0]; // } // } // } // } else { // if (request.getParameter("ver") != null) { // try { // recVersion = Integer.parseInt(request // .getParameter("ver")); // } catch (NumberFormatException nfe) { // } // } // selDbName = request.getParameter("db"); // } // if (selDbName == null || selDbName.equals("") // || !dbNames.contains(selDbName)) { // selDbName = dbNames.get(0); // } // --------------------------------------------- // // --------------------------------------------- out.println("Database: "); for (int i = 0; i < dbNames.size(); i++) { String dbName = dbNames.get(i); out.print("dbName"); if (dbName.equals(selDbName)) { out.print(" 
selected"); } if (i == 0) { out.println("------------------"); } else { out.println(dbName); } } out.println("Record Version : "); out.println(recVersion); out.println("Record Archive :"); // --------------------------------------------- // // --------------------------------------------- // HashMap<String, Boolean> upFileMap = up.doUpload(); // if (upFileMap != null) { // for (Map.Entry<String, Boolean> e : upFileMap.entrySet()) { // upFileName = e.getKey(); // upResult = e.getValue(); // break; // } // if (upFileName.equals("")) { // out.println(msgErr("please select file.")); // isResult = false; // } else if (!upResult) { // out.println(msgErr("[" + upFileName // + "] upload failed.")); // isResult = false; // } else if (!upFileName.endsWith(ZIP_EXTENSION) // && !upFileName.endsWith(MSBK_EXTENSION)) { // out.println(msgErr("please select [" // + UPLOAD_RECDATA_ZIP // + "] or [" // + UPLOAD_RECDATA_MSBK + "].")); // up.deleteFile(upFileName); // isResult = false; // } // } else { // out.println(msgErr("server error.")); // isResult = false; // } // up.deleteFileItem(); // if (!isResult) { // return; // } // --------------------------------------------- // ??? // --------------------------------------------- // final String upFilePath = (new File(tmpPath + File.separator // + upFileName)).getPath(); // isResult = FileUtil.unZip(upFilePath, tmpPath); // if (!isResult) { // out.println(msgErr("[" // + upFileName // + "] extraction failed. possibility of time-out.")); // return; // } // --------------------------------------------- // ?? // --------------------------------------------- final String recPath = (new File(dbRootPath + File.separator + selDbName)).getPath(); File tmpRecDir = new File(recDataPath); if (!tmpRecDir.isDirectory()) { tmpRecDir.mkdirs(); } // --------------------------------------------- // ??? // --------------------------------------------- // data? 
// final String recDataPath = (new File(tmpPath + File.separator // + RECDATA_DIR_NAME)).getPath() // + File.separator; // // if (!(new File(recDataPath)).isDirectory()) { // if (upFileName.endsWith(ZIP_EXTENSION)) { // out.println(msgErr("[" // + RECDATA_DIR_NAME // + "] directory is not included in the up-loading file.")); // } else if (upFileName.endsWith(MSBK_EXTENSION)) { // out.println(msgErr("The uploaded file is not record data.")); // } // return; // } // --------------------------------------------- // DB // --------------------------------------------- // db = new DatabaseAccess(dbHostName, selDbName); // isResult = db.open(); // if (!isResult) { // db.close(); // out.println(msgErr("not connect to database.")); // return; // } // --------------------------------------------- // ?? // --------------------------------------------- TreeMap<String, String> resultMap = validationRecord(db, out, recDataPath, recPath, recVersion); if (resultMap.size() == 0) { return; } // --------------------------------------------- // ? // --------------------------------------------- isResult = dispResult(out, resultMap); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (db != null) { db.close(); } File tmpDir = new File(tmpPath); if (tmpDir.exists()) { FileUtil.removeDir(tmpDir.getPath()); } } }
From source file:DIA_Umpire_Quant.DIA_Umpire_IntLibSearch.java
/** * @param args the command line arguments *///w w w .j a v a2 s .c o m public static void main(String[] args) throws FileNotFoundException, IOException, Exception { System.out.println( "================================================================================================="); System.out.println("DIA-Umpire targeted re-extraction analysis using internal library (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length != 1) { System.out.println( "command format error, the correct format should be : java -jar -Xmx10G DIA_Umpire_IntLibSearch.jar diaumpire_module.params"); return; } try { ConsoleLogger.SetConsoleLogger(Level.INFO); ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_intlibsearch.log"); } catch (Exception e) { } Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version); Logger.getRootLogger().info("Parameter file:" + args[0]); BufferedReader reader = new BufferedReader(new FileReader(args[0])); String line = ""; String WorkFolder = ""; int NoCPUs = 2; String InternalLibID = ""; float ProbThreshold = 0.99f; float RTWindow_Int = -1f; float Freq = 0f; int TopNFrag = 6; TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600); HashMap<String, File> AssignFiles = new HashMap<>(); //<editor-fold defaultstate="collapsed" desc="Reading parameter file"> while ((line = reader.readLine()) != null) { line = line.trim(); Logger.getRootLogger().info(line); if (!"".equals(line) && !line.startsWith("#")) { //System.out.println(line); if (line.equals("==File list begin")) { do { line = reader.readLine(); line = line.trim(); if (line.equals("==File list end")) { continue; } else if (!"".equals(line)) { File newfile = new File(line); if (newfile.exists()) { AssignFiles.put(newfile.getAbsolutePath(), newfile); } else { Logger.getRootLogger().info("File: " + newfile + " does not exist."); } } } while (!line.equals("==File list end")); } if (line.split("=").length < 
2) { continue; } String type = line.split("=")[0].trim(); String value = line.split("=")[1].trim(); switch (type) { case "Path": { WorkFolder = value; break; } case "path": { WorkFolder = value; break; } case "Thread": { NoCPUs = Integer.parseInt(value); break; } case "InternalLibID": { InternalLibID = value; break; } case "RTWindow_Int": { RTWindow_Int = Float.parseFloat(value); break; } case "ProbThreshold": { ProbThreshold = Float.parseFloat(value); break; } case "TopNFrag": { TopNFrag = Integer.parseInt(value); break; } case "Freq": { Freq = Float.parseFloat(value); break; } case "Fasta": { tandemPara.FastaPath = value; break; } } } } //</editor-fold> //Initialize PTM manager using compomics library PTMManager.GetInstance(); //Check if the fasta file can be found if (!new File(tandemPara.FastaPath).exists()) { Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath + " cannot be found, the process will be terminated, please check."); System.exit(1); } //Generate DIA file list ArrayList<DIAPack> FileList = new ArrayList<>(); try { File folder = new File(WorkFolder); if (!folder.exists()) { Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found."); System.exit(1); } for (final File fileEntry : folder.listFiles()) { if (fileEntry.isFile() && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry); } if (fileEntry.isDirectory()) { for (final File fileEntry2 : fileEntry.listFiles()) { if (fileEntry2.isFile() && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && 
!fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2); } } } } Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size()); for (File fileEntry : AssignFiles.values()) { Logger.getRootLogger().info(fileEntry.getAbsolutePath()); } for (File fileEntry : AssignFiles.values()) { String mzXMLFile = fileEntry.getAbsolutePath(); if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) { DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs); Logger.getRootLogger().info( "================================================================================================="); Logger.getRootLogger().info("Processing " + mzXMLFile); if (!DiaFile.LoadDIASetting()) { Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete"); System.exit(1); } if (!DiaFile.LoadParams()) { Logger.getRootLogger().info("Loading parameters failed, job is incomplete"); System.exit(1); } Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "...."); //If the serialization file for ID file existed if (DiaFile.ReadSerializedLCMSID()) { DiaFile.IDsummary.ReduceMemoryUsage(); DiaFile.IDsummary.FastaPath = tandemPara.FastaPath; FileList.add(DiaFile); } } } //<editor-fold defaultstate="collapsed" desc="Targete re-extraction using internal library"> Logger.getRootLogger().info( "================================================================================================="); if (FileList.size() > 1) { Logger.getRootLogger().info("Targeted re-extraction using internal library"); FragmentLibManager libManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder, InternalLibID); if (libManager == null) { Logger.getRootLogger().info("Building internal spectral library"); libManager = new FragmentLibManager(InternalLibID); ArrayList<LCMSID> LCMSIDList = new ArrayList<>(); for (DIAPack 
dia : FileList) { LCMSIDList.add(dia.IDsummary); } libManager.ImportFragLibTopFrag(LCMSIDList, Freq, TopNFrag); libManager.WriteFragmentLibSerialization(WorkFolder); } libManager.ReduceMemoryUsage(); Logger.getRootLogger() .info("Building retention time prediction model and generate candidate peptide list"); for (int i = 0; i < FileList.size(); i++) { FileList.get(i).IDsummary.ClearMappedPep(); } for (int i = 0; i < FileList.size(); i++) { for (int j = i + 1; j < FileList.size(); j++) { RTAlignedPepIonMapping alignment = new RTAlignedPepIonMapping(WorkFolder, FileList.get(i).GetParameter(), FileList.get(i).IDsummary, FileList.get(j).IDsummary); alignment.GenerateModel(); alignment.GenerateMappedPepIon(); } FileList.get(i).ExportID(); FileList.get(i).IDsummary = null; } Logger.getRootLogger().info("Targeted matching........"); for (DIAPack diafile : FileList) { if (diafile.IDsummary == null) { diafile.ReadSerializedLCMSID(); } if (!diafile.IDsummary.GetMappedPepIonList().isEmpty()) { diafile.UseMappedIon = true; diafile.FilterMappedIonByProb = false; diafile.BuildStructure(); diafile.MS1FeatureMap.ReadPeakCluster(); diafile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster(); diafile.GenerateMassCalibrationRTMap(); diafile.TargetedExtractionQuant(false, libManager, ProbThreshold, RTWindow_Int); diafile.MS1FeatureMap.ClearAllPeaks(); diafile.IDsummary.ReduceMemoryUsage(); diafile.IDsummary.RemoveLowProbMappedIon(ProbThreshold); diafile.ExportID(); Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size() + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size()); diafile.ClearStructure(); } diafile.IDsummary = null; System.gc(); } Logger.getRootLogger().info( "================================================================================================="); } //</editor-fold> Logger.getRootLogger().info("Job done"); Logger.getRootLogger().info( 
"================================================================================================="); } catch (Exception e) { Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e)); throw e; } }
From source file:edu.nyu.vida.data_polygamy.relationship_computation.Relationship.java
/** * @param args//from w w w. j ava 2 s . c o m * @throws ParseException */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the relationship " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option scoreOption = new Option("sc", "score", true, "set threhsold for relationship score"); scoreOption.setRequired(false); scoreOption.setArgName("SCORE THRESHOLD"); options.addOption(scoreOption); Option strengthOption = new Option("st", "strength", true, "set threhsold for relationship strength"); strengthOption.setRequired(false); strengthOption.setArgName("STRENGTH THRESHOLD"); options.addOption(strengthOption); Option completeRandomizationOption = new Option("c", "complete-randomization", false, "use complete randomization when performing significance tests"); completeRandomizationOption.setRequired(false); options.addOption(completeRandomizationOption); Option idOption = new Option("id", "ids", false, "output id instead of names for datasets and attributes"); idOption.setRequired(false); options.addOption(idOption); Option g1Option = new Option("g1", "first-group", true, "set first group of datasets"); g1Option.setRequired(true); g1Option.setArgName("FIRST GROUP"); g1Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g1Option); Option g2Option = new Option("g2", "second-group", true, "set second group of datasets"); g2Option.setRequired(false); g2Option.setArgName("SECOND GROUP"); g2Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g2Option); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, 
"number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); Option removeOption = new Option("r", "remove-not-significant", false, "remove relationships that are not" + "significant from the final output"); removeOption.setRequired(false); options.addOption(removeOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp("hadoop jar data-polygamy.jar " + 
"edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } Path path = null; FileSystem fs = FileSystem.get(new Configuration()); ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> firstGroup = new ArrayList<String>(); ArrayList<String> secondGroup = new ArrayList<String>(); HashMap<String, String> datasetAgg = new HashMap<String, String>(); boolean removeNotSignificant = cmd.hasOption("r"); boolean removeExistingFiles = cmd.hasOption("f"); boolean completeRandomization = cmd.hasOption("c"); boolean hasScoreThreshold = cmd.hasOption("sc"); boolean hasStrengthThreshold = cmd.hasOption("st"); boolean outputIds = cmd.hasOption("id"); String scoreThreshold = hasScoreThreshold ? cmd.getOptionValue("sc") : ""; String strengthThreshold = hasStrengthThreshold ? 
cmd.getOptionValue("st") : ""; // all datasets ArrayList<String> all_datasets = new ArrayList<String>(); if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { all_datasets.add(line.split("\t")[0]); line = br.readLine(); } br.close(); if (s3) fs.close(); String[] all_datasets_array = new String[all_datasets.size()]; all_datasets.toArray(all_datasets_array); String[] firstGroupCmd = cmd.getOptionValues("g1"); String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : all_datasets_array; addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } if (firstGroup.isEmpty()) { System.out.println("No indices from datasets in G1."); System.exit(0); } if (secondGroup.isEmpty()) { System.out.println("No indices from datasets in G2."); System.exit(0); } // getting dataset ids String datasetNames = ""; String datasetIds = ""; HashMap<String, String> datasetId = new HashMap<String, String>(); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { datasetId.put(it.next(), null); } if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } br = new BufferedReader(new InputStreamReader(fs.open(path))); line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); all_datasets.add(dt[0]); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; 
datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } String firstGroupStr = ""; String secondGroupStr = ""; for (String dataset : firstGroup) { firstGroupStr += datasetId.get(dataset) + ","; } for (String dataset : secondGroup) { secondGroupStr += datasetId.get(dataset) + ","; } firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1); secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1); String relationshipsDir = ""; if (outputIds) { relationshipsDir = FrameworkUtils.relationshipsIdsDir; } else { relationshipsDir = FrameworkUtils.relationshipsDir; } FrameworkUtils.createDir(s3bucket + relationshipsDir, s3conf, s3); String random = completeRandomization ? 
"complete" : "restricted"; String indexInputDirs = ""; String noRelationship = ""; HashSet<String> dirs = new HashSet<String>(); String dataset1; String dataset2; String datasetId1; String datasetId2; for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } datasetId1 = datasetId.get(dataset1); datasetId2 = datasetId.get(dataset2); if (dataset1.equals(dataset2)) continue; String correlationOutputFileName = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) { dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset1); dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset2); } else { noRelationship += datasetId1 + "-" + datasetId2 + ","; } } } if (dirs.isEmpty()) { System.out.println("All the relationships were already computed."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } for (String dir : dirs) { indexInputDirs += dir + ","; } Configuration conf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String jobName = "relationship" + "-" + random; String relationshipOutputDir = s3bucket + relationshipsDir + "/tmp/"; FrameworkUtils.removeFile(relationshipOutputDir, s3conf, s3); for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i))); } for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size", 
Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length)); } conf.set("dataset-keys", datasetIds); conf.set("dataset-names", datasetNames); conf.set("first-group", firstGroupStr); conf.set("second-group", secondGroupStr); conf.set("complete-random", String.valueOf(completeRandomization)); conf.set("output-ids", String.valueOf(outputIds)); conf.set("complete-random-str", random); conf.set("main-dataset-id", datasetId.get(shortDataset.get(0))); conf.set("remove-not-significant", String.valueOf(removeNotSignificant)); if (noRelationship.length() > 0) { conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1)); } if (hasScoreThreshold) { conf.set("score-threshold", scoreThreshold); } if (hasStrengthThreshold) { conf.set("strength-threshold", strengthThreshold); } conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); conf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); conf.set("mapreduce.input.fileinputformat.split.minsize", "0"); conf.set("mapreduce.task.io.sort.mb", "200"); conf.set("mapreduce.task.io.sort.factor", "100"); conf.set("mapreduce.task.timeout", "2400000"); if (s3) { machineConf.setMachineConfiguration(conf); conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); conf.set("bucket", s3bucket); } if (snappyCompression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); 
//conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } Job job = new Job(conf); job.setJobName(jobName); job.setMapOutputKeyClass(PairAttributeWritable.class); job.setMapOutputValueClass(TopologyTimeSeriesWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CorrelationMapper.class); job.setReducerClass(CorrelationReducer.class); job.setNumReduceTasks(machineConf.getNumberReduces()); job.setInputFormatClass(SequenceFileInputFormat.class); //job.setOutputFormatClass(TextOutputFormat.class); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, indexInputDirs.substring(0, indexInputDirs.length() - 1)); FileOutputFormat.setOutputPath(job, new Path(relationshipOutputDir)); job.setJarByClass(Relationship.class); long start = System.currentTimeMillis(); job.submit(); job.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } if (dataset1.equals(dataset2)) continue; String from = s3bucket + relationshipsDir + "/tmp/" + dataset1 + "-" + dataset2 + "/"; String to = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } } }
From source file:net.java.sen.tools.MkSenDic.java
/** * Build sen dictionary./*from w w w .j a v a2s . c o m*/ * * @param args * custom dictionary files. see dic/build.xml. */ public static void main(String args[]) { ResourceBundle rb = ResourceBundle.getBundle("dictionary"); DictionaryMaker dm1 = new DictionaryMaker(); DictionaryMaker dm2 = new DictionaryMaker(); DictionaryMaker dm3 = new DictionaryMaker(); // 1st field information of connect file. Vector rule1 = new Vector(); // 2nd field information of connect file. Vector rule2 = new Vector(); // 3rd field information of connect file. Vector rule3 = new Vector(); // 4th field information of connect file. // this field shows cost of morpheme connection // [size3*(x3*size2+x2)+x1] // [size3*(Attr1*size2+Attr2)+Attl] short score[] = new short[20131]; long start = System.currentTimeMillis(); // ///////////////////////////////////////// // // Step1. Loading connetion file. // log.info("(1/7): reading connection matrix ... "); try { log.info("connection file = " + rb.getString("text_connection_file")); log.info("charset = " + rb.getString("dic.charset")); CSVParser csvparser = new CSVParser(new FileInputStream(rb.getString("text_connection_file")), rb.getString("dic.charset")); String t[]; int line = 0; while ((t = csvparser.nextTokens()) != null) { if (t.length < 4) { log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line); log.warn(rb.getString("text_connection_file") + "may be broken."); break; } dm1.add(t[0]); rule1.add(t[0]); dm2.add(t[1]); rule2.add(t[1]); dm3.add(t[2]); rule3.add(t[2]); if (line == score.length) { score = resize(score); } score[line++] = (short) Integer.parseInt(t[3]); } // ///////////////////////////////////////// // // Step2. Building internal dictionary // log.info("(2/7): building type dictionary ... 
"); dm1.build(); dm2.build(); dm3.build(); // if you want check specified morpheme, you uncomment and modify // following line: /* * System.out.print("22="); dm3.getById(22); * System.out.print("368="); dm3.getById(368); * * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * DictionaryMaker.debug = true; * System.out.println(dm3.getDicId("?????*,*,*,*,?")); * System.out.println(dm3.getDicIdNoCache("?????*,*,*,*,?")); */ } catch (IOException e) { e.printStackTrace(); System.exit(0); } // ------------------------------------------------- int size1 = dm1.size(); int size2 = dm2.size(); int size3 = dm3.size(); int ruleSize = rule1.size(); short matrix[] = new short[size1 * size2 * size3]; short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost")); // ///////////////////////////////////////// // // Step3. Writing Connection Matrix // log.info("(3/7): writing conection matrix (" + size1 + " x " + size2 + " x " + size3 + " = " + size1 * size2 * size3 + ") ..."); for (int i = 0; i < (int) (size1 * size2 * size3); i++) matrix[i] = default_cost; for (int i = 0; i < ruleSize; i++) { Vector r1 = dm1.getRuleIdList((String) rule1.get(i)); Vector r2 = dm2.getRuleIdList((String) rule2.get(i)); Vector r3 = dm3.getRuleIdList((String) rule3.get(i)); for (Iterator i1 = r1.iterator(); i1.hasNext();) { int ii1 = ((Integer) i1.next()).intValue(); for (Iterator i2 = r2.iterator(); i2.hasNext();) { int ii2 = ((Integer) i2.next()).intValue(); for (Iterator i3 = r3.iterator(); i3.hasNext();) { int ii3 = ((Integer) i3.next()).intValue(); int pos = size3 * (size2 * ii1 + ii2) + ii3; matrix[pos] = score[i]; } } } } try { DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file")))); out.writeShort(size1); out.writeShort(size2); out.writeShort(size3); for (int i1 = 0; i1 < size1; i1++) for (int i2 = 0; i2 < size2; i2++) for (int i3 = 0; i3 < size3; i3++) { out.writeShort(matrix[size3 * (size2 * i1 + i2) + 
i3]); // if (matrix[size3 * (size2 * i1 + i2) + i3] != // default_cost) { // } } out.close(); } catch (IOException e) { e.printStackTrace(); System.exit(0); } matrix = null; score = null; // ------------------------------------------------- int pos_start = Integer.parseInt(rb.getString("pos_start")); int pos_size = Integer.parseInt(rb.getString("pos_size")); int di = 0; int offset = 0; ArrayList dicList = new ArrayList(); // ///////////////////////////////////////// // // Step4. Reading Morpheme Information // log.info("(4/7): reading morpheme information ... "); String t = null; String[] csv = null; try { // writer for feature file. BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset"))); log.info("load dic: " + rb.getString("text_dic_file")); BufferedReader dicStream = null; int custom_dic = -1; if (args.length == 0) { dicStream = new BufferedReader(new InputStreamReader( new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset"))); } else { custom_dic = 0; dicStream = new BufferedReader( new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } int line = 0; CSVData key_b = new CSVData(); CSVData pos_b = new CSVData(); while (true) { t = dicStream.readLine(); if (t == null) { dicStream.close(); custom_dic++; if (args.length == custom_dic) { break; } else { // read custum dictionary log.info("load dic: " + "args[custum_dic]"); dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset"))); } continue; } CSVParser parser = new CSVParser(t); csv = parser.nextTokens(); if (csv.length < (pos_size + pos_start)) { throw new RuntimeException("format error:" + t); } key_b.clear(); pos_b.clear(); for (int i = pos_start; i < (pos_start + pos_size - 1); i++) { key_b.append(csv[i]); pos_b.append(csv[i]); } key_b.append(csv[pos_start + pos_size - 1]); pos_b.append(csv[pos_start 
+ pos_size - 1]); for (int i = pos_start + pos_size; i < (csv.length - 1); i++) { pos_b.append(csv[i]); } pos_b.append(csv[csv.length - 1]); CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(key_b.toString()); token.rcAttr1 = (short) dm2.getDicId(key_b.toString()); token.lcAttr = (short) dm3.getDicId(key_b.toString()); token.posid = 0; token.posID = offset; token.length = (short) csv[0].length(); token.cost = (short) Integer.parseInt(csv[1]); dicList.add(new PairObject(csv[0], token)); byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset")); offset += (b.length + 1); String pos_b_str = pos_b.toString(); bw.write(pos_b_str, 0, pos_b_str.length()); // bw.write(b, 0, b.length); bw.write(0); if (++di % 50000 == 0) log.info("" + di + "... "); } bw.close(); // ----end of writing feature.cha ---- } catch (Exception e) { log.error("Error: " + t); e.printStackTrace(); System.exit(1); } rule1 = null; rule2 = null; rule3 = null; // ///////////////////////////////////////// // // Step5. Sort lexs and write to file // log.info("(5/7): sorting lex... "); int value[] = new int[dicList.size()]; char key[][] = new char[dicList.size()][]; int spos = 0; int dsize = 0; int bsize = 0; String prev = ""; Collections.sort(dicList); // ///////////////////////////////////////// // // Step6. Writing Token Information // log.info("(6/7): writing token... "); try { // writer for token file. DataOutputStream out = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(rb.getString("token_file")))); // writing 'bos' and 'eos' and 'unknown' token. 
CToken token = new CToken(); token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos")); token.write(out); token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos")); token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos")); token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos")); token.posID = -1; token.write(out); log.info("key size = " + key.length); for (int i = 0; i < key.length; i++) { String k = (String) ((PairObject) dicList.get(i)).key; if (!prev.equals(k) && i != 0) { key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; bsize = 1; spos = i; } else { bsize++; } prev = (String) ((PairObject) dicList.get(i)).key; ((CToken) (((PairObject) dicList.get(i)).value)).write(out); } out.flush(); out.close(); } catch (Exception e) { e.printStackTrace(); System.exit(1); } key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray(); value[dsize] = bsize + (spos << 8); dsize++; dm1 = null; dm2 = null; dm3 = null; dicList = null; // ///////////////////////////////////////// // // Step7. Build Double Array // log.info("(7/7): building Double-Array (size = " + dsize + ") ..."); DoubleArrayTrie da = new DoubleArrayTrie(); da.build(key, null, value, dsize); try { da.save(rb.getString("double_array_file")); } catch (Exception e) { e.printStackTrace(); } log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[ms]"); }
From source file:ServerStatus.java
License:asdf
/** * @param args the command line arguments *///from w w w .jav a2 s. c o m public static void main(String[] args) throws InterruptedException, FileNotFoundException, IOException, ParseException { FileReader reader = null; ArrayList<BankInfo2> BankArray = new ArrayList<BankInfo2>(); reader = new FileReader(args[0]); JSONParser jp = new JSONParser(); JSONObject doc = (JSONObject) jp.parse(reader); JSONObject banks = (JSONObject) doc.get("banks"); //Set bankKeys = banks.keySet(); //Object [] bankNames = bankKeys.toArray(); Object[] bankNames = banks.keySet().toArray(); for (int i = 0; i < bankNames.length; i++) { //System.out.println(bankNames[i]); String bname = (String) bankNames[i]; BankInfo2 binfo = new BankInfo2(bname); JSONObject banki = (JSONObject) banks.get(bname); JSONArray chain = (JSONArray) banki.get("chain"); int chainLength = chain.size(); //System.out.println(chainLength); for (Object chain1 : chain) { JSONObject serv = (JSONObject) chain1; ServerInfo sinfo = new ServerInfo((String) serv.get("ip"), serv.get("port").toString(), serv.get("start_delay").toString(), serv.get("lifetime").toString(), serv.get("receive").toString(), serv.get("send").toString()); binfo.servers.add(sinfo); //System.out.println(serv.get("ip") + ":" + serv.get("port")); } BankArray.add(binfo); } //System.out.println("Done Processing Servers"); JSONArray clients = (JSONArray) doc.get("clients"); ArrayList<ClientInfo> clientsList = new ArrayList<ClientInfo>(); for (int i = 0; i < clients.size(); i++) { JSONObject client_i = (JSONObject) clients.get(i); //This is for hard coded requests in the json file //System.out.println(client_i); //System.out.println(client_i.getClass()); String typeOfClient = client_i.get("requests").getClass().toString(); //This is for a client that has hardCoded requests if (typeOfClient.equals("class org.json.simple.JSONArray")) { //System.out.println("JSONArray"); JSONArray requests = (JSONArray) client_i.get("requests"); ClientInfo c = new 
ClientInfo(client_i.get("reply_timeout").toString(), client_i.get("request_retries").toString(), client_i.get("resend_head").toString()); c.prob_failure = client_i.get("prob_failure").toString(); c.msg_send_delay = client_i.get("msg_delay").toString(); System.out.println( "Successfully added prob failure and msg_send " + c.prob_failure + "," + c.msg_send_delay); ArrayList<RequestInfo> req_list = new ArrayList<RequestInfo>(); for (int j = 0; j < requests.size(); j++) { JSONObject request_j = (JSONObject) requests.get(j); String req = request_j.get("request").toString(); String bank = request_j.get("" + "bank").toString(); String acc = request_j.get("account").toString(); String seq = request_j.get("seq_num").toString(); String amt = null; try { amt = request_j.get("amount").toString(); } catch (NullPointerException e) { //System.out.println("Amount not specified."); } RequestInfo r; if (amt == null) { r = new RequestInfo(req, bank, acc, seq); } else { r = new RequestInfo(req, bank, acc, amt, seq); } //RequestInfo r = new RequestInfo(request_j.get("request").toString(), request_j.get("bank").toString(), request_j.get("account").toString(), request_j.get("amount").toString()); req_list.add(r); } c.requests = req_list; c.PortNumber = 60000 + i; clientsList.add(c); //System.out.println(client_i); } //This is for Random client requests else if (typeOfClient.equals("class org.json.simple.JSONObject")) { JSONObject randomReq = (JSONObject) client_i.get("requests"); String seed = randomReq.get("seed").toString(); String num_requests = randomReq.get("num_requests").toString(); String prob_balance = randomReq.get("prob_balance").toString(); String prob_deposit = randomReq.get("prob_deposit").toString(); String prob_withdraw = randomReq.get("prob_withdrawal").toString(); String prob_transfer = randomReq.get("prob_transfer").toString(); //ClientInfo c = new ClientInfo(true, seed, num_requests, prob_balance, prob_deposit, prob_withdraw, prob_transfer); ClientInfo c = new 
ClientInfo(client_i.get("reply_timeout").toString(), client_i.get("request_retries").toString(), client_i.get("resend_head").toString(), seed, num_requests, prob_balance, prob_deposit, prob_withdraw, prob_transfer); c.PortNumber = 60000 + i; clientsList.add(c); } } //System.out.println(clients.size()); double lowerPercent = 0.0; double upperPercent = 1.0; double result; String bankChainInfoMaster = ""; for (int x = 0; x < BankArray.size(); x++) { BankInfo2 analyze = BankArray.get(x); String chain = analyze.bank_name + "#"; //analyze.servers for (int j = 0; j < analyze.servers.size(); j++) { if (analyze.servers.get(j).Start_delay.equals("0")) { if (j == 0) { chain += analyze.servers.get(j).Port; } else { chain += "#" + analyze.servers.get(j).Port; } } } if (x == 0) { bankChainInfoMaster += chain; } else { bankChainInfoMaster += "@" + chain; } } //System.out.println("CHAIN: "+ bankChainInfoMaster); String clientInfoMaster = ""; for (int x = 0; x < clientsList.size(); x++) { ClientInfo analyze = clientsList.get(x); if (x == 0) { clientInfoMaster += analyze.PortNumber; } else { clientInfoMaster += "#" + analyze.PortNumber; } } //System.out.println("Clients: "+ clientInfoMaster); //RUN MASTER HERE String MasterPort = "49999"; String masterExec = "java Master " + MasterPort + " " + clientInfoMaster + " " + bankChainInfoMaster; Process masterProcess = Runtime.getRuntime().exec(masterExec); System.out.println(masterExec); ArrayList<ServerInfoForClient> servInfoCli = new ArrayList<ServerInfoForClient>(); // List of all servers is saved so that we can wait for them to exit. 
ArrayList<Process> serverPros = new ArrayList<Process>(); //ArrayList<String> execServs = new ArrayList<String>(); for (int i = 0; i < BankArray.size(); i++) { BankInfo2 analyze = BankArray.get(i); //System.out.println(analyze.bank_name); //One server in the chain String execCmd = "java Server "; String hIP = "", hPort = "", tIP = "", tPort = "", bn = ""; bn = analyze.bank_name; boolean joinFlag = false; if (analyze.servers.size() == 2 && analyze.servers.get(1).Start_delay.equals("0")) { joinFlag = false; } else { joinFlag = true; } if (analyze.servers.size() == 1 && joinFlag == false) { //if(analyze.servers.size() == 1){ ServerInfo si = analyze.servers.get(0); execCmd += "HEAD_TAIL " + si.IP + ":" + si.Port; execCmd += " localhost:0 localhost:0 localhost:" + MasterPort + " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " " + analyze.bank_name; ; hIP = si.IP; hPort = si.Port; tIP = si.IP; tPort = si.Port; System.out.println(execCmd); Thread.sleep(500); Process pro = Runtime.getRuntime().exec(execCmd); serverPros.add(pro); //} } else if (analyze.servers.size() == 2 && joinFlag == true) { ServerInfo si = analyze.servers.get(0); execCmd += "HEAD_TAIL " + si.IP + ":" + si.Port; execCmd += " localhost:0 localhost:0 localhost:" + MasterPort + " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " " + analyze.bank_name; ; hIP = si.IP; hPort = si.Port; tIP = si.IP; tPort = si.Port; System.out.println(execCmd); Thread.sleep(500); Process pro = Runtime.getRuntime().exec(execCmd); serverPros.add(pro); execCmd = "java Server "; ServerInfo si2 = analyze.servers.get(1); execCmd += "TAIL " + si2.IP + ":" + si2.Port; execCmd += " localhost:0 localhost:0 localhost:" + MasterPort + " " + si2.Start_delay + " " + si2.Lifetime + " " + si2.Receive + " " + si2.Send + " " + analyze.bank_name; ; hIP = si.IP; hPort = si.Port; tIP = si.IP; tPort = si.Port; System.out.println(execCmd); Thread.sleep(500); Process pro2 = 
Runtime.getRuntime().exec(execCmd); serverPros.add(pro2); } else { int icount = 0; for (int x = 0; x < analyze.servers.size(); x++) { ServerInfo si = analyze.servers.get(x); if (si.Start_delay.equals("0")) { icount++; } } System.out.println("icount:" + icount); for (int j = 0; j < icount; j++) { //for(int j = 0; j < analyze.servers.size(); j++){ execCmd = "java Server "; ServerInfo si = analyze.servers.get(j); //Head server if (j == 0) { ServerInfo siSucc = analyze.servers.get(j + 1); execCmd += "HEAD " + si.IP + ":" + si.Port + " "; execCmd += "localhost:0 " + siSucc.IP + ":" + siSucc.Port + " localhost:" + MasterPort; execCmd += " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " " + analyze.bank_name; System.out.println(execCmd); hIP = si.IP; hPort = si.Port; } //Tail Server else if (j == (icount - 1)) {//analyze.servers.size() - 1) ){ ServerInfo siPred = analyze.servers.get(j - 1); execCmd += "TAIL " + si.IP + ":" + si.Port + " "; execCmd += siPred.IP + ":" + siPred.Port + " localhost:0 localhost:" + MasterPort; execCmd += " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " " + analyze.bank_name; tIP = si.IP; tPort = si.Port; System.out.println(execCmd); } //Middle Server else { ServerInfo siSucc = analyze.servers.get(j + 1); ServerInfo siPred = analyze.servers.get(j - 1); execCmd += "MIDDLE " + si.IP + ":" + si.Port + " "; execCmd += siPred.IP + ":" + siPred.Port + " " + siSucc.IP + ":" + siSucc.Port + " localhost:" + MasterPort; execCmd += " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " " + analyze.bank_name; System.out.println(execCmd); } Thread.sleep(500); Process pro = Runtime.getRuntime().exec(execCmd); serverPros.add(pro); } for (int j = icount; j < analyze.servers.size(); j++) { execCmd = "java Server "; ServerInfo si = analyze.servers.get(j); ServerInfo siPred = analyze.servers.get(j - 1); execCmd += "TAIL " + si.IP + ":" + si.Port + " "; execCmd += siPred.IP + ":" 
+ siPred.Port + " localhost:0 localhost:" + MasterPort; execCmd += " " + si.Start_delay + " " + si.Lifetime + " " + si.Receive + " " + si.Send + " " + analyze.bank_name; tIP = si.IP; tPort = si.Port; System.out.println(execCmd); Thread.sleep(500); Process pro = Runtime.getRuntime().exec(execCmd); serverPros.add(pro); } } ServerInfoForClient newServInfoForCli = new ServerInfoForClient(hPort, hIP, tPort, tIP, bn); servInfoCli.add(newServInfoForCli); } String banksCliParam = ""; for (int i = 0; i < servInfoCli.size(); i++) { ServerInfoForClient temp = servInfoCli.get(i); String add = "@" + temp.bank_name + "#" + temp.HeadIP + ":" + temp.HeadPort + "#" + temp.TailIP + ":" + temp.TailPort; banksCliParam += add; } banksCliParam = banksCliParam.replaceFirst("@", ""); //System.out.println(banksCliParam); // List of clients is saved so that we can wait for them to exit. ArrayList<Process> clientPros = new ArrayList<Process>(); for (int i = 0; i < clientsList.size(); i++) { ClientInfo analyze = clientsList.get(i); String requestsString = ""; if (analyze.isRandom) { double balance = Double.parseDouble(analyze.prob_balance); //System.out.println(analyze.prob_balance); double deposit = Double.parseDouble(analyze.prob_deposit); double withdraw = Double.parseDouble(analyze.prob_withdraw); int numRequests = Integer.parseInt(analyze.num_requests); for (int j = 0; j < numRequests; j++) { result = Math.random() * (1.0 - 0.0) + 0.0; int randAccount = (int) (Math.random() * (10001 - 0) + 0); double randAmount = Math.random() * (10001.00 - 0.0) + 0; int adjustMoney = (int) randAmount * 100; randAmount = (double) adjustMoney / 100.00; int randBank = (int) (Math.random() * (bankNames.length - 0) + 0); if (result < balance) { //withdrawal#clientIPPORT%bank_name%accountnum%seq#amount requestsString += "@balance#localhost:" + analyze.PortNumber + "%" + bankNames[randBank] + "%" + randAccount + "%" + j; } else if (result < (deposit + balance)) { requestsString += "@deposit#localhost:" + 
analyze.PortNumber + "%" + bankNames[randBank] + "%" + randAccount + "%" + j + "#" + randAmount; } else { requestsString += "@withdrawal#localhost:" + analyze.PortNumber + "%" + bankNames[randBank] + "%" + randAccount + "%" + j + "#" + randAmount; } } } else { for (int j = 0; j < analyze.requests.size(); j++) { RequestInfo req = analyze.requests.get(j); //System.out.println("Sequence ###" + req.sequenceNum); if (req.request.equals("balance")) { requestsString += "@" + req.request + "#localhost:" + analyze.PortNumber + "%" + req.bankName + "%" + req.accountNum + "%" + req.sequenceNum; } else { requestsString += "@" + req.request + "#localhost:" + analyze.PortNumber + "%" + req.bankName + "%" + req.accountNum + "%" + req.sequenceNum + "#" + req.amount; } } } requestsString = requestsString.replaceFirst("@", ""); String execCommand; int p = 60000 + i; if (analyze.isRandom) { execCommand = "java Client localhost:" + p + " " + banksCliParam + " " + requestsString + " " + analyze.reply_timeout + " " + analyze.request_retries + " " + analyze.resend_head + " " + analyze.prob_failure + " " + analyze.msg_send_delay + " " + analyze.prob_balance + "," + analyze.prob_deposit + "," + analyze.prob_withdraw + "," + analyze.prob_transfer; } else { execCommand = "java Client localhost:" + p + " " + banksCliParam + " " + requestsString + " " + analyze.reply_timeout + " " + analyze.request_retries + " " + analyze.resend_head + " " + analyze.prob_failure + " " + analyze.msg_send_delay; } Thread.sleep(500); System.out.println(execCommand); System.out.println("Client " + (i + 1) + " started"); Process cliPro = Runtime.getRuntime().exec(execCommand); clientPros.add(cliPro); //System.out.println(requestsString); } // Wait for all the clients to terminate for (Process clientPro : clientPros) { try { clientPro.waitFor(); System.out.println("Client process finished."); } catch (InterruptedException e) { System.out.println("Interrupted while waiting for client."); } } // Sleep for two seconds 
Thread.sleep(2000); // Force termination of the servers for (Process serverPro : serverPros) { serverPro.destroy(); System.out.println("Killed server."); } masterProcess.destroy(); System.out.println("Killed Master"); //System.out.println("asdf"); }
From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java
/** * @param args//from ww w. j a va 2 s .co m */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the aggregate functions " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions" + " will be computed, followed by their temporal and spatial attribute indices"); gOption.setRequired(true); gOption.setArgName("GROUP"); gOption.setArgs(Option.UNLIMITED_VALUES); options.addOption(gOption); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this 
is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } String datasetNames = ""; String datasetIds = ""; String preProcessingDatasets = ""; ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> 
shortDatasetAggregation = new ArrayList<String>(); HashMap<String, String> datasetTempAtt = new HashMap<String, String>(); HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>(); HashMap<String, String> preProcessingDataset = new HashMap<String, String>(); HashMap<String, String> datasetId = new HashMap<String, String>(); boolean removeExistingFiles = cmd.hasOption("f"); String[] datasetArgs = cmd.getOptionValues("g"); for (int i = 0; i < datasetArgs.length; i += 3) { String dataset = datasetArgs[i]; // getting pre-processing String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3); if (tempPreProcessing == null) { System.out.println("No pre-processing available for " + dataset); continue; } preProcessingDataset.put(dataset, tempPreProcessing); shortDataset.add(dataset); datasetTempAtt.put(dataset, ((datasetArgs[i + 1] == "null") ? null : datasetArgs[i + 1])); datasetSpatialAtt.put(dataset, ((datasetArgs[i + 2] == "null") ? null : datasetArgs[i + 2])); datasetId.put(dataset, null); } if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } // getting dataset id Path path = null; FileSystem fs = null; if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { fs = FileSystem.get(new Configuration()); path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { String 
dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3); // getting smallest resolution HashMap<String, String> tempResMap = new HashMap<String, String>(); HashMap<String, String> spatialResMap = new HashMap<String, String>(); HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>(); HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>(); HashSet<String> input = new HashSet<String>(); for (String dataset : shortDataset) { String[] datasetArray = preProcessingDataset.get(dataset).split("-"); String datasetTemporalStr = datasetArray[datasetArray.length - 2]; int datasetTemporal = utils.temporalResolution(datasetTemporalStr); String datasetSpatialStr = datasetArray[datasetArray.length - 1]; int datasetSpatial = utils.spatialResolution(datasetSpatialStr); // finding all possible resolutions String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal); String[] spatialResolutions = FrameworkUtils.getAggSpatialResolutions(datasetSpatial); String temporalResolution = ""; String spatialResolution = ""; String tempRes = ""; String spatialRes = ""; boolean dataAdded = false; for (int i = 0; i < temporalResolutions.length; i++) { for (int j = 0; j < spatialResolutions.length; j++) { temporalResolution = temporalResolutions[i]; spatialResolution = spatialResolutions[j]; String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) { dataAdded = true; tempRes += temporalResolution + "-"; spatialRes += spatialResolution + "-"; } } } if (dataAdded) { input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); 
shortDatasetAggregation.add(dataset); tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1)); spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1)); datasetTemporalStrMap.put(dataset, datasetTemporalStr); datasetSpatialStrMap.put(dataset, datasetSpatialStr); } } if (input.isEmpty()) { System.out.println("All the input datasets have aggregates."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } it = input.iterator(); while (it.hasNext()) { preProcessingDatasets += it.next() + ","; } Job aggJob = null; String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/"; String jobName = "aggregates"; FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3); Configuration aggConf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); aggConf.set("dataset-name", datasetNames); aggConf.set("dataset-id", datasetIds); for (int i = 0; i < shortDatasetAggregation.size(); i++) { String dataset = shortDatasetAggregation.get(i); String id = datasetId.get(dataset); aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset)); aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset)); aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset)); aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset)); aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset)); aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset)); if (s3) aggConf.set("dataset-" + id, s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); else aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/" + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); } aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); 
aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0"); aggConf.set("mapreduce.task.io.sort.mb", "200"); aggConf.set("mapreduce.task.io.sort.factor", "100"); machineConf.setMachineConfiguration(aggConf); if (s3) { machineConf.setMachineConfiguration(aggConf); aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); } if (snappyCompression) { aggConf.set("mapreduce.map.output.compress", "true"); aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { aggConf.set("mapreduce.map.output.compress", "true"); aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } aggJob = new Job(aggConf); aggJob.setJobName(jobName); aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class); aggJob.setMapOutputValueClass(AggregationArrayWritable.class); aggJob.setOutputKeyClass(SpatioTemporalWritable.class); aggJob.setOutputValueClass(FloatArrayWritable.class); //aggJob.setOutputKeyClass(Text.class); //aggJob.setOutputValueClass(Text.class); aggJob.setMapperClass(AggregationMapper.class); aggJob.setCombinerClass(AggregationCombiner.class); aggJob.setReducerClass(AggregationReducer.class); aggJob.setNumReduceTasks(machineConf.getNumberReduces()); aggJob.setInputFormatClass(SequenceFileInputFormat.class); //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class); LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class); 
//LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(aggJob, true); SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK); FileInputFormat.setInputDirRecursive(aggJob, true); FileInputFormat.setInputPaths(aggJob, preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1)); FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir)); aggJob.setJarByClass(Aggregation.class); long start = System.currentTimeMillis(); aggJob.submit(); aggJob.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (String dataset : shortDatasetAggregation) { String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/"; String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } }
From source file:DIA_Umpire_Quant.DIA_Umpire_Quant.java
/** * @param args the command line arguments *///from w w w.jav a2 s.co m public static void main(String[] args) throws FileNotFoundException, IOException, Exception { System.out.println( "================================================================================================="); System.out.println("DIA-Umpire quantitation with targeted re-extraction analysis (version: " + UmpireInfo.GetInstance().Version + ")"); if (args.length != 1) { System.out.println( "command format error, it should be like: java -jar -Xmx10G DIA_Umpire_Quant.jar diaumpire_quant.params"); return; } try { ConsoleLogger.SetConsoleLogger(Level.INFO); ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_quant.log"); } catch (Exception e) { } try { Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version); Logger.getRootLogger().info("Parameter file:" + args[0]); BufferedReader reader = new BufferedReader(new FileReader(args[0])); String line = ""; String WorkFolder = ""; int NoCPUs = 2; String UserMod = ""; String Combined_Prot = ""; String InternalLibID = ""; String ExternalLibPath = ""; String ExternalLibDecoyTag = "DECOY"; boolean DefaultProtFiltering = true; boolean DataSetLevelPepFDR = false; float ProbThreshold = 0.99f; float ExtProbThreshold = 0.99f; float Freq = 0f; int TopNPep = 6; int TopNFrag = 6; float MinFragMz = 200f; String FilterWeight = "GW"; float MinWeight = 0.9f; float RTWindow_Int = -1f; float RTWindow_Ext = -1f; TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600); HashMap<String, File> AssignFiles = new HashMap<>(); boolean InternalLibSearch = false; boolean ExternalLibSearch = false; boolean ExportSaint = false; boolean SAINT_MS1 = false; boolean SAINT_MS2 = true; HashMap<String, String[]> BaitList = new HashMap<>(); HashMap<String, String> BaitName = new HashMap<>(); HashMap<String, String[]> ControlList = new HashMap<>(); HashMap<String, String> ControlName = new HashMap<>(); 
//<editor-fold defaultstate="collapsed" desc="Reading parameter file"> while ((line = reader.readLine()) != null) { line = line.trim(); Logger.getRootLogger().info(line); if (!"".equals(line) && !line.startsWith("#")) { //System.out.println(line); if (line.equals("==File list begin")) { do { line = reader.readLine(); line = line.trim(); if (line.equals("==File list end")) { continue; } else if (!"".equals(line)) { File newfile = new File(line); if (newfile.exists()) { AssignFiles.put(newfile.getAbsolutePath(), newfile); } else { Logger.getRootLogger().info("File: " + newfile + " does not exist."); } } } while (!line.equals("==File list end")); } if (line.split("=").length < 2) { continue; } String type = line.split("=")[0].trim(); String value = line.split("=")[1].trim(); switch (type) { case "TargetedExtraction": { InternalLibSearch = Boolean.parseBoolean(value); break; } case "InternalLibSearch": { InternalLibSearch = Boolean.parseBoolean(value); break; } case "ExternalLibSearch": { ExternalLibSearch = Boolean.parseBoolean(value); break; } case "Path": { WorkFolder = value; break; } case "path": { WorkFolder = value; break; } case "Thread": { NoCPUs = Integer.parseInt(value); break; } case "Fasta": { tandemPara.FastaPath = value; break; } case "Combined_Prot": { Combined_Prot = value; break; } case "DefaultProtFiltering": { DefaultProtFiltering = Boolean.parseBoolean(value); break; } case "DecoyPrefix": { if (!"".equals(value)) { tandemPara.DecoyPrefix = value; } break; } case "UserMod": { UserMod = value; break; } case "ProteinFDR": { tandemPara.ProtFDR = Float.parseFloat(value); break; } case "PeptideFDR": { tandemPara.PepFDR = Float.parseFloat(value); break; } case "DataSetLevelPepFDR": { DataSetLevelPepFDR = Boolean.parseBoolean(value); break; } case "InternalLibID": { InternalLibID = value; break; } case "ExternalLibPath": { ExternalLibPath = value; break; } case "ExtProbThreshold": { ExtProbThreshold = Float.parseFloat(value); break; } case "RTWindow_Int": 
{ RTWindow_Int = Float.parseFloat(value); break; } case "RTWindow_Ext": { RTWindow_Ext = Float.parseFloat(value); break; } case "ExternalLibDecoyTag": { ExternalLibDecoyTag = value; if (ExternalLibDecoyTag.endsWith("_")) { ExternalLibDecoyTag = ExternalLibDecoyTag.substring(0, ExternalLibDecoyTag.length() - 1); } break; } case "ProbThreshold": { ProbThreshold = Float.parseFloat(value); break; } case "ReSearchProb": { //ReSearchProb = Float.parseFloat(value); break; } case "FilterWeight": { FilterWeight = value; break; } case "MinWeight": { MinWeight = Float.parseFloat(value); break; } case "TopNFrag": { TopNFrag = Integer.parseInt(value); break; } case "TopNPep": { TopNPep = Integer.parseInt(value); break; } case "Freq": { Freq = Float.parseFloat(value); break; } case "MinFragMz": { MinFragMz = Float.parseFloat(value); break; } //<editor-fold defaultstate="collapsed" desc="SaintOutput"> case "ExportSaintInput": { ExportSaint = Boolean.parseBoolean(value); break; } case "QuantitationType": { switch (value) { case "MS1": { SAINT_MS1 = true; SAINT_MS2 = false; break; } case "MS2": { SAINT_MS1 = false; SAINT_MS2 = true; break; } case "BOTH": { SAINT_MS1 = true; SAINT_MS2 = true; break; } } break; } // case "BaitInputFile": { // SaintBaitFile = value; // break; // } // case "PreyInputFile": { // SaintPreyFile = value; // break; // } // case "InterationInputFile": { // SaintInteractionFile = value; // break; // } default: { if (type.startsWith("BaitName_")) { BaitName.put(type.substring(9), value); } if (type.startsWith("BaitFile_")) { BaitList.put(type.substring(9), value.split("\t")); } if (type.startsWith("ControlName_")) { ControlName.put(type.substring(12), value); } if (type.startsWith("ControlFile_")) { ControlList.put(type.substring(12), value.split("\t")); } break; } //</editor-fold> } } } //</editor-fold> //Initialize PTM manager using compomics library PTMManager.GetInstance(); if (!UserMod.equals("")) { PTMManager.GetInstance().ImportUserMod(UserMod); } 
//Check if the fasta file can be found if (!new File(tandemPara.FastaPath).exists()) { Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath + " cannot be found, the process will be terminated, please check."); System.exit(1); } //Check if the prot.xml file can be found if (!new File(Combined_Prot).exists()) { Logger.getRootLogger().info("ProtXML file: " + Combined_Prot + " cannot be found, the export protein summary table will be empty."); } LCMSID protID = null; //Parse prot.xml and generate protein master list given an FDR if (Combined_Prot != null && !Combined_Prot.equals("")) { protID = LCMSID.ReadLCMSIDSerialization(Combined_Prot); if (!"".equals(Combined_Prot) && protID == null) { protID = new LCMSID(Combined_Prot, tandemPara.DecoyPrefix, tandemPara.FastaPath); ProtXMLParser protxmlparser = new ProtXMLParser(protID, Combined_Prot, 0f); //Use DIA-Umpire default protein FDR calculation if (DefaultProtFiltering) { protID.RemoveLowLocalPWProtein(0.8f); protID.RemoveLowMaxIniProbProtein(0.9f); protID.FilterByProteinDecoyFDRUsingMaxIniProb(tandemPara.DecoyPrefix, tandemPara.ProtFDR); } //Get protein FDR calculation without other filtering else { protID.FilterByProteinDecoyFDRUsingLocalPW(tandemPara.DecoyPrefix, tandemPara.ProtFDR); } protID.LoadSequence(); protID.WriteLCMSIDSerialization(Combined_Prot); } Logger.getRootLogger().info("Protein No.:" + protID.ProteinList.size()); } HashMap<String, HashMap<String, FragmentPeak>> IDSummaryFragments = new HashMap<>(); //Generate DIA file list ArrayList<DIAPack> FileList = new ArrayList<>(); File folder = new File(WorkFolder); if (!folder.exists()) { Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found."); System.exit(1); } for (final File fileEntry : folder.listFiles()) { if (fileEntry.isFile() && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") 
&& !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry); } if (fileEntry.isDirectory()) { for (final File fileEntry2 : fileEntry.listFiles()) { if (fileEntry2.isFile() && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml") | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml")) && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml") && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) { AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2); } } } } Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size()); for (File fileEntry : AssignFiles.values()) { Logger.getRootLogger().info(fileEntry.getAbsolutePath()); String mzXMLFile = fileEntry.getAbsolutePath(); if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) { DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs); FileList.add(DiaFile); HashMap<String, FragmentPeak> FragMap = new HashMap<>(); IDSummaryFragments.put(FilenameUtils.getBaseName(mzXMLFile), FragMap); Logger.getRootLogger().info( "================================================================================================="); Logger.getRootLogger().info("Processing " + mzXMLFile); if (!DiaFile.LoadDIASetting()) { Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete"); System.exit(1); } if (!DiaFile.LoadParams()) { Logger.getRootLogger().info("Loading parameters failed, job is incomplete"); System.exit(1); } } } LCMSID combinePepID = null; if (DataSetLevelPepFDR) { combinePepID = LCMSID.ReadLCMSIDSerialization(WorkFolder + "combinePepID.SerFS"); if (combinePepID == null) { FDR_DataSetLevel fdr = new FDR_DataSetLevel(); fdr.GeneratePepIonList(FileList, tandemPara, WorkFolder + "combinePepID.SerFS"); combinePepID = fdr.combineID; 
combinePepID.WriteLCMSIDSerialization(WorkFolder + "combinePepID.SerFS"); } } //process each DIA file for quantification based on untargeted identifications for (DIAPack DiaFile : FileList) { long time = System.currentTimeMillis(); Logger.getRootLogger().info("Loading identification results " + DiaFile.Filename + "...."); //If the LCMSID serialization is found if (!DiaFile.ReadSerializedLCMSID()) { DiaFile.ParsePepXML(tandemPara, combinePepID); DiaFile.BuildStructure(); if (!DiaFile.MS1FeatureMap.ReadPeakCluster()) { Logger.getRootLogger().info("Loading peak and structure failed, job is incomplete"); System.exit(1); } DiaFile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster(); //Generate mapping between index of precursor feature and pseudo MS/MS scan index DiaFile.GenerateClusterScanNomapping(); //Doing quantification DiaFile.AssignQuant(); DiaFile.ClearStructure(); } DiaFile.IDsummary.ReduceMemoryUsage(); time = System.currentTimeMillis() - time; Logger.getRootLogger().info(DiaFile.Filename + " processed time:" + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time), TimeUnit.MILLISECONDS.toMinutes(time) - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)), TimeUnit.MILLISECONDS.toSeconds(time) - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time)))); } //<editor-fold defaultstate="collapsed" desc="Targete re-extraction using internal library"> Logger.getRootLogger().info( "================================================================================================="); if (InternalLibSearch && FileList.size() > 1) { Logger.getRootLogger().info("Module C: Targeted extraction using internal library"); FragmentLibManager libManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder, InternalLibID); if (libManager == null) { Logger.getRootLogger().info("Building internal spectral library"); libManager = new FragmentLibManager(InternalLibID); ArrayList<LCMSID> LCMSIDList = new ArrayList<>(); for (DIAPack dia : 
FileList) { LCMSIDList.add(dia.IDsummary); } libManager.ImportFragLibTopFrag(LCMSIDList, Freq, TopNFrag); libManager.WriteFragmentLibSerialization(WorkFolder); } libManager.ReduceMemoryUsage(); Logger.getRootLogger() .info("Building retention time prediction model and generate candidate peptide list"); for (int i = 0; i < FileList.size(); i++) { FileList.get(i).IDsummary.ClearMappedPep(); } for (int i = 0; i < FileList.size(); i++) { for (int j = i + 1; j < FileList.size(); j++) { RTAlignedPepIonMapping alignment = new RTAlignedPepIonMapping(WorkFolder, FileList.get(i).GetParameter(), FileList.get(i).IDsummary, FileList.get(j).IDsummary); alignment.GenerateModel(); alignment.GenerateMappedPepIon(); } FileList.get(i).ExportID(); FileList.get(i).IDsummary = null; } Logger.getRootLogger().info("Targeted matching........"); for (DIAPack diafile : FileList) { if (diafile.IDsummary == null) { diafile.ReadSerializedLCMSID(); } if (!diafile.IDsummary.GetMappedPepIonList().isEmpty()) { diafile.UseMappedIon = true; diafile.FilterMappedIonByProb = false; diafile.BuildStructure(); diafile.MS1FeatureMap.ReadPeakCluster(); diafile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster(); diafile.GenerateMassCalibrationRTMap(); diafile.TargetedExtractionQuant(false, libManager, 1.1f, RTWindow_Int); diafile.MS1FeatureMap.ClearAllPeaks(); diafile.IDsummary.ReduceMemoryUsage(); diafile.IDsummary.RemoveLowProbMappedIon(ProbThreshold); diafile.ExportID(); Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size() + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size()); diafile.ClearStructure(); } diafile.IDsummary = null; System.gc(); } Logger.getRootLogger().info( "================================================================================================="); } //</editor-fold> //<editor-fold defaultstate="collapsed" desc="Targeted re-extraction using external library"> //External library search if (ExternalLibSearch) { 
Logger.getRootLogger().info("Module C: Targeted extraction using external library"); //Read exteranl library FragmentLibManager ExlibManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder, FilenameUtils.getBaseName(ExternalLibPath)); if (ExlibManager == null) { ExlibManager = new FragmentLibManager(FilenameUtils.getBaseName(ExternalLibPath)); //Import traML file ExlibManager.ImportFragLibByTraML(ExternalLibPath, ExternalLibDecoyTag); //Check if there are decoy spectra ExlibManager.CheckDecoys(); //ExlibManager.ImportFragLibBySPTXT(ExternalLibPath); ExlibManager.WriteFragmentLibSerialization(WorkFolder); } Logger.getRootLogger() .info("No. of peptide ions in external lib:" + ExlibManager.PeptideFragmentLib.size()); for (DIAPack diafile : FileList) { if (diafile.IDsummary == null) { diafile.ReadSerializedLCMSID(); } //Generate RT mapping RTMappingExtLib RTmap = new RTMappingExtLib(diafile.IDsummary, ExlibManager, diafile.GetParameter()); RTmap.GenerateModel(); RTmap.GenerateMappedPepIon(); diafile.BuildStructure(); diafile.MS1FeatureMap.ReadPeakCluster(); diafile.GenerateMassCalibrationRTMap(); //Perform targeted re-extraction diafile.TargetedExtractionQuant(false, ExlibManager, ProbThreshold, RTWindow_Ext); diafile.MS1FeatureMap.ClearAllPeaks(); diafile.IDsummary.ReduceMemoryUsage(); //Remove target IDs below the defined probability threshold diafile.IDsummary.RemoveLowProbMappedIon(ExtProbThreshold); diafile.ExportID(); diafile.ClearStructure(); Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size() + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size()); } } //</editor-fold> //<editor-fold defaultstate="collapsed" desc="Peptide and fragment selection"> Logger.getRootLogger().info("Peptide and fragment selection across the whole dataset"); ArrayList<LCMSID> SummaryList = new ArrayList<>(); for (DIAPack diafile : FileList) { if (diafile.IDsummary == null) { diafile.ReadSerializedLCMSID(); 
diafile.IDsummary.ClearAssignPeakCluster(); //diafile.IDsummary.ClearPSMs(); } if (protID != null) { //Generate protein list according to mapping of peptide ions for each DIA file to the master protein list diafile.IDsummary.GenerateProteinByRefIDByPepSeq(protID, true); diafile.IDsummary.ReMapProPep(); } if ("GW".equals(FilterWeight)) { diafile.IDsummary.SetFilterByGroupWeight(); } else if ("PepW".equals(FilterWeight)) { diafile.IDsummary.SetFilterByWeight(); } SummaryList.add(diafile.IDsummary); } FragmentSelection fragselection = new FragmentSelection(SummaryList); fragselection.freqPercent = Freq; fragselection.MinFragMZ = MinFragMz; fragselection.GeneratePepFragScoreMap(); fragselection.GenerateTopFragMap(TopNFrag); fragselection.GenerateProtPepScoreMap(MinWeight); fragselection.GenerateTopPepMap(TopNPep); //</editor-fold> //<editor-fold defaultstate="collapsed" desc="Writing general reports"> ExportTable export = new ExportTable(WorkFolder, SummaryList, IDSummaryFragments, protID, fragselection); export.Export(TopNPep, TopNFrag, Freq); //</editor-fold> //<editor-fold defaultstate="collapsed" desc="//<editor-fold defaultstate="collapsed" desc="Generate SAINT input files"> if (ExportSaint && protID != null) { HashMap<String, DIAPack> Filemap = new HashMap<>(); for (DIAPack DIAfile : FileList) { Filemap.put(DIAfile.GetBaseName(), DIAfile); } FileWriter baitfile = new FileWriter(WorkFolder + "SAINT_Bait_" + DateTimeTag.GetTag() + ".txt"); FileWriter preyfile = new FileWriter(WorkFolder + "SAINT_Prey_" + DateTimeTag.GetTag() + ".txt"); FileWriter interactionfileMS1 = null; FileWriter interactionfileMS2 = null; if (SAINT_MS1) { interactionfileMS1 = new FileWriter( WorkFolder + "SAINT_Interaction_MS1_" + DateTimeTag.GetTag() + ".txt"); } if (SAINT_MS2) { interactionfileMS2 = new FileWriter( WorkFolder + "SAINT_Interaction_MS2_" + DateTimeTag.GetTag() + ".txt"); } HashMap<String, String> PreyID = new HashMap<>(); for (String samplekey : ControlName.keySet()) { String 
name = ControlName.get(samplekey); for (String file : ControlList.get(samplekey)) { baitfile.write(FilenameUtils.getBaseName(file) + "\t" + name + "\t" + "C\n"); LCMSID IDsummary = Filemap.get(FilenameUtils.getBaseName(file)).IDsummary; if (SAINT_MS1) { SaintOutput(protID, IDsummary, fragselection, interactionfileMS1, file, name, PreyID, 1); } if (SAINT_MS2) { SaintOutput(protID, IDsummary, fragselection, interactionfileMS2, file, name, PreyID, 2); } } } for (String samplekey : BaitName.keySet()) { String name = BaitName.get(samplekey); for (String file : BaitList.get(samplekey)) { baitfile.write(FilenameUtils.getBaseName(file) + "\t" + name + "\t" + "T\n"); LCMSID IDsummary = Filemap.get(FilenameUtils.getBaseName(file)).IDsummary; if (SAINT_MS1) { SaintOutput(protID, IDsummary, fragselection, interactionfileMS1, file, name, PreyID, 1); } if (SAINT_MS2) { SaintOutput(protID, IDsummary, fragselection, interactionfileMS2, file, name, PreyID, 2); } } } baitfile.close(); if (SAINT_MS1) { interactionfileMS1.close(); } if (SAINT_MS2) { interactionfileMS2.close(); } for (String AccNo : PreyID.keySet()) { preyfile.write(AccNo + "\t" + PreyID.get(AccNo) + "\n"); } preyfile.close(); } //</editor-fold> Logger.getRootLogger().info("Job done"); Logger.getRootLogger().info( "================================================================================================="); } catch (Exception e) { Logger.getRootLogger().error(ExceptionUtils.getStackTrace(e)); throw e; } }