List of usage examples for java.util HashMap put
public V put(K key, V value)
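Before the full examples, a minimal, self-contained sketch of the put contract (not taken from the source files below): put associates the value with the key and returns the previous value mapped to that key, or null if there was no mapping. Note that null is also returned when the key was previously mapped to null, so the return value alone cannot distinguish those two cases.

import java.util.HashMap;

public class PutDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> ages = new HashMap<>();
        Integer previous = ages.put("alice", 30); // no prior mapping: returns null
        System.out.println(previous);             // null
        previous = ages.put("alice", 31);         // overwrites: returns the old value
        System.out.println(previous);             // 30
        System.out.println(ages.get("alice"));    // 31
    }
}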
From source file:edu.nyu.vida.data_polygamy.relationship_computation.Relationship.java
/**
 * @param args
 * @throws ParseException
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option scoreOption = new Option("sc", "score", true, "set threshold for relationship score");
    scoreOption.setRequired(false);
    scoreOption.setArgName("SCORE THRESHOLD");
    options.addOption(scoreOption);

    Option strengthOption = new Option("st", "strength", true, "set threshold for relationship strength");
    strengthOption.setRequired(false);
    strengthOption.setArgName("STRENGTH THRESHOLD");
    options.addOption(strengthOption);

    Option completeRandomizationOption = new Option("c", "complete-randomization", false,
            "use complete randomization when performing significance tests");
    completeRandomizationOption.setRequired(false);
    options.addOption(completeRandomizationOption);

    Option idOption = new Option("id", "ids", false, "output id instead of names for datasets and attributes");
    idOption.setRequired(false);
    options.addOption(idOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    Option removeOption = new Option("r", "remove-not-significant", false,
            "remove relationships that are not significant from the final output");
    removeOption.setRequired(false);
    options.addOption(removeOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.relationship_computation.Relationship", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeNotSignificant = cmd.hasOption("r");
    boolean removeExistingFiles = cmd.hasOption("f");
    boolean completeRandomization = cmd.hasOption("c");
    boolean hasScoreThreshold = cmd.hasOption("sc");
    boolean hasStrengthThreshold = cmd.hasOption("st");
    boolean outputIds = cmd.hasOption("id");
    String scoreThreshold = hasScoreThreshold ? cmd.getOptionValue("sc") : "";
    String strengthThreshold = hasStrengthThreshold ? cmd.getOptionValue("st") : "";

    // all datasets
    ArrayList<String> all_datasets = new ArrayList<String>();
    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        all_datasets.add(line.split("\t")[0]);
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();
    String[] all_datasets_array = new String[all_datasets.size()];
    all_datasets.toArray(all_datasets_array);

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : all_datasets_array;
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }
    if (firstGroup.isEmpty()) {
        System.out.println("No indices from datasets in G1.");
        System.exit(0);
    }
    if (secondGroup.isEmpty()) {
        System.out.println("No indices from datasets in G2.");
        System.exit(0);
    }

    // getting dataset ids
    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        all_datasets.add(dt[0]);
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    String relationshipsDir = "";
    if (outputIds) {
        relationshipsDir = FrameworkUtils.relationshipsIdsDir;
    } else {
        relationshipsDir = FrameworkUtils.relationshipsDir;
    }

    FrameworkUtils.createDir(s3bucket + relationshipsDir, s3conf, s3);

    String random = completeRandomization ? "complete" : "restricted";

    String indexInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;

            String correlationOutputFileName = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.indexDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        indexInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "relationship" + "-" + random;
    String relationshipOutputDir = s3bucket + relationshipsDir + "/tmp/";

    FrameworkUtils.removeFile(relationshipOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("complete-random", String.valueOf(completeRandomization));
    conf.set("output-ids", String.valueOf(outputIds));
    conf.set("complete-random-str", random);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    conf.set("remove-not-significant", String.valueOf(removeNotSignificant));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }
    if (hasScoreThreshold) {
        conf.set("score-threshold", scoreThreshold);
    }
    if (hasStrengthThreshold) {
        conf.set("strength-threshold", strengthThreshold);
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(TopologyTimeSeriesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationMapper.class);
    job.setReducerClass(CorrelationReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, indexInputDirs.substring(0, indexInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(relationshipOutputDir));

    job.setJarByClass(Relationship.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + relationshipsDir + "/tmp/" + dataset1 + "-" + dataset2 + "/";
            String to = s3bucket + relationshipsDir + "/" + dataset1 + "-" + dataset2 + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
From source file:com.sebuilder.interpreter.SeInterpreter.java
public static void main(String[] args) {
    if (args.length == 0) {
        System.out.println(
                "Usage: [--driver=<drivername>] [--driver.<configkey>=<configvalue>...] [--implicitlyWait=<ms>] [--pageLoadTimeout=<ms>] [--stepTypePackage=<package name>] <script path>...");
        System.exit(0);
    }

    Log log = LogFactory.getFactory().getInstance(SeInterpreter.class);
    WebDriverFactory wdf = DEFAULT_DRIVER_FACTORY;
    ScriptFactory sf = new ScriptFactory();
    StepTypeFactory stf = new StepTypeFactory();
    sf.setStepTypeFactory(stf);
    TestRunFactory trf = new TestRunFactory();
    sf.setTestRunFactory(trf);

    ArrayList<String> paths = new ArrayList<String>();
    HashMap<String, String> driverConfig = new HashMap<String, String>();
    for (String s : args) {
        if (s.startsWith("--")) {
            String[] kv = s.split("=", 2);
            if (kv.length < 2) {
                log.fatal("Driver configuration option \"" + s
                        + "\" is not of the form \"--driver=<name>\" or \"--driver.<key>=<value>\".");
                System.exit(1);
            }
            if (s.startsWith("--implicitlyWait")) {
                trf.setImplicitlyWaitDriverTimeout(Integer.parseInt(kv[1]));
            } else if (s.startsWith("--pageLoadTimeout")) {
                trf.setPageLoadDriverTimeout(Integer.parseInt(kv[1]));
            } else if (s.startsWith("--stepTypePackage")) {
                stf.setPrimaryPackage(kv[1]);
            } else if (s.startsWith("--driver.")) {
                driverConfig.put(kv[0].substring("--driver.".length()), kv[1]);
            } else if (s.startsWith("--driver")) {
                try {
                    wdf = (WebDriverFactory) Class
                            .forName("com.sebuilder.interpreter.webdriverfactory." + kv[1]).newInstance();
                } catch (ClassNotFoundException e) {
                    log.fatal("Unknown WebDriverFactory: "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                } catch (InstantiationException e) {
                    log.fatal("Could not instantiate WebDriverFactory "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                } catch (IllegalAccessException e) {
                    log.fatal("Could not instantiate WebDriverFactory "
                            + "com.sebuilder.interpreter.webdriverfactory." + kv[1], e);
                }
            } else {
                paths.add(s);
            }
        } else {
            paths.add(s);
        }
    }

    if (paths.isEmpty()) {
        log.info("Configuration successful but no paths to scripts specified. Exiting.");
        System.exit(0);
    }

    HashMap<String, String> cfg = new HashMap<String, String>(driverConfig);
    for (String path : paths) {
        try {
            TestRun lastRun = null;
            for (Script script : sf.parse(new File(path))) {
                for (Map<String, String> data : script.dataRows) {
                    try {
                        lastRun = script.testRunFactory.createTestRun(script, log, wdf, driverConfig, data, lastRun);
                        if (lastRun.finish()) {
                            log.info(script.name + " succeeded");
                        } else {
                            log.info(script.name + " failed");
                        }
                    } catch (Exception e) {
                        log.info(script.name + " failed", e);
                    }
                }
            }
        } catch (Exception e) {
            log.fatal("Run error.", e);
            System.exit(1);
        }
    }
}
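SeInterpreter collects every --driver.<key>=<value> flag into driverConfig by stripping the prefix from the key before the put. A minimal sketch of that parse-and-put step (the flag values are made up):

import java.util.HashMap;

public class FlagPutDemo {
    public static void main(String[] args) {
        String[] flags = { "--driver.browserName=firefox", "--driver.platform=LINUX" };
        HashMap<String, String> driverConfig = new HashMap<>();
        for (String s : flags) {
            String[] kv = s.split("=", 2); // split only on the first '='
            driverConfig.put(kv[0].substring("--driver.".length()), kv[1]);
        }
        System.out.println(driverConfig); // {browserName=firefox, platform=LINUX}
    }
}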
From source file:enrichment.Disambiguate.java
/**
 * prerequisites:
 *   cd silk_2.5.3/*_links
 *   cat *.nt | sort -t' ' -k3 > $filename
 *
 * @param args $filename
 * @throws IOException
 * @throws URISyntaxException
 */
public static void main(String[] args) {
    File file = new File(args[0]);
    if (file.isDirectory()) {
        args = file.list(new OnlyExtFilenameFilter("nt"));
    }
    BufferedReader in;
    for (int q = 0; q < args.length; q++) {
        String filename = null;
        if (file.isDirectory()) {
            filename = file.getPath() + File.separator + args[q];
        } else {
            filename = args[q];
        }
        try {
            FileWriter output = new FileWriter(filename + "_disambiguated.nt");
            String prefix = "@prefix rdrel: <http://rdvocab.info/RDARelationshipsWEMI/> .\n"
                    + "@prefix dbpedia: <http://de.dbpedia.org/resource/> .\n"
                    + "@prefix frbr: <http://purl.org/vocab/frbr/core#> .\n"
                    + "@prefix lobid: <http://lobid.org/resource/> .\n"
                    + "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n"
                    + "@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n"
                    + "@prefix mo: <http://purl.org/ontology/mo/> .\n"
                    + "@prefix wikipedia: <https://de.wikipedia.org/wiki/> .";
            output.append(prefix + "\n\n");
            in = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
            HashMap<String, HashMap<String, ArrayList<String>>> hm = new HashMap<String, HashMap<String, ArrayList<String>>>();
            String s;
            HashMap<String, ArrayList<String>> hmLobid = new HashMap<String, ArrayList<String>>();
            Stack<String> old_object = new Stack<String>();
            while ((s = in.readLine()) != null) {
                String[] triples = s.split(" ");
                String object = triples[2].substring(1, triples[2].length() - 1);
                if (old_object.size() > 0 && !old_object.firstElement().equals(object)) {
                    hmLobid = new HashMap<String, ArrayList<String>>();
                    old_object = new Stack<String>();
                }
                old_object.push(object);
                String subject = triples[0].substring(1, triples[0].length() - 1);
                System.out.print("\nObject=" + object);
                System.out.print("\ntriples[2]=" + triples[2]);
                hmLobid.put(subject, getAllCreators(new URI(subject)));
                hm.put(object, hmLobid);
            }

            // get all dbpedia resources
            for (String key_one : hm.keySet()) {
                System.out.print("\n==============\n==== " + key_one + "\n===============");
                int resources_cnt = hm.get(key_one).keySet().size();
                ArrayList<String>[] creators = new ArrayList[resources_cnt];
                HashMap<String, Integer> creators_backed = new HashMap<String, Integer>();
                int x = 0;
                // get all lobid_resources subsumed under the dbpedia resource
                for (String subject_uri : hm.get(key_one).keySet()) {
                    creators[x] = new ArrayList<String>();
                    System.out.print("\n subject_uri=" + subject_uri);
                    Iterator<String> ite = hm.get(key_one).get(subject_uri).iterator();
                    int y = 0;
                    // get all creators of the lobid resource
                    while (ite.hasNext()) {
                        String creator = ite.next();
                        System.out.print("\n " + creator);
                        if (creators_backed.containsKey(creator)) {
                            y = creators_backed.get(creator);
                        } else {
                            y = creators_backed.size();
                            creators_backed.put(creator, y);
                        }
                        while (creators[x].size() <= y) {
                            creators[x].add("-");
                        }
                        creators[x].set(y, creator);
                        y++;
                    }
                    x++;
                }
                if (creators_backed.size() == 1) {
                    System.out.println("\n" + "Every resource pointing to " + key_one + " has the same creator!");
                    for (String key_two : hm.get(key_one).keySet()) {
                        output.append("<" + key_two + "> rdrel:workManifested <" + key_one + "> .\n");
                        output.append("<" + key_two + "> mo:wikipedia <"
                                + key_one.replaceAll("dbpedia\\.org/resource", "wikipedia\\.org/wiki") + "> .\n");
                    }
                }
                /*else {
                    for (int a = 0; a < creators.length; a++) {
                        System.out.print(creators[a].toString() + ",");
                    }
                }*/
            }
            output.flush();
            if (output != null) {
                output.close();
            }
        } catch (Exception e) {
            System.out.print("Exception while working on " + filename + ": \n");
            e.printStackTrace(System.out);
        }
    }
}
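Disambiguate.java groups lobid subjects under each dbpedia object with a HashMap whose values are themselves HashMaps. A minimal sketch of put on such a nested map (the URIs are illustrative); note that the inner map is stored by reference, so later puts into it remain visible through the outer map:

import java.util.ArrayList;
import java.util.HashMap;

public class NestedPutDemo {
    public static void main(String[] args) {
        HashMap<String, HashMap<String, ArrayList<String>>> byObject = new HashMap<>();

        HashMap<String, ArrayList<String>> bySubject = new HashMap<>();
        ArrayList<String> creators = new ArrayList<>();
        creators.add("Goethe, Johann Wolfgang von");
        bySubject.put("http://lobid.org/resource/HT002189125", creators);

        // attach the inner map; further puts into bySubject are seen here too
        byObject.put("http://de.dbpedia.org/resource/Faust", bySubject);
        System.out.println(byObject.get("http://de.dbpedia.org/resource/Faust").keySet());
    }
}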
From source file:in.sc.dao.ProductHelper.java
public static void main(String[] args) {
    HashMap a = new HashMap();
    ArrayList brandL = new ArrayList();
    brandL.add("apple");
    brandL.add("sony");
    // a.put(category, 137);
    // a.put(country, 1);
    a.put(brand, brandL);
    String whereC = " AND (CAST(TRIM(f13) AS DECIMAL(10,2)) =1 or CAST(TRIM(f13) AS DECIMAL(10,2)) =2) and CAST(TRIM(f14) AS DECIMAL(10,2)) BETWEEN 1.3 AND 2.7 ";
    // a.put(unique_id, "panasonic-p66-mega");
    a.put(from, 0);
    a.put(whereclause, whereC);
    try {
        ArrayList<ProductBean> data = (ArrayList) new ProductHelper().getProductLists1(a);
        int cnt = 0;
        for (Object o : data) {
            // System.out.println("" + o);
            cnt++;
        }
        // System.out.println("count " + cnt);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
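ProductHelper.java calls put on a raw HashMap, so keys and values of any type are accepted and the compiler emits unchecked warnings. A Map<String, Object> would keep the same flexibility for values while making the key type explicit; a small sketch of that alternative (BRAND and FROM are hypothetical stand-ins for the class's key constants):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class TypedParamsDemo {
    // hypothetical stand-ins for ProductHelper's key constants
    static final String BRAND = "brand";
    static final String FROM = "from";

    public static void main(String[] args) {
        Map<String, Object> params = new HashMap<>();
        List<String> brands = new ArrayList<>();
        brands.add("apple");
        brands.add("sony");
        params.put(BRAND, brands); // heterogeneous values, typed keys
        params.put(FROM, 0);       // autoboxed to Integer
        System.out.println(params);
    }
}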
From source file:DIA_Umpire_Quant.DIA_Umpire_LCMSIDGen.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    System.out.println(
            "=================================================================================================");
    System.out.println("DIA-Umpire LCMSID generator (version: " + UmpireInfo.GetInstance().Version + ")");
    if (args.length != 1) {
        System.out.println(
                "command format error, the correct format should be: java -jar -Xmx10G DIA_Umpire_LCMSIDGen.jar diaumpire_module.params");
        return;
    }
    try {
        ConsoleLogger.SetConsoleLogger(Level.INFO);
        ConsoleLogger.SetFileLogger(Level.DEBUG, FilenameUtils.getFullPath(args[0]) + "diaumpire_lcmsidgen.log");
    } catch (Exception e) {
    }

    Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version);
    Logger.getRootLogger().info("Parameter file:" + args[0]);

    BufferedReader reader = new BufferedReader(new FileReader(args[0]));
    String line = "";
    String WorkFolder = "";
    int NoCPUs = 2;

    TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600);
    HashMap<String, File> AssignFiles = new HashMap<>();

    //<editor-fold defaultstate="collapsed" desc="Reading parameter file">
    while ((line = reader.readLine()) != null) {
        line = line.trim();
        Logger.getRootLogger().info(line);
        if (!"".equals(line) && !line.startsWith("#")) {
            //System.out.println(line);
            if (line.equals("==File list begin")) {
                do {
                    line = reader.readLine();
                    line = line.trim();
                    if (line.equals("==File list end")) {
                        continue;
                    } else if (!"".equals(line)) {
                        File newfile = new File(line);
                        if (newfile.exists()) {
                            AssignFiles.put(newfile.getAbsolutePath(), newfile);
                        } else {
                            Logger.getRootLogger().info("File: " + newfile + " does not exist.");
                        }
                    }
                } while (!line.equals("==File list end"));
            }
            if (line.split("=").length < 2) {
                continue;
            }
            String type = line.split("=")[0].trim();
            String value = line.split("=")[1].trim();
            switch (type) {
            case "Path": {
                WorkFolder = value;
                break;
            }
            case "path": {
                WorkFolder = value;
                break;
            }
            case "Thread": {
                NoCPUs = Integer.parseInt(value);
                break;
            }
            case "DecoyPrefix": {
                if (!"".equals(value)) {
                    tandemPara.DecoyPrefix = value;
                }
                break;
            }
            case "PeptideFDR": {
                tandemPara.PepFDR = Float.parseFloat(value);
                break;
            }
            }
        }
    }
    //</editor-fold>

    //Initialize PTM manager using compomics library
    PTMManager.GetInstance();

    //Generate DIA file list
    ArrayList<DIAPack> FileList = new ArrayList<>();

    File folder = new File(WorkFolder);
    if (!folder.exists()) {
        Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found.");
        System.exit(1);
    }
    for (final File fileEntry : folder.listFiles()) {
        if (fileEntry.isFile()
                && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                        | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
            AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry);
        }
        if (fileEntry.isDirectory()) {
            for (final File fileEntry2 : fileEntry.listFiles()) {
                if (fileEntry2.isFile()
                        && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                                | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                        && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                        && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                        && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                    AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2);
                }
            }
        }
    }

    Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size());
    for (File fileEntry : AssignFiles.values()) {
        Logger.getRootLogger().info(fileEntry.getAbsolutePath());
    }

    //process each DIA file to generate untargeted identifications
    for (File fileEntry : AssignFiles.values()) {
        String mzXMLFile = fileEntry.getAbsolutePath();
        if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) {
            long time = System.currentTimeMillis();

            DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs);
            FileList.add(DiaFile);
            Logger.getRootLogger().info(
                    "=================================================================================================");
            Logger.getRootLogger().info("Processing " + mzXMLFile);

            if (!DiaFile.LoadDIASetting()) {
                Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete");
                System.exit(1);
            }
            if (!DiaFile.LoadParams()) {
                Logger.getRootLogger().info("Loading parameters failed, job is incomplete");
                System.exit(1);
            }
            Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "....");

            DiaFile.ParsePepXML(tandemPara, null);
            DiaFile.BuildStructure();
            if (!DiaFile.MS1FeatureMap.ReadPeakCluster()) {
                Logger.getRootLogger().info("Loading peak and structure failed, job is incomplete");
                System.exit(1);
            }
            DiaFile.MS1FeatureMap.ClearMonoisotopicPeakOfCluster();
            //Generate mapping between index of precursor feature and pseudo MS/MS scan index
            DiaFile.GenerateClusterScanNomapping();
            //Doing quantification
            DiaFile.AssignQuant();
            DiaFile.ClearStructure();

            DiaFile.IDsummary.ReduceMemoryUsage();
            time = System.currentTimeMillis() - time;
            Logger.getRootLogger().info(mzXMLFile + " processed time:"
                    + String.format("%d hour, %d min, %d sec", TimeUnit.MILLISECONDS.toHours(time),
                            TimeUnit.MILLISECONDS.toMinutes(time)
                                    - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(time)),
                            TimeUnit.MILLISECONDS.toSeconds(time)
                                    - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(time))));
        }
        Logger.getRootLogger().info("Job done");
        Logger.getRootLogger().info(
                "=================================================================================================");
    }
}
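DIA_Umpire_LCMSIDGen keys AssignFiles on the absolute path, so a file named in the parameter file and found again during the folder scan is stored only once: a second put with the same key silently replaces the first value instead of adding a duplicate. A minimal sketch (the file name is made up):

import java.io.File;
import java.util.HashMap;

public class DedupPutDemo {
    public static void main(String[] args) {
        HashMap<String, File> assigned = new HashMap<>();
        File f = new File("data/run1.mzXML");
        assigned.put(f.getAbsolutePath(), f);
        assigned.put(f.getAbsolutePath(), f); // same key: replaces, no duplicate entry
        System.out.println(assigned.size());  // 1
    }
}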
From source file:edu.harvard.liblab.ecru.LoadCsvData.java
/**
 * @param args
 */
public static void main(String[] args) {
    if (args.length > 7 || args.length == 0 || !args[0].equals("-f") || !args[2].equals("-u")
            || !args[4].equals("-i")) {
        System.err.println(USAGE);
        System.exit(1);
    }
    String filename = args[1].trim();
    url = args[3].trim();
    needsPrefix = !args[5].equals("unique");
    isVerbose = (args.length == 7 && args[6].equals("-v"));
    System.out.println("Loading data from " + filename + " " + (needsPrefix ? "IDs will be prefixed " : " "));
    long start = System.currentTimeMillis();
    boolean isReading = false;
    CSVPrinter printer = null;
    CSVFormat format = CSVFormat.EXCEL.withHeader().withDelimiter(',').withAllowMissingColumnNames(true);
    CSVParser parser;
    try {
        if (isVerbose) {
            printer = new CSVPrinter(System.err, format.withDelimiter('|'));
        }
        parser = CSVParser.parse(new File(filename), Charset.forName("UTF-8"), format);
        solrSrvr = SingletonSolrServer.getSolrServer(url);
        for (CSVRecord record : parser) {
            numRecs++;
            HashMap<String, String> recMap = new HashMap<String, String>();
            for (String field : FIELDS) {
                String value = null;
                try {
                    value = record.get(field);
                } catch (IllegalArgumentException e) {
                    if (e.getMessage().indexOf("expected one of") == -1) {
                        e.printStackTrace();
                        System.exit(1);
                    }
                }
                value = value == null ? "" : value.trim();
                recMap.put(field, value);
            }
            String id = recMap.get("ID");
            if (id.isEmpty()) {
                if (isVerbose) {
                    System.err.println("Record missing ID: ");
                    printer.printRecord(record);
                }
            } else {
                String type = recMap.get("Type");
                SolrDocument sdoc = getDocFromSolr(recMap.get("ID"));
                try {
                    if (type.toLowerCase().equals("course")) {
                        processCourse(recMap, sdoc);
                        isReading = false;
                    } else {
                        if (!isReading) {
                            addUpdateCommit(); // just in case the preceding course(s) are related
                        }
                        processReading(recMap, sdoc);
                        isReading = true;
                    }
                } catch (Exception e) {
                    if (isVerbose) {
                        System.err.println("Record # " + numRecs + " not used:\n\t" + e.getMessage());
                    }
                    errRecs++;
                }
            }
            if (beans.size() > 20) {
                addUpdateCommit();
            }
        }
        parser.close();
        if (beans.size() > 0 || docUpdates.size() > 0) {
            addUpdateCommit();
        }
    } catch (FileNotFoundException e) {
        System.err.println(filename + " not found");
        System.exit(1);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }
    long end = System.currentTimeMillis();
    long courseTime = (end - start) / (long) 1000;
    try {
        solrSrvr.optimize();
    } catch (SolrServerException e) {
        e.printStackTrace();
        System.exit(1);
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.out.println(numRecs + " records found, of which " + errRecs + " had a problem; time: " + courseTime
            + " seconds " + ((courseTime > 60) ? ("(" + (courseTime / (long) 60) + " minutes)") : ""));
    System.exit(0);
}
From source file:org.openspaces.rest.space.ReplicationRESTController.java
public static void main(String[] args) throws Exception {
    ReplicationRESTController ctl = new ReplicationRESTController();

    HashMap body = new HashMap();

    Map site1 = new HashMap();
    site1.put("rest-api", "ec2-54-221-106-51.compute-1.amazonaws.com:8100");
    site1.put("site-id", "EC2");
    Map site2 = new HashMap();
    site2.put("rest-api", "15.185.186.251:8100");
    site2.put("site-id", "HPCS");
    List sites = new ArrayList();
    sites.add(site1);
    sites.add(site2);
    body.put("cloudify-instances", sites);

    HashMap tspec = new HashMap();
    tspec.put("name", "my-topo");
    List edges = new ArrayList();
    HashMap edge1 = new HashMap();
    edge1.put("fromSite", "EC2");
    edge1.put("fromSpace", "test");
    edge1.put("toSite", "HPCS");
    edge1.put("toSpace", "test");
    edges.add(edge1);
    tspec.put("edges", edges);

    HashMap ports = new HashMap();
    HashMap port1 = new HashMap();
    port1.put("discovery", DEFAULT_DISCOVERY_PORT);
    port1.put("data", DEFAULT_DATA_PORT);
    ports.put("EC2", port1);
    HashMap port2 = new HashMap();
    port2.put("discovery", DEFAULT_DISCOVERY_PORT);
    port2.put("data", DEFAULT_DATA_PORT);
    ports.put("HPCS", port2);
    tspec.put("ports", ports);

    body.put("topology-spec", tspec);
    body.put("initial-memory-size", "256m");
    body.put("max-memory-size", "512m");
    body.put("highly-available", "false");
    body.put("max-vm-size", "512");
    body.put("batch-size", "1024");
    body.put("send-interval", "1000");

    ctl.deployTopology("my-topo", body);

    //deploy service test
    /*
    RestClient rc = new RestClient(new URL("http://localhost:8100"), "", "", CLOUDIFY_VERSION);
    rc.connect();
    InstallServiceRequest r = new InstallServiceRequest();
    File packed = Packager.pack(new File("c:\\gigaspaces\\src\\github\\cloudify-recipes\\services\\tomcat"));
    final UploadResponse uploadResponse = rc.upload(null, packed);
    final String uploadKey = uploadResponse.getUploadKey();
    r.setServiceFolderUploadKey(uploadKey);
    r.setAuthGroups("");
    r.setServiceFileName("tomcat-service.groovy");
    InstallServiceResponse installServiceRespone = rc.installService("test", "yomama", r);
    System.out.println(uploadKey);
    //r.set
    //rc.installService("default","test",);
    */

    /*
    HttpClient client = HttpClientBuilder.create().build();
    HttpGet get = new HttpGet("http://ec2-54-197-25-132.compute-1.amazonaws.com:8100/");
    HttpResponse response = client.execute(get);
    InputStream is = response.getEntity().getContent();
    BufferedReader rdr = new BufferedReader(new InputStreamReader(is));
    String line = "";
    while ((line = rdr.readLine()) != null) {
        System.out.println(line);
    }
    */

    /*
    ObjectMapper om = new ObjectMapper();
    JsonNode node = om.readTree(is);
    System.out.println(node.get("response").get(1));
    */

    /*
    ObjectMapper om = new ObjectMapper();
    JsonNode node = om.readTree("{\"response\":[\"app1\",\"app2\"]}");
    for (Iterator<JsonNode> it = node.get("response").getElements(); it.hasNext();) {
        System.out.println(it.next().getValueAsText());
    }
    */
}
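The body map above is assembled bottom-up with put: the inner maps and lists are filled first and then attached to the enclosing map. One caveat worth remembering when building request payloads this way is that HashMap does not preserve insertion order, so a serialized body may list fields in a different order than they were put; a LinkedHashMap keeps insertion order if that matters. A short illustrative sketch of the difference:

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

public class OrderDemo {
    public static void main(String[] args) {
        Map<String, String> hash = new HashMap<>();
        Map<String, String> linked = new LinkedHashMap<>();
        for (String k : new String[] { "name", "edges", "ports" }) {
            hash.put(k, "...");
            linked.put(k, "...");
        }
        System.out.println(hash);   // iteration order not guaranteed
        System.out.println(linked); // {name=..., edges=..., ports=...}
    }
}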
From source file:eu.fbk.dkm.sectionextractor.pantheon.WikipediaGoodTextExtractor.java
public static void main(String args[]) throws IOException {
    CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("wikipedia xml dump file").isRequired().withLongOpt("wikipedia-dump").create("d"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("dir").hasArg()
            .withDescription("output directory in which to store output files").isRequired()
            .withLongOpt("output-dir").create("o"));
    commandLineWithLogger
            .addOption(OptionBuilder.withDescription("use NAF format").withLongOpt("naf").create("n"));
    commandLineWithLogger.addOption(OptionBuilder.withDescription("tokenize and ssplit with Stanford")
            .withLongOpt("stanford").create("s"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Filter file")
            .withLongOpt("filter").create("f"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("ID and category file").withLongOpt("idcat").create("i"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Redirect file")
            .withLongOpt("redirect").create("r"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription(
                    "number of threads (default " + AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER + ")")
            .withLongOpt("num-threads").create("t"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("number of pages to process (default all)").withLongOpt("num-pages").create("p"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg()
            .withDescription("receive notification every n pages (default "
                    + AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT + ")")
            .withLongOpt("notification-point").create("b"));
    // registers short option "n" a second time, overriding the "--naf" definition above
    commandLineWithLogger.addOption(new Option("n", "NAF format"));

    CommandLine commandLine = null;
    try {
        commandLine = commandLineWithLogger.getCommandLine(args);
        PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
    } catch (Exception e) {
        System.exit(1);
    }

    int numThreads = Integer.parseInt(commandLine.getOptionValue("num-threads",
            Integer.toString(AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER)));
    int numPages = Integer.parseInt(commandLine.getOptionValue("num-pages",
            Integer.toString(AbstractWikipediaExtractor.DEFAULT_NUM_PAGES)));
    int notificationPoint = Integer.parseInt(commandLine.getOptionValue("notification-point",
            Integer.toString(AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT)));

    boolean nafFormat = commandLine.hasOption("n");
    boolean useStanford = commandLine.hasOption("s");

    HashMap<Integer, String> idCategory = new HashMap<>();
    String idcatFileName = commandLine.getOptionValue("idcat");
    if (idcatFileName != null) {
        logger.info("Loading categories");
        File idcatFile = new File(idcatFileName);
        if (idcatFile.exists()) {
            List<String> lines = Files.readLines(idcatFile, Charsets.UTF_8);
            for (String line : lines) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                String[] parts = line.split("\\s+");
                if (parts.length < 3) {
                    continue;
                }
                idCategory.put(Integer.parseInt(parts[1]), parts[2]);
            }
        }
    }

    HashMap<String, String> redirects = new HashMap<>();
    String redirectFileName = commandLine.getOptionValue("redirect");
    if (redirectFileName != null) {
        logger.info("Loading redirects");
        File redirectFile = new File(redirectFileName);
        if (redirectFile.exists()) {
            List<String> lines = Files.readLines(redirectFile, Charsets.UTF_8);
            for (String line : lines) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                String[] parts = line.split("\\t+");
                if (parts.length < 2) {
                    continue;
                }
                redirects.put(parts[0], parts[1]);
            }
        }
    }

    HashSet<String> pagesToConsider = null;
    String filterFileName = commandLine.getOptionValue("filter");
    if (filterFileName != null) {
        logger.info("Loading file list");
        File filterFile = new File(filterFileName);
        if (filterFile.exists()) {
            pagesToConsider = new HashSet<>();
            List<String> lines = Files.readLines(filterFile, Charsets.UTF_8);
            for (String line : lines) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                line = line.replaceAll("\\s+", "_");
                pagesToConsider.add(line);
                addRedirects(pagesToConsider, redirects, line, 0);
            }
        }
    }

    ExtractorParameters extractorParameters = new ExtractorParameters(
            commandLine.getOptionValue("wikipedia-dump"), commandLine.getOptionValue("output-dir"));

    File outputFolder = new File(commandLine.getOptionValue("output-dir"));
    if (!outputFolder.exists()) {
        boolean mkdirs = outputFolder.mkdirs();
        if (!mkdirs) {
            throw new IOException("Unable to create folder " + outputFolder.getAbsolutePath());
        }
    }

    WikipediaExtractor wikipediaPageParser = new WikipediaGoodTextExtractor(numThreads, numPages,
            extractorParameters.getLocale(), outputFolder, nafFormat, pagesToConsider, useStanford, idCategory);
    wikipediaPageParser.setNotificationPoint(notificationPoint);
    wikipediaPageParser.start(extractorParameters);

    logger.info("extraction ended " + new Date());
}
From source file:DIA_Umpire_Quant.DIA_Umpire_ExtLibSearch.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    System.out.println(
            "=================================================================================================");
    System.out.println("DIA-Umpire targeted re-extraction analysis using external library (version: "
            + UmpireInfo.GetInstance().Version + ")");
    if (args.length != 1) {
        System.out.println(
                "command format error, the correct format should be: java -jar -Xmx10G DIA_Umpire_ExtLibSearch.jar diaumpire_module.params");
        return;
    }
    try {
        ConsoleLogger.SetConsoleLogger(Level.INFO);
        ConsoleLogger.SetFileLogger(Level.DEBUG,
                FilenameUtils.getFullPath(args[0]) + "diaumpire_extlibsearch.log");
    } catch (Exception e) {
    }

    Logger.getRootLogger().info("Version: " + UmpireInfo.GetInstance().Version);
    Logger.getRootLogger().info("Parameter file:" + args[0]);

    BufferedReader reader = new BufferedReader(new FileReader(args[0]));
    String line = "";
    String WorkFolder = "";
    int NoCPUs = 2;

    String ExternalLibPath = "";
    String ExternalLibDecoyTag = "DECOY";

    float ExtProbThreshold = 0.99f;
    float RTWindow_Ext = -1f;

    TandemParam tandemPara = new TandemParam(DBSearchParam.SearchInstrumentType.TOF5600);
    HashMap<String, File> AssignFiles = new HashMap<>();

    //<editor-fold defaultstate="collapsed" desc="Reading parameter file">
    while ((line = reader.readLine()) != null) {
        line = line.trim();
        Logger.getRootLogger().info(line);
        if (!"".equals(line) && !line.startsWith("#")) {
            //System.out.println(line);
            if (line.equals("==File list begin")) {
                do {
                    line = reader.readLine();
                    line = line.trim();
                    if (line.equals("==File list end")) {
                        continue;
                    } else if (!"".equals(line)) {
                        File newfile = new File(line);
                        if (newfile.exists()) {
                            AssignFiles.put(newfile.getAbsolutePath(), newfile);
                        } else {
                            Logger.getRootLogger().info("File: " + newfile + " does not exist.");
                        }
                    }
                } while (!line.equals("==File list end"));
            }
            if (line.split("=").length < 2) {
                continue;
            }
            String type = line.split("=")[0].trim();
            String value = line.split("=")[1].trim();
            switch (type) {
            case "Path": {
                WorkFolder = value;
                break;
            }
            case "path": {
                WorkFolder = value;
                break;
            }
            case "Thread": {
                NoCPUs = Integer.parseInt(value);
                break;
            }
            case "Fasta": {
                tandemPara.FastaPath = value;
                break;
            }
            case "DecoyPrefix": {
                if (!"".equals(value)) {
                    tandemPara.DecoyPrefix = value;
                }
                break;
            }
            case "ExternalLibPath": {
                ExternalLibPath = value;
                break;
            }
            case "ExtProbThreshold": {
                ExtProbThreshold = Float.parseFloat(value);
                break;
            }
            case "RTWindow_Ext": {
                RTWindow_Ext = Float.parseFloat(value);
                break;
            }
            case "ExternalLibDecoyTag": {
                ExternalLibDecoyTag = value;
                if (ExternalLibDecoyTag.endsWith("_")) {
                    ExternalLibDecoyTag = ExternalLibDecoyTag.substring(0, ExternalLibDecoyTag.length() - 1);
                }
                break;
            }
            }
        }
    }
    //</editor-fold>

    //Initialize PTM manager using compomics library
    PTMManager.GetInstance();

    //Check if the fasta file can be found
    if (!new File(tandemPara.FastaPath).exists()) {
        Logger.getRootLogger().info("Fasta file :" + tandemPara.FastaPath
                + " cannot be found, the process will be terminated, please check.");
        System.exit(1);
    }

    //Generate DIA file list
    ArrayList<DIAPack> FileList = new ArrayList<>();
    File folder = new File(WorkFolder);
    if (!folder.exists()) {
        Logger.getRootLogger().info("The path : " + WorkFolder + " cannot be found.");
        System.exit(1);
    }
    for (final File fileEntry : folder.listFiles()) {
        if (fileEntry.isFile()
                && (fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                        | fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
            AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry);
        }
        if (fileEntry.isDirectory()) {
            for (final File fileEntry2 : fileEntry.listFiles()) {
                if (fileEntry2.isFile()
                        && (fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                                | fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzml"))
                        && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                        && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                        && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                    AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2);
                }
            }
        }
    }

    Logger.getRootLogger().info("No. of files assigned :" + AssignFiles.size());
    for (File fileEntry : AssignFiles.values()) {
        Logger.getRootLogger().info(fileEntry.getAbsolutePath());
    }

    for (File fileEntry : AssignFiles.values()) {
        String mzXMLFile = fileEntry.getAbsolutePath();
        if (mzXMLFile.toLowerCase().endsWith(".mzxml") | mzXMLFile.toLowerCase().endsWith(".mzml")) {
            DIAPack DiaFile = new DIAPack(mzXMLFile, NoCPUs);
            Logger.getRootLogger().info(
                    "=================================================================================================");
            Logger.getRootLogger().info("Processing " + mzXMLFile);
            if (!DiaFile.LoadDIASetting()) {
                Logger.getRootLogger().info("Loading DIA setting failed, job is incomplete");
                System.exit(1);
            }
            if (!DiaFile.LoadParams()) {
                Logger.getRootLogger().info("Loading parameters failed, job is incomplete");
                System.exit(1);
            }
            Logger.getRootLogger().info("Loading identification results " + mzXMLFile + "....");

            //If the serialization file for ID file existed
            if (DiaFile.ReadSerializedLCMSID()) {
                DiaFile.IDsummary.ReduceMemoryUsage();
                DiaFile.IDsummary.FastaPath = tandemPara.FastaPath;
                FileList.add(DiaFile);
            }
        }
    }

    //<editor-fold defaultstate="collapsed" desc="Targeted re-extraction using external library">
    //External library search
    Logger.getRootLogger().info("Targeted extraction using external library");

    //Read external library
    FragmentLibManager ExlibManager = FragmentLibManager.ReadFragmentLibSerialization(WorkFolder,
            FilenameUtils.getBaseName(ExternalLibPath));
    if (ExlibManager == null) {
        ExlibManager = new FragmentLibManager(FilenameUtils.getBaseName(ExternalLibPath));
        //Import traML file
        ExlibManager.ImportFragLibByTraML(ExternalLibPath, ExternalLibDecoyTag);
        //Check if there are decoy spectra
        ExlibManager.CheckDecoys();
        //ExlibManager.ImportFragLibBySPTXT(ExternalLibPath);
        ExlibManager.WriteFragmentLibSerialization(WorkFolder);
    }
    Logger.getRootLogger().info("No. of peptide ions in external lib:" + ExlibManager.PeptideFragmentLib.size());

    for (DIAPack diafile : FileList) {
        if (diafile.IDsummary == null) {
            diafile.ReadSerializedLCMSID();
        }
        //Generate RT mapping
        RTMappingExtLib RTmap = new RTMappingExtLib(diafile.IDsummary, ExlibManager, diafile.GetParameter());
        RTmap.GenerateModel();
        RTmap.GenerateMappedPepIon();

        diafile.BuildStructure();
        diafile.MS1FeatureMap.ReadPeakCluster();
        diafile.GenerateMassCalibrationRTMap();
        //Perform targeted re-extraction
        diafile.TargetedExtractionQuant(false, ExlibManager, ExtProbThreshold, RTWindow_Ext);
        diafile.MS1FeatureMap.ClearAllPeaks();
        diafile.IDsummary.ReduceMemoryUsage();
        //Remove target IDs below the defined probability threshold
        diafile.IDsummary.RemoveLowProbMappedIon(ExtProbThreshold);
        diafile.ExportID();
        diafile.ClearStructure();
        Logger.getRootLogger().info("Peptide ions: " + diafile.IDsummary.GetPepIonList().size()
                + " Mapped ions: " + diafile.IDsummary.GetMappedPepIonList().size());
    }
    //</editor-fold>

    Logger.getRootLogger().info("Job done");
    Logger.getRootLogger().info(
            "=================================================================================================");
}
From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.tipicality.Test.java
public static void main(String[] args) throws IOException, ClassNotFoundException {
    String path = DBpediaOntology.DBPEDIA_CSV_FOLDER;
    if (args != null && args.length > 0) {
        path = args[0];
        if (!path.endsWith("/")) {
            path = path + "/";
        }
    }

    stopAttributes.add("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
    stopAttributes.add("http://www.w3.org/2002/07/owl#sameAs");
    stopAttributes.add("http://dbpedia.org/ontology/wikiPageRevisionID");
    stopAttributes.add("http://dbpedia.org/ontology/wikiPageID");
    stopAttributes.add("http://purl.org/dc/elements/1.1/description");
    stopAttributes.add("http://dbpedia.org/ontology/thumbnail");
    stopAttributes.add("http://dbpedia.org/ontology/type");

    try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(path + "counts.bin"))) {
        categories = (HashSet<String>) ois.readObject();
        attributes = (HashSet<String>) ois.readObject();
        categoryCount = (HashMap<String, Integer>) ois.readObject();
        attributeCount = (HashMap<String, Integer>) ois.readObject();
        categoryAttributeCount = (HashMap<String, HashMap<String, Integer>>) ois.readObject();
        attributeCategoryCount = (HashMap<String, HashMap<String, Integer>>) ois.readObject();
    }

    System.out.println(categories.size() + " categories found");
    System.out.println(attributes.size() + " attributes found");

    n = 0;
    for (Map.Entry<String, Integer> e : categoryCount.entrySet()) {
        n += e.getValue();
    }
    System.out.println(n);

    HashMap<String, ArrayList<Pair>> sortedCategoryAttributes = new HashMap<>();

    for (String category : categories) {
        //System.out.println(category);
        //System.out.println("-----------");
        ArrayList<Pair> attributesRank = new ArrayList<Pair>();
        Integer c = categoryCount.get(category);
        if (c == null || c == 0) {
            continue;
        }
        HashMap<String, Integer> thisCategoryAttributeCount = categoryAttributeCount.get(category);
        for (Map.Entry<String, Integer> e : thisCategoryAttributeCount.entrySet()) {
            attributesRank.add(new Pair(e.getKey(), 1.0 * e.getValue() / c));
        }
        Collections.sort(attributesRank);
        for (Pair p : attributesRank) {
            //System.out.println("A:" + p.getS() + "\t" + p.getP());
        }
        //System.out.println("===============================");
        sortedCategoryAttributes.put(category, attributesRank);
    }

    for (String attribute : attributes) {
        //System.out.println(attribute);
        //System.out.println("-----------");
        ArrayList<Pair> categoriesRank = new ArrayList<>();
        Integer a = attributeCount.get(attribute);
        if (a == null || a == 0) {
            continue;
        }
        HashMap<String, Integer> thisAttributeCategoryCount = attributeCategoryCount.get(attribute);
        for (Map.Entry<String, Integer> e : thisAttributeCategoryCount.entrySet()) {
            categoriesRank.add(new Pair(e.getKey(), 1.0 * e.getValue() / a));
        }
        Collections.sort(categoriesRank);
        for (Pair p : categoriesRank) {
            //System.out.println("C:" + p.getS() + "\t" + p.getP());
        }
        //System.out.println("===============================");
    }

    HashMap<Integer, Integer> histogram = new HashMap<>();
    histogram.put(0, 0);
    histogram.put(1, 0);
    histogram.put(2, 0);
    histogram.put(Integer.MAX_VALUE, 0);
    int nTest = 0;

    if (args != null && args.length > 0) {
        path = args[0];
        if (!path.endsWith("/")) {
            path = path + "/";
        }
    }

    for (File f : new File(path).listFiles()) {
        if (f.isFile() && f.getName().endsWith(".csv")) {
            String category = f.getName().replaceFirst("\\.csv", "");
            System.out.println("Category: " + category);
            ArrayList<HashSet<String>> entities = extractEntities(f, 2);
            for (HashSet<String> attributesOfThisEntity : entities) {
                nTest++;
                ArrayList<String> rankedCategories = rankedCategories(attributesOfThisEntity);
                boolean found = false;
                for (int i = 0; i < rankedCategories.size() && !found; i++) {
                    if (rankedCategories.get(i).equals(category)) {
                        Integer count = histogram.get(i);
                        if (count == null) {
                            histogram.put(i, 1);
                        } else {
                            histogram.put(i, count + 1);
                        }
                        found = true;
                    }
                }
                if (!found) {
                    histogram.put(Integer.MAX_VALUE, histogram.get(Integer.MAX_VALUE) + 1);
                }
            }
            System.out.println("Tested entities: " + nTest);
            System.out.println("1: " + histogram.get(0));
            System.out.println("2: " + histogram.get(1));
            System.out.println("3: " + histogram.get(2));
            System.out.println("+3: " + (nTest - histogram.get(2) - histogram.get(1) - histogram.get(0)
                    - histogram.get(Integer.MAX_VALUE)));
            System.out.println("NF: " + histogram.get(Integer.MAX_VALUE));
        }
    }
}
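Test.java maintains its histogram with the classic get-then-put increment: read the current count, treat a null get as zero, and put count + 1 back. On Java 8 and later, merge expresses the same update in one call; a minimal sketch of both forms:

import java.util.HashMap;

public class CountPutDemo {
    public static void main(String[] args) {
        HashMap<Integer, Integer> histogram = new HashMap<>();

        // get-then-put, as in the example above
        Integer count = histogram.get(0);
        if (count == null) {
            histogram.put(0, 1);
        } else {
            histogram.put(0, count + 1);
        }

        // equivalent single-call form on Java 8+
        histogram.merge(0, 1, Integer::sum);

        System.out.println(histogram.get(0)); // 2
    }
}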