List of usage examples for java.util HashMap put
public V put(K key, V value)
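Associates the given value with the given key, replacing any previous mapping, and returns the previous value for that key (or null if there was none). Before the longer real-world examples below, a minimal self-contained sketch of that contract (class and variable names are illustrative):

import java.util.HashMap;

public class PutDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> ages = new HashMap<>();
        Integer prev1 = ages.put("alice", 30); // no previous mapping, so put returns null
        Integer prev2 = ages.put("alice", 31); // replaces 30 and returns it
        System.out.println(prev1 + " " + prev2 + " " + ages.get("alice")); // null 30 31
    }
}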
From source file:discovery.compression.kdd2011.ratio.RatioCompressionReport.java
public static void main(String[] args) throws GraphReadingException, IOException, java.text.ParseException {
    opts.addOption("r", true, "Goal compression ratio");
    // opts.addOption("a", true,
    //         "Algorithm used for compression. The default and only currently available option is \"greedy\"");
    //opts.addOption("cost-output",true,"Output file for costs, default is costs.txt");
    //opts.addOption("cost-format",true,"Output format for ");
    opts.addOption("ctype", true, "Connectivity type: global or local, default is global.");
    opts.addOption("connectivity", false,
            "enables output for connectivity. Connectivity info will be written to connectivity.txt");
    opts.addOption("output_bmg", true, "Write bmg file with groups to given file.");
    opts.addOption("algorithm", true, "Algorithm to use, one of: greedy random1 random2 bruteforce slowgreedy");
    opts.addOption("hop2", false, "Only try to merge nodes that have common neighbors");
    opts.addOption("kmedoids", false, "Enables output for kmedoids clustering");
    opts.addOption("kmedoids_k", true, "Number of clusters to be used in kmedoids. Default is 3");
    opts.addOption("kmedoids_output", true,
            "Output file for kmedoid clusters. Default is clusters.txt. This file will be overwritten.");
    opts.addOption("norefresh", false, "Use old style merging: all connectivities are not refreshed when merging");
    opts.addOption("edge_attribute", true, "Attribute from bmgraph used as edge weight");
    opts.addOption("only_times", false, "Only write times.txt");
    //opts.addOption("no_metrics",false,"Exit after compression, don't calculate any metrics or produce output bmg for the compression.");

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(opts, args);
    } catch (ParseException e) {
        e.printStackTrace();
        System.exit(0);
    }

    boolean connectivity = false;
    double ratio = 0;

    boolean hop2 = cmd.hasOption("hop2");

    RatioCompression compression = new GreedyRatioCompression(hop2);

    if (cmd.hasOption("connectivity"))
        connectivity = true;

    ConnectivityType ctype = ConnectivityType.GLOBAL;
    CompressionMergeModel mergeModel = new PathAverageMergeModel();
    if (cmd.hasOption("ctype")) {
        String ctypeStr = cmd.getOptionValue("ctype");
        if (ctypeStr.equals("local")) {
            ctype = ConnectivityType.LOCAL;
            mergeModel = new EdgeAverageMergeModel();
        } else if (ctypeStr.equals("global")) {
            ctype = ConnectivityType.GLOBAL;
            mergeModel = new PathAverageMergeModel();
        } else {
            System.out.println(PROGRAM_NAME + ": unknown connectivity type " + ctypeStr);
            printHelp();
        }
    }

    if (cmd.hasOption("norefresh"))
        mergeModel = new PathAverageMergeModelNorefresh();

    if (cmd.hasOption("algorithm")) {
        String alg = cmd.getOptionValue("algorithm");
        if (alg.equals("greedy")) {
            compression = new GreedyRatioCompression(hop2);
        } else if (alg.equals("random1")) {
            compression = new RandomRatioCompression(hop2);
        } else if (alg.equals("random2")) {
            compression = new SmartRandomRatioCompression(hop2);
        } else if (alg.equals("bruteforce")) {
            compression = new BruteForceCompression(hop2, ctype == ConnectivityType.LOCAL);
        } else if (alg.equals("slowgreedy")) {
            compression = new SlowGreedyRatioCompression(hop2);
        } else {
            System.out.println("algorithm must be one of: greedy random1 random2 bruteforce slowgreedy");
            printHelp();
        }
    }

    compression.setMergeModel(mergeModel);

    if (cmd.hasOption("r")) {
        ratio = Double.parseDouble(cmd.getOptionValue("r"));
    } else {
        System.out.println(PROGRAM_NAME + ": compression ratio not defined");
        printHelp();
    }

    if (cmd.hasOption("help")) {
        printHelp();
    }

    String infile = null;
    if (cmd.getArgs().length != 0) {
        infile = cmd.getArgs()[0];
    } else {
        printHelp();
    }

    boolean kmedoids = false;
    int kmedoidsK = 3;
    String kmedoidsOutput = "clusters.txt";
    if (cmd.hasOption("kmedoids"))
        kmedoids = true;
    if (cmd.hasOption("kmedoids_k"))
        kmedoidsK = Integer.parseInt(cmd.getOptionValue("kmedoids_k"));
    if (cmd.hasOption("kmedoids_output"))
        kmedoidsOutput = cmd.getOptionValue("kmedoids_output");

    String edgeAttrib = "goodness";
    if (cmd.hasOption("edge_attribute"))
        edgeAttrib = cmd.getOptionValue("edge_attribute");

    // This program should directly use bmgraph-java to read and
    // DefaultGraph should have a constructor that takes a BMGraph as an argument.
    //VisualGraph vg = new VisualGraph(infile, edgeAttrib, false);
    //System.out.println("vg read");
    //SimpleVisualGraph origSG = new SimpleVisualGraph(vg);
    BMGraph bmg = BMGraphUtils.readBMGraph(infile);

    int origN = bmg.getNodes().size();

    //for(int i=0;i<origN;i++)
    //    System.out.println(i+"="+origSG.getVisualNode(i));
    System.out.println("bmgraph read");

    BMNode[] i2n = new BMNode[origN];
    HashMap<BMNode, Integer> n2i = new HashMap<BMNode, Integer>();
    {
        int pi = 0;
        for (BMNode nod : bmg.getNodes()) {
            n2i.put(nod, pi);
            i2n[pi++] = nod;
        }
    }

    DefaultGraph dg = new DefaultGraph();
    for (BMEdge e : bmg.getEdges()) {
        dg.addEdge(n2i.get(e.getSource()), n2i.get(e.getTarget()), Double.parseDouble(e.get(edgeAttrib)));
    }

    DefaultGraph origDG = dg.copy();

    System.out.println("inputs read");
    RatioCompression nopCompressor = new RatioCompression.DefaultRatioCompression();
    ResultGraph nopResult = nopCompressor.compressGraph(dg, 1);

    long start = System.currentTimeMillis();
    ResultGraph result = compression.compressGraph(dg, ratio);
    long timeSpent = System.currentTimeMillis() - start;
    double seconds = timeSpent * 0.001;

    BufferedWriter timesWriter = new BufferedWriter(new FileWriter("times.txt", true));
    timesWriter.append("" + seconds + "\n");
    timesWriter.close();

    if (cmd.hasOption("only_times")) {
        System.out.println("Compression done, exiting.");
        System.exit(0);
    }

    BufferedWriter costsWriter = new BufferedWriter(new FileWriter("costs.txt", true));
    costsWriter.append("" + nopResult.getCompressorCosts() + " " + result.getCompressorCosts() + "\n");
    costsWriter.close();

    double[][] origProb;
    double[][] compProb;
    int[] group = new int[origN];

    for (int i = 0; i < result.partition.size(); i++)
        for (int x : result.partition.get(i))
            group[x] = i;

    if (ctype == ConnectivityType.LOCAL) {
        origProb = new double[origN][origN];
        compProb = new double[origN][origN];
        DefaultGraph g = result.uncompressedGraph();
        for (int i = 0; i < origN; i++) {
            for (int j = 0; j < origN; j++) {
                origProb[i][j] = dg.getEdgeWeight(i, j);
                compProb[i][j] = g.getEdgeWeight(i, j);
            }
        }
        System.out.println("Writing edge-dissimilarity");
    } else {
        origProb = ProbDijkstra.getProbMatrix(origDG);

        compProb = new double[origN][origN];

        System.out.println("nodeCount = " + result.graph.getNodeCount());
        double[][] ccProb = ProbDijkstra.getProbMatrix(result.graph);
        System.out.println("ccProb.length = " + ccProb.length);
        System.out.println("ccProb[0].length = " + ccProb[0].length);

        for (int i = 0; i < origN; i++) {
            for (int j = 0; j < origN; j++) {
                if (group[i] == group[j])
                    compProb[i][j] = result.graph.getEdgeWeight(group[i], group[j]);
                else {
                    int gj = group[j];
                    int gi = group[i];
                    compProb[i][j] = ccProb[group[i]][group[j]];
                }
            }
        }
        System.out.println("Writing best-path-dissimilarity");
        //compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph());
    }

    {
        BufferedWriter connWr = null;

        if (connectivity) {
            connWr = new BufferedWriter(new FileWriter("connectivity.txt", true));
        }
        double totalDiff = 0;

        for (int i = 0; i < origN; i++) {
            for (int j = i + 1; j < origN; j++) {
                double diff = Math.abs(origProb[i][j] - compProb[i][j]);
                //VisualNode ni = origSG.getVisualNode(i);
                //VisualNode nj = origSG.getVisualNode(j);
                BMNode ni = i2n[i];
                BMNode nj = i2n[j];
                if (connectivity)
                    connWr.append(ni + "\t" + nj + "\t" + origProb[i][j] + "\t" + compProb[i][j] + "\t" + diff
                            + "\n");
                totalDiff += diff * diff;
            }
        }

        if (connectivity) {
            connWr.append("\n");
            connWr.close();
        }

        totalDiff = Math.sqrt(totalDiff);
        BufferedWriter dissWr = new BufferedWriter(new FileWriter("dissimilarity.txt", true));
        dissWr.append("" + totalDiff + "\n");
        dissWr.close();
    }

    if (cmd.hasOption("output_bmg")) {
        BMGraph outgraph = new BMGraph();
        String outputfile = cmd.getOptionValue("output_bmg");
        HashMap<Integer, BMNode> nodes = new HashMap<Integer, BMNode>();

        for (int i = 0; i < result.partition.size(); i++) {
            ArrayList<Integer> g = result.partition.get(i);
            if (g.size() == 0)
                continue;
            BMNode node = new BMNode("Supernode_" + i);
            HashMap<String, String> attributes = new HashMap<String, String>();
            StringBuffer contents = new StringBuffer();
            for (int x : g)
                contents.append(i2n[x] + ",");
            contents.delete(contents.length() - 1, contents.length());

            attributes.put("nodes", contents.toString());
            attributes.put("self-edge", "" + result.graph.getEdgeWeight(i, i));
            node.setAttributes(attributes);
            nodes.put(i, node);
            outgraph.ensureHasNode(node);
        }

        for (int i = 0; i < result.partition.size(); i++) {
            if (result.partition.get(i).size() == 0)
                continue;
            for (int x : result.graph.getNeighbors(i)) {
                if (x < i)
                    continue;
                BMNode from = nodes.get(i);
                BMNode to = nodes.get(x);
                if (from == null || to == null) {
                    System.out.println(from + "->" + to);
                    System.out.println(i + "->" + x);
                    System.out.println("");
                }
                BMEdge e = new BMEdge(nodes.get(i), nodes.get(x), "notype");
                e.setAttributes(new HashMap<String, String>());
                e.put("goodness", "" + result.graph.getEdgeWeight(i, x));
                outgraph.ensureHasEdge(e);
            }
        }
        BMGraphUtils.writeBMGraph(outgraph, outputfile);
    }

    // k medoids!
    if (kmedoids) {
        //KMedoidsResult clustersOrig=KMedoids.runKMedoids(origProb,kmedoidsK);

        if (ctype == ConnectivityType.LOCAL) {
            compProb = ProbDijkstra.getProbMatrix(result.uncompressedGraph());
        }

        //KMedoidsResult compClusters = KMedoids.runKMedoids(ProbDijkstra.getProbMatrix(result.graph),kmedoidsK);
        KMedoidsResult clustersComp = KMedoids.runKMedoids(compProb, kmedoidsK);

        BufferedWriter bw = new BufferedWriter(new FileWriter(kmedoidsOutput));

        for (int i = 0; i < origN; i++) {
            int g = group[i];
            //bw.append(origSG.getVisualNode(i).getBMNode()+" "+compClusters.clusters[g]+"\n");
            bw.append(i2n[i] + " " + clustersComp.clusters[i] + "\n");
        }
        bw.close();
    }

    System.exit(0);
}
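The example above pairs a HashMap with a plain array to build a bidirectional node/index mapping: put records node-to-dense-index while the array records index-to-node. A minimal sketch of just that pattern, with String standing in for the example's BMNode:

import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Set;

public class NodeIndexer {
    public static void main(String[] args) {
        Set<String> nodes = new LinkedHashSet<>(); // stands in for bmg.getNodes()
        nodes.add("a");
        nodes.add("b");
        nodes.add("c");
        String[] i2n = new String[nodes.size()];        // index -> node
        HashMap<String, Integer> n2i = new HashMap<>(); // node -> index
        int pi = 0;
        for (String nod : nodes) {
            n2i.put(nod, pi); // put assigns the next dense index to this node
            i2n[pi++] = nod;
        }
        System.out.println(n2i.get("b") + " " + i2n[1]); // 1 b
    }
}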
From source file:gov.nih.nci.caintegrator.application.gpvisualizer.CaIntegratorRunVisualizer.java
/**
 * args[0] = visualizer task name
 * args[1] = command line
 * args[2] = debug flag
 * args[3] = OS required for running
 * args[4] = CPU type required for running
 * args[5] = libdir on server for this task
 * args[6] = CSV list of downloadable files for inputs
 * args[7] = CSV list of input parameter names
 * args[8] = CSV list of support file names
 * args[9] = CSV list of support file modification dates
 * args[10] = server URL
 * args[11] = LSID of task
 * args[12...n] = optional input parameter arguments
 */
public static void main(String[] args) {
    String[] wellKnownNames = { RunVisualizerConstants.NAME, RunVisualizerConstants.COMMAND_LINE,
            RunVisualizerConstants.DEBUG, RunVisualizerConstants.OS, RunVisualizerConstants.CPU_TYPE,
            RunVisualizerConstants.LIBDIR, RunVisualizerConstants.DOWNLOAD_FILES, RunVisualizerConstants.LSID };

    int PARAM_NAMES = 7;
    int SUPPORT_FILE_NAMES = PARAM_NAMES + 1;
    int SUPPORT_FILE_DATES = SUPPORT_FILE_NAMES + 1;
    int SERVER = SUPPORT_FILE_DATES + 1;
    int LSID = SERVER + 1;
    int TASK_ARGS = LSID + 1;

    try {
        HashMap params = new HashMap();
        for (int i = 0; i < wellKnownNames.length; i++) {
            params.put(wellKnownNames[i], args[i]);
        }

        String name = (String) params.get(RunVisualizerConstants.NAME);
        StringTokenizer stParameterNames = new StringTokenizer(args[PARAM_NAMES], ", ");
        int argNum = TASK_ARGS;
        // when pulling parameters from the command line, don't assume that
        // all were provided; some could be missing!
        while (stParameterNames.hasMoreTokens()) {
            String paramName = stParameterNames.nextToken();
            if (argNum < args.length) {
                String paramValue = args[argNum++];
                params.put(paramName, paramValue);
            } else {
                System.err.println("No value specified for " + paramName);
            }
        }

        URL source = new URL(args[SERVER]);

        StringTokenizer stFileNames = new StringTokenizer(args[SUPPORT_FILE_NAMES], ",");
        StringTokenizer stFileDates = new StringTokenizer(args[SUPPORT_FILE_DATES], ",");
        String[] supportFileNames = new String[stFileNames.countTokens()];
        long[] supportFileDates = new long[supportFileNames.length];
        String filename = null;
        String fileDate = null;
        int f = 0;
        while (stFileNames.hasMoreTokens()) {
            supportFileNames[f] = stFileNames.nextToken();
            if (stFileDates.hasMoreTokens()) {
                supportFileDates[f] = Long.parseLong(stFileDates.nextToken());
            } else {
                supportFileDates[f] = -1;
            }
            f++;
        }

        CaIntegratorRunVisualizer visualizer = new CaIntegratorRunVisualizer(params, supportFileNames,
                supportFileDates, new Applet());
        visualizer.run();
    } catch (Throwable t) {
        t.printStackTrace();
    }
}
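The example above zips two parallel arrays into a map, one put per position (it uses a raw HashMap; a typed HashMap<String, String> would avoid the cast on get). A minimal sketch of that zip pattern, with illustrative names and values:

import java.util.HashMap;

public class ParamZip {
    public static void main(String[] args) {
        String[] names = { "name", "commandLine", "debug" };
        String[] values = { "heatmap", "run.sh", "false" };
        HashMap<String, String> params = new HashMap<>();
        for (int i = 0; i < names.length && i < values.length; i++) {
            params.put(names[i], values[i]); // well-known name i -> positional arg i
        }
        System.out.println(params.get("debug")); // false
    }
}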
From source file:akori.AKORI.java
public static void main(String[] args) throws IOException, InterruptedException {
    System.out.println("esto es AKORI");

    URL = "http://www.mbauchile.cl";
    PATH = "E:\\NetBeansProjects\\AKORI\\";
    NAME = "mbauchile.png";

    // Extract the DOM tree
    Document doc = Jsoup.connect(URL).timeout(0).get();

    // The Firefox driver supports javascript
    WebDriver driver = new FirefoxDriver();
    driver.manage().window().maximize();
    System.out.println(driver.manage().window().getSize().toString());
    System.out.println(driver.manage().window().getPosition().toString());
    int xmax = driver.manage().window().getSize().width;
    int ymax = driver.manage().window().getSize().height;

    // Go to the URL page
    driver.get(URL);

    File screen = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
    FileUtils.copyFile(screen, new File(PATH + NAME));

    BufferedImage img = ImageIO.read(new File(PATH + NAME));
    //Graphics2D graph = img.createGraphics();
    BufferedImage img1 = new BufferedImage(xmax, ymax, BufferedImage.TYPE_INT_ARGB);
    Graphics2D graph1 = img.createGraphics();

    double[][] matrix = new double[ymax][xmax];
    BufferedReader in = new BufferedReader(new FileReader("et.txt"));
    String linea;
    double max = 0;
    graph1.drawImage(img, 0, 0, null);
    HashMap<String, Integer> lista = new HashMap<String, Integer>();
    int count = 0;
    for (int i = 0; (linea = in.readLine()) != null && i < 10000; ++i) {
        String[] datos = linea.split(",");
        int x = (int) Double.parseDouble(datos[0]);
        int y = (int) Double.parseDouble(datos[2]);
        long time = Double.valueOf(datos[4]).longValue();
        if (x >= xmax || y >= ymax)
            continue;
        if (time < 691215)
            continue;
        if (time > 705648)
            break;
        if (lista.containsKey(x + "," + y))
            lista.put(x + "," + y, lista.get(x + "," + y) + 1);
        else
            lista.put(x + "," + y, 1);
        ++count;
    }
    System.out.println(count);
    in.close();

    Iterator iter = lista.entrySet().iterator();
    Map.Entry e;
    for (String key : lista.keySet()) {
        Integer i = lista.get(key);
        if (max < i)
            max = i;
    }
    System.out.println(max);
    max = 0;
    while (iter.hasNext()) {
        e = (Map.Entry) iter.next();
        String xy = (String) e.getKey();
        String[] datos = xy.split(",");
        int x = Integer.parseInt(datos[0]);
        int y = Integer.parseInt(datos[1]);
        matrix[y][x] += (int) e.getValue();
        double aux;
        if ((aux = normalMatrix(matrix, y, x, ((int) e.getValue()) * 4)) > max) {
            max = aux;
        }
        //normalMatrix(matrix,x,y,20);
        if (matrix[y][x] > max)
            max = matrix[y][x];
    }

    int A, R, G, B, n;
    for (int i = 0; i < xmax; ++i) {
        for (int j = 0; j < ymax; ++j) {
            if (matrix[j][i] != 0) {
                n = (int) Math.round(matrix[j][i] * 100 / max);
                R = Math.round((255 * n) / 100);
                G = Math.round((255 * (100 - n)) / 100);
                B = 0;
                A = Math.round((255 * n) / 100);
                if (R > 255)
                    R = 255;
                if (R < 0)
                    R = 0;
                if (G > 255)
                    G = 255;
                if (G < 0)
                    G = 0;
                if (R < 50)
                    A = 0;
                graph1.setColor(new Color(R, G, B, A));
                graph1.fillOval(i, j, 1, 1);
            }
        }
    }
    //graph1.dispose();
    ImageIO.write(img, "png", new File("example.png"));
    System.out.println(max);
    graph1.setColor(Color.RED);

    // Extract the elements
    Elements e1 = doc.body().getAllElements();
    int i = 1;
    ArrayList<String> tags = new ArrayList<String>();
    for (Element temp : e1) {
        if (tags.indexOf(temp.tagName()) == -1) {
            tags.add(temp.tagName());
            List<WebElement> query = driver.findElements(By.tagName(temp.tagName()));
            for (WebElement temp1 : query) {
                Point po = temp1.getLocation();
                Dimension d = temp1.getSize();
                if (d.width <= 0 || d.height <= 0 || po.x < 0 || po.y < 0)
                    continue;
                System.out.println(i + " " + temp.nodeName());
                System.out.println("   x: " + po.x + " y: " + po.y);
                System.out.println("   width: " + d.width + " height: " + d.height);
                graph1.draw(new Rectangle(po.x, po.y, d.width, d.height));
                ++i;
            }
        }
    }
    graph1.dispose();
    ImageIO.write(img, "png", new File(PATH + NAME));
    driver.quit();
}
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
/**
 * @param args
 * @throws ParseException
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true,
            "aws secret access key; this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println("Arguments 'aws_id', 'aws_key', and 'b' are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids
    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;

    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;

            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {
            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }
            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
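The datasetId map above is seeded with put(name, null) placeholders, the placeholders are overwritten by a second put while scanning the index file, and any key still null afterwards is reported as unresolved. A minimal sketch of that resolve-then-verify pattern (Java 9+ for List.of; values are illustrative):

import java.util.HashMap;
import java.util.List;

public class IdResolution {
    public static void main(String[] args) {
        HashMap<String, String> datasetId = new HashMap<>();
        for (String name : List.of("taxi", "weather")) {
            datasetId.put(name, null); // placeholder: id not known yet
        }
        for (String line : List.of("taxi\t12", "noise\t99")) { // sample index-file lines
            String[] dt = line.split("\t");
            if (datasetId.containsKey(dt[0])) {
                datasetId.put(dt[0], dt[1]); // overwrite the null placeholder
            }
        }
        for (String name : datasetId.keySet()) {
            if (datasetId.get(name) == null) {
                System.out.println("No dataset id for " + name); // "weather" stays unresolved
            }
        }
    }
}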
From source file:at.tlphotography.jAbuseReport.Reporter.java
/**
 * The main method.
 *
 * @param args the arguments
 */
public static void main(String[] args) {
    parseArguments(args);

    File[] directory = new File(logDir).listFiles(); // get the files in the dir
    for (File file : directory) { // iterate over the files
        // if the file is not a dir and the name contains the logName string
        if (!file.isDirectory() && file.getName().contains(logNames)) {
            if (file.getName().endsWith(".gz")) { // is it zipped?
                content.putAll(readGZFile(file));
            } else {
                content.putAll(readLogFile(file));
            }
        }
    }

    // group the log lines by the abuse mail address returned by the whois lookup
    HashMap<String, ArrayList<LogObject>> finalContent = new HashMap<>();
    Iterator<Entry<String, String>> it = content.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry<String, String> pair = it.next();
        String mail = whoIsLookUp(pair.getKey());
        if (finalContent.containsKey(mail)) {
            finalContent.get(mail).add(new LogObject(pair.getValue()));
        } else {
            ArrayList<LogObject> temp = new ArrayList<LogObject>();
            temp.add(new LogObject(pair.getValue()));
            finalContent.put(mail, temp);
        }
        it.remove();
    }

    // sort them
    Iterator<Entry<String, ArrayList<LogObject>>> it2 = finalContent.entrySet().iterator();
    while (it2.hasNext()) {
        Entry<String, ArrayList<LogObject>> pair = it2.next();
        Collections.sort(pair.getValue());
        println(pair.getKey() + " =");
        for (LogObject obj : pair.getValue()) {
            println(obj.logContent);
        }
        println("\n");
        it2.remove();
    }
}
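The containsKey/get/put branching that builds finalContent above is the classic "multimap" pattern: create the list on a key's first appearance, append otherwise. Since Java 8, computeIfAbsent expresses it in one call; a minimal sketch (the mail key is an illustrative stand-in for the whoIsLookUp result):

import java.util.ArrayList;
import java.util.HashMap;

public class GroupByMail {
    public static void main(String[] args) {
        HashMap<String, ArrayList<String>> finalContent = new HashMap<>();
        String mail = "abuse@example.org"; // stand-in for whoIsLookUp(ip)
        // creates the list on first use, then appends; replaces the containsKey branch
        finalContent.computeIfAbsent(mail, k -> new ArrayList<>()).add("log line 1");
        finalContent.computeIfAbsent(mail, k -> new ArrayList<>()).add("log line 2");
        System.out.println(finalContent); // {abuse@example.org=[log line 1, log line 2]}
    }
}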
From source file:DIA_Umpire_To_Skyline.DIA_Umpire_To_Skyline.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws FileNotFoundException, IOException, Exception {
    System.out.println(
            "=================================================================================================");
    System.out.println("DIA-Umpire_To_Skyline (version: " + UmpireInfo.GetInstance().Version + ")");
    if (args.length < 1) {
        System.out.println(
                "command format error, it should be like: java -jar -Xmx20G DIA_Umpire_To_Skyline.jar Path NoThreads");
        System.out.println("command : java -jar -Xmx20G DIA_Umpire_To_Skyline.jar Path [Option]\n");
        System.out.println("\nOptions");
        System.out.println("\t-t\tNo. of threads, Ex: -t4 (using four threads, default value)");
        System.out.println(
                "\t-cP\tPath of msconvert.exe for mzXML conversion, Ex: -cP (using four threads, default value)");
        return;
    }
    try {
        ConsoleLogger.SetConsoleLogger(Level.DEBUG);
        ConsoleLogger.SetFileLogger(Level.DEBUG,
                FilenameUtils.getFullPath(args[0]) + "diaumpire_to_skyline.log");
    } catch (Exception e) {
        System.out.println("Logger initialization failed");
    }

    Logger.getRootLogger().info("Path:" + args[0]);

    String msconvertpath = "C:/inetpub/tpp-bin/msconvert";
    String WorkFolder = args[0];
    int NoCPUs = 4;

    for (int i = 1; i < args.length; i++) {
        if (args[i].startsWith("-")) {
            if (args[i].startsWith("-cP")) {
                msconvertpath = args[i].substring(3);
                Logger.getRootLogger().info("MSConvert path: " + msconvertpath);
            }
            if (args[i].startsWith("-t")) {
                NoCPUs = Integer.parseInt(args[i].substring(2));
                Logger.getRootLogger().info("No. of threads: " + NoCPUs);
            }
        }
    }

    HashMap<String, File> AssignFiles = new HashMap<>();

    try {
        File folder = new File(WorkFolder);
        if (!folder.exists()) {
            Logger.getRootLogger().info("Path: " + folder.getAbsolutePath() + " cannot be found.");
        }
        for (final File fileEntry : folder.listFiles()) {
            if (fileEntry.isFile() && fileEntry.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                    && !fileEntry.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                AssignFiles.put(fileEntry.getAbsolutePath(), fileEntry);
            }
            if (fileEntry.isDirectory()) {
                for (final File fileEntry2 : fileEntry.listFiles()) {
                    if (fileEntry2.isFile() && fileEntry2.getAbsolutePath().toLowerCase().endsWith(".mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q1.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q2.mzxml")
                            && !fileEntry2.getAbsolutePath().toLowerCase().endsWith("q3.mzxml")) {
                        AssignFiles.put(fileEntry2.getAbsolutePath(), fileEntry2);
                    }
                }
            }
        }
        Logger.getRootLogger().info("No. of files assigned: " + AssignFiles.size());
        for (File fileEntry : AssignFiles.values()) {
            Logger.getRootLogger().info(fileEntry.getAbsolutePath());
        }

        ExecutorService executorPool = null;
        executorPool = Executors.newFixedThreadPool(3);
        for (File fileEntry : AssignFiles.values()) {
            String mzXMLFile = fileEntry.getAbsolutePath();
            FileThread thread = new FileThread(mzXMLFile, NoCPUs, msconvertpath);
            executorPool.execute(thread);
        }
        executorPool.shutdown();
        try {
            executorPool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            Logger.getRootLogger().info("interrupted..");
        }
    } catch (Exception e) {
        Logger.getRootLogger().error(e.getMessage());
        throw e;
    }
    Logger.getRootLogger().info("Job done");
    Logger.getRootLogger().info(
            "=================================================================================================");
}
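AssignFiles above is keyed by absolute path, so a file reached twice (say, once in the root scan and once in a subfolder scan) is stored only once: the second put on the same key replaces the entry instead of adding a duplicate. A minimal sketch of that dedup-by-key pattern with illustrative paths:

import java.io.File;
import java.util.HashMap;

public class FileDedup {
    public static void main(String[] args) {
        HashMap<String, File> assignFiles = new HashMap<>();
        String[] paths = { "/data/run1.mzXML", "/data/run1.mzXML", "/data/run2.mzXML" };
        for (String p : paths) {
            File f = new File(p);
            assignFiles.put(f.getAbsolutePath(), f); // same key: replaces, never duplicates
        }
        System.out.println(assignFiles.size()); // 2
    }
}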
From source file:com.era7.bioinfo.annotation.AutomaticQualityControl.java
public static void main(String[] args) {

    if (args.length != 4) {
        System.out.println("This program expects four parameters: \n" + "1. Gene annotation XML filename \n"
                + "2. Reference protein set (.fasta)\n" + "3. Output TXT filename\n"
                + "4. Initial Blast XML results filename (the one used at the very beginning of the semiautomatic annotation process)\n");
    } else {
        BufferedWriter outBuff = null;
        try {
            File inFile = new File(args[0]);
            File fastaFile = new File(args[1]);
            File outFile = new File(args[2]);
            File blastFile = new File(args[3]);

            // First, load all the data from the blast XML file
            BufferedReader buffReader = new BufferedReader(new FileReader(blastFile));
            StringBuilder stBuilder = new StringBuilder();
            String line = null;
            while ((line = buffReader.readLine()) != null) {
                stBuilder.append(line);
            }
            buffReader.close();
            System.out.println("Creating blastoutput...");
            BlastOutput blastOutput = new BlastOutput(stBuilder.toString());
            System.out.println("BlastOutput created! :)");
            stBuilder.delete(0, stBuilder.length());

            HashMap<String, String> blastProteinsMap = new HashMap<String, String>();
            ArrayList<Iteration> iterations = blastOutput.getBlastOutputIterations();
            for (Iteration iteration : iterations) {
                blastProteinsMap.put(iteration.getQueryDef().split("\\|")[1].trim(), iteration.toString());
            }
            // freeing some memory
            blastOutput = null;

            //------------------------------------------------------------------------

            // Initializing writer for output file
            outBuff = new BufferedWriter(new FileWriter(outFile));

            // reading gene annotation xml file.....
            buffReader = new BufferedReader(new FileReader(inFile));
            stBuilder = new StringBuilder();
            line = null;
            while ((line = buffReader.readLine()) != null) {
                stBuilder.append(line);
            }
            buffReader.close();
            XMLElement genesXML = new XMLElement(stBuilder.toString());
            // freeing some memory I don't need anymore
            stBuilder.delete(0, stBuilder.length());

            // reading file with the reference proteins set
            ArrayList<String> proteinsReferenceSet = new ArrayList<String>();
            buffReader = new BufferedReader(new FileReader(fastaFile));
            while ((line = buffReader.readLine()) != null) {
                if (line.charAt(0) == '>') {
                    proteinsReferenceSet.add(line.split("\\|")[1]);
                }
            }
            buffReader.close();

            Element pGenes = genesXML.asJDomElement().getChild(PredictedGenes.TAG_NAME);
            List<Element> contigs = pGenes.getChildren(ContigXML.TAG_NAME);
            System.out.println("There are " + contigs.size() + " contigs to be checked... ");
            outBuff.write("There are " + contigs.size() + " contigs to be checked... \n");
            outBuff.write("Proteins reference set: \n");
            for (String st : proteinsReferenceSet) {
                outBuff.write(st + ",");
            }
            outBuff.write("\n");

            for (Element elem : contigs) {
                ContigXML contig = new ContigXML(elem);

                // write the id of the contig currently being checked
                outBuff.write("Checking contig: " + contig.getId() + "\n");
                outBuff.flush();

                List<XMLElement> geneList = contig.getChildrenWith(PredictedGene.TAG_NAME);
                System.out.println("geneList.size() = " + geneList.size());
                int numeroDeGenesParaAnalizar = geneList.size() / FACTOR;
                if (numeroDeGenesParaAnalizar == 0) {
                    numeroDeGenesParaAnalizar++;
                }
                ArrayList<Integer> indicesUtilizados = new ArrayList<Integer>();
                outBuff.write("\nThe contig has " + geneList.size() + " predicted genes, let's analyze: "
                        + numeroDeGenesParaAnalizar + "\n");

                for (int j = 0; j < numeroDeGenesParaAnalizar; j++) {
                    int geneIndex;
                    boolean geneIsDismissed = false;
                    do {
                        geneIsDismissed = false;
                        geneIndex = (int) Math.round(Math.floor(Math.random() * geneList.size()));
                        PredictedGene tempGene = new PredictedGene(geneList.get(geneIndex).asJDomElement());
                        if (tempGene.getStatus().equals(PredictedGene.STATUS_DISMISSED)) {
                            geneIsDismissed = true;
                        }
                    } while (indicesUtilizados.contains(new Integer(geneIndex)) && geneIsDismissed);
                    indicesUtilizados.add(geneIndex);
                    System.out.println("geneIndex = " + geneIndex);

                    // Now pick the gene at that index and run the quality control on it
                    PredictedGene gene = new PredictedGene(geneList.get(geneIndex).asJDomElement());
                    outBuff.write("\nAnalyzing gene with id: " + gene.getId() + " , annotation uniprot id: "
                            + gene.getAnnotationUniprotId() + "\n");
                    outBuff.write("eValue: " + gene.getEvalue() + "\n");

                    //-------------- BLAST HTTP POST REQUEST ----------------------
                    PostMethod post = new PostMethod(BLAST_URL);
                    post.addParameter("program", "blastx");
                    post.addParameter("sequence", gene.getSequence());
                    post.addParameter("database", "uniprotkb");
                    post.addParameter("email", "ppareja@era7.com");
                    post.addParameter("exp", "1e-10");
                    post.addParameter("stype", "dna");

                    // execute the POST
                    HttpClient client = new HttpClient();
                    int status = client.executeMethod(post);
                    System.out.println("status post = " + status);

                    InputStream inStream = post.getResponseBodyAsStream();
                    String fileName = "jobid.txt";
                    FileOutputStream outStream = new FileOutputStream(new File(fileName));
                    byte[] buffer = new byte[1024];
                    int len;
                    while ((len = inStream.read(buffer)) != -1) {
                        outStream.write(buffer, 0, len);
                    }
                    outStream.close();

                    // Once the file is created I just have to read one line in order to extract the job id
                    buffReader = new BufferedReader(new FileReader(new File(fileName)));
                    String jobId = buffReader.readLine();
                    buffReader.close();
                    System.out.println("jobId = " + jobId);

                    //-------------- HTTP CHECK JOB STATUS REQUEST ----------------------
                    GetMethod get = new GetMethod(CHECK_JOB_STATUS_URL + jobId);
                    String jobStatus = "";
                    do {
                        try {
                            Thread.sleep(1000); // sleep for 1000 ms
                        } catch (InterruptedException ie) {
                            // If this thread was interrupted by another thread
                        }

                        status = client.executeMethod(get);
                        //System.out.println("status get = " + status);

                        inStream = get.getResponseBodyAsStream();
                        fileName = "jobStatus.txt";
                        outStream = new FileOutputStream(new File(fileName));
                        while ((len = inStream.read(buffer)) != -1) {
                            outStream.write(buffer, 0, len);
                        }
                        outStream.close();

                        // Once the file is created I just have to read one line in order to extract the job status
                        buffReader = new BufferedReader(new FileReader(new File(fileName)));
                        jobStatus = buffReader.readLine();
                        //System.out.println("jobStatus = " + jobStatus);
                        buffReader.close();
                    } while (!jobStatus.equals(FINISHED_JOB_STATUS));
                    // Once I'm here the blast should've already finished

                    //-------------- JOB RESULTS HTTP REQUEST ----------------------
                    get = new GetMethod(JOB_RESULT_URL + jobId + "/out");
                    status = client.executeMethod(get);
                    System.out.println("status get = " + status);

                    inStream = get.getResponseBodyAsStream();
                    fileName = "jobResults.txt";
                    outStream = new FileOutputStream(new File(fileName));
                    while ((len = inStream.read(buffer)) != -1) {
                        outStream.write(buffer, 0, len);
                    }
                    outStream.close();

                    //-------- parsing the blast results file -----
                    TreeSet<GeneEValuePair> featuresBlast = new TreeSet<GeneEValuePair>();
                    buffReader = new BufferedReader(new FileReader(new File(fileName)));
                    while ((line = buffReader.readLine()) != null) {
                        if (line.length() > 3) {
                            String prefix = line.substring(0, 3);
                            if (prefix.equals("TR:") || prefix.equals("SP:")) {
                                String[] columns = line.split(" ");
                                String id = columns[1];
                                //System.out.println("id = " + id);

                                String e = "";
                                String[] arraySt = line.split("\\.\\.\\.");
                                if (arraySt.length > 1) {
                                    arraySt = arraySt[1].trim().split(" ");
                                    int contador = 0;
                                    for (int k = 0; k < arraySt.length && contador <= 2; k++) {
                                        String string = arraySt[k];
                                        if (!string.equals("")) {
                                            contador++;
                                            if (contador == 2) {
                                                e = string;
                                            }
                                        }
                                    }
                                } else {
                                    // Number before e-
                                    String[] arr = arraySt[0].split("e-")[0].split(" ");
                                    String numeroAntesE = arr[arr.length - 1];
                                    String numeroDespuesE = arraySt[0].split("e-")[1].split(" ")[0];
                                    e = numeroAntesE + "e-" + numeroDespuesE;
                                }
                                double eValue = Double.parseDouble(e);
                                //System.out.println("eValue = " + eValue);
                                GeneEValuePair g = new GeneEValuePair(id, eValue);
                                featuresBlast.add(g);
                            }
                        }
                    }

                    GeneEValuePair currentGeneEValuePair = new GeneEValuePair(gene.getAnnotationUniprotId(),
                            gene.getEvalue());
                    System.out.println("currentGeneEValuePair.id = " + currentGeneEValuePair.id);
                    System.out.println("currentGeneEValuePair.eValue = " + currentGeneEValuePair.eValue);
                    boolean blastContainsGene = false;
                    for (GeneEValuePair geneEValuePair : featuresBlast) {
                        if (geneEValuePair.id.equals(currentGeneEValuePair.id)) {
                            blastContainsGene = true;
                            // take the e-value it has in the wu-blast result so the two can be compared
                            currentGeneEValuePair.eValue = geneEValuePair.eValue;
                            break;
                        }
                    }

                    if (blastContainsGene) {
                        outBuff.write("The protein was found in the WU-BLAST result.. \n");
                        // Now that I know it is in the blast result, check whether it is the best hit
                        GeneEValuePair first = featuresBlast.first();
                        outBuff.write("Protein with best eValue according to the WU-BLAST result: " + first.id
                                + " , " + first.eValue + "\n");
                        if (first.id.equals(currentGeneEValuePair.id)) {
                            outBuff.write("Proteins with best eValue match up \n");
                        } else {
                            if (first.eValue == currentGeneEValuePair.eValue) {
                                outBuff.write(
                                        "The one with best eValue is not the same protein but has the same eValue \n");
                            } else if (first.eValue > currentGeneEValuePair.eValue) {
                                outBuff.write(
                                        "The one with best eValue is not the same protein but has a worse eValue :) \n");
                            } else {
                                outBuff.write(
                                        "The best protein from BLAST has an eValue smaller than ours, checking if it's part of the reference set...\n");
                                //System.exit(-1);
                                if (proteinsReferenceSet.contains(first.id)) {
                                    // The protein is in the reference set and that shouldn't happen
                                    outBuff.write(
                                            "The protein was found on the reference set, checking if it belongs to the same contig...\n");
                                    String iterationSt = blastProteinsMap.get(gene.getAnnotationUniprotId());
                                    if (iterationSt != null) {
                                        outBuff.write(
                                                "The protein was found in the BLAST used at the beginning of the annotation process.\n");
                                        Iteration iteration = new Iteration(iterationSt);
                                        ArrayList<Hit> hits = iteration.getIterationHits();
                                        boolean contigFound = false;
                                        Hit errorHit = null;
                                        for (Hit hit : hits) {
                                            if (hit.getHitDef().indexOf(contig.getId()) >= 0) {
                                                contigFound = true;
                                                errorHit = hit;
                                                break;
                                            }
                                        }
                                        if (contigFound) {
                                            outBuff.write(
                                                    "ERROR: A hit from the same contig was found in the Blast file: \n"
                                                            + errorHit.toString() + "\n");
                                        } else {
                                            outBuff.write("There is no hit with the same contig! :)\n");
                                        }
                                    } else {
                                        outBuff.write(
                                                "The protein is NOT in the BLAST used at the beginning of the annotation process.\n");
                                    }
                                } else {
                                    // The protein was not found on the reference set, so everything's ok
                                    outBuff.write(
                                            "The protein was not found on the reference, everything's ok :)\n");
                                }
                            }
                        }
                    } else {
                        outBuff.write("The protein was NOT found on the WU-BLAST !! :( \n");
                        //System.exit(-1);
                    }
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            try {
                // closing output file
                outBuff.close();
            } catch (IOException ex) {
                Logger.getLogger(AutomaticQualityControl.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
}
From source file:io.anserini.index.UpdateIndex.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(//from ww w. ja v a 2s . c om OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(UpdateIndex.class.getName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); final FieldType textOptions = new FieldType(); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); textOptions.setStoreTermVectors(true); LOG.info("index: " + indexPath); File file = new File("PittsburghUserTimeline"); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } final StatusStream stream = new JsonStatusCorpusReader(file); Status status; String s; HashMap<Long, String> hm = new HashMap<Long, String>(); try { while ((s = stream.nextRaw()) != null) { try { status = DataObjectFactory.createStatus(s); if (status.getText() == null) { continue; } hm.put(status.getUser().getId(), hm.get(status.getUser().getId()) + status.getText().replaceAll("[\\r\\n]+", " ")); } catch (Exception e) { } } } catch (Exception e) { e.printStackTrace(); } finally { stream.close(); } ArrayList<String> userIDList = new ArrayList<String>(); try (BufferedReader br = new BufferedReader(new FileReader(new File("userID")))) { String line; while ((line = br.readLine()) != null) { userIDList.add(line.replaceAll("[\\r\\n]+", "")); // process the line. 
} } try { reader = DirectoryReader .open(FSDirectory.open(new File(cmdline.getOptionValue(INDEX_OPTION)).toPath())); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } final Directory dir = new SimpleFSDirectory(Paths.get(cmdline.getOptionValue(INDEX_OPTION))); final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); final IndexWriter writer = new IndexWriter(dir, config); IndexSearcher searcher = new IndexSearcher(reader); System.out.println("The total number of docs indexed " + searcher.collectionStatistics(TweetStreamReader.StatusField.TEXT.name).docCount()); for (int city = 0; city < cityName.length; city++) { // Pittsburgh's coordinate -79.976389, 40.439722 Query q_long = NumericRangeQuery.newDoubleRange(TweetStreamReader.StatusField.LONGITUDE.name, new Double(longitude[city] - 0.05), new Double(longitude[city] + 0.05), true, true); Query q_lat = NumericRangeQuery.newDoubleRange(TweetStreamReader.StatusField.LATITUDE.name, new Double(latitude[city] - 0.05), new Double(latitude[city] + 0.05), true, true); BooleanQuery bqCityName = new BooleanQuery(); Term t = new Term("place", cityName[city]); TermQuery query = new TermQuery(t); bqCityName.add(query, BooleanClause.Occur.SHOULD); System.out.println(query.toString()); for (int i = 0; i < cityNameAlias[city].length; i++) { t = new Term("place", cityNameAlias[city][i]); query = new TermQuery(t); bqCityName.add(query, BooleanClause.Occur.SHOULD); System.out.println(query.toString()); } BooleanQuery bq = new BooleanQuery(); BooleanQuery finalQuery = new BooleanQuery(); // either a coordinate match bq.add(q_long, BooleanClause.Occur.MUST); bq.add(q_lat, BooleanClause.Occur.MUST); finalQuery.add(bq, BooleanClause.Occur.SHOULD); // or a place city name match finalQuery.add(bqCityName, BooleanClause.Occur.SHOULD); TotalHitCountCollector totalHitCollector = new TotalHitCountCollector(); // Query hasFieldQuery = new ConstantScoreQuery(new // FieldValueFilter("timeline")); // // searcher.search(hasFieldQuery, totalHitCollector); // // if (totalHitCollector.getTotalHits() > 0) { // TopScoreDocCollector collector = // TopScoreDocCollector.create(Math.max(0, // totalHitCollector.getTotalHits())); // searcher.search(finalQuery, collector); // ScoreDoc[] hits = collector.topDocs().scoreDocs; // // // HashMap<String, Integer> hasHit = new HashMap<String, Integer>(); // int dupcount = 0; // for (int i = 0; i < hits.length; ++i) { // int docId = hits[i].doc; // Document d; // // d = searcher.doc(docId); // // System.out.println(d.getFields()); // } // } // totalHitCollector = new TotalHitCountCollector(); searcher.search(finalQuery, totalHitCollector); if (totalHitCollector.getTotalHits() > 0) { TopScoreDocCollector collector = TopScoreDocCollector .create(Math.max(0, totalHitCollector.getTotalHits())); searcher.search(finalQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("City " + cityName[city] + " " + collector.getTotalHits() + " hits."); HashMap<String, Integer> hasHit = new HashMap<String, Integer>(); int dupcount = 0; for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d; d = searcher.doc(docId); if (userIDList.contains(d.get(IndexTweets.StatusField.USER_ID.name)) && hm.containsKey(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name)))) { // System.out.println("Has timeline field?" 
+ (d.get("timeline") != null)); // System.out.println(reader.getDocCount("timeline")); // d.add(new Field("timeline", hm.get(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name))), // textOptions)); System.out.println("Found a user hit"); BytesRefBuilder brb = new BytesRefBuilder(); NumericUtils.longToPrefixCodedBytes(Long.parseLong(d.get(IndexTweets.StatusField.ID.name)), 0, brb); Term term = new Term(IndexTweets.StatusField.ID.name, brb.get()); // System.out.println(reader.getDocCount("timeline")); Document d_new = new Document(); // for (IndexableField field : d.getFields()) { // d_new.add(field); // } // System.out.println(d_new.getFields()); d_new.add(new StringField("userBackground", d.get(IndexTweets.StatusField.USER_ID.name), Store.YES)); d_new.add(new Field("timeline", hm.get(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name))), textOptions)); // System.out.println(d_new.get()); writer.addDocument(d_new); writer.commit(); // t = new Term("label", "why"); // TermQuery tqnew = new TermQuery(t); // // totalHitCollector = new TotalHitCountCollector(); // // searcher.search(tqnew, totalHitCollector); // // if (totalHitCollector.getTotalHits() > 0) { // collector = TopScoreDocCollector.create(Math.max(0, totalHitCollector.getTotalHits())); // searcher.search(tqnew, collector); // hits = collector.topDocs().scoreDocs; // // System.out.println("City " + cityName[city] + " " + collector.getTotalHits() + " hits."); // // for (int k = 0; k < hits.length; k++) { // docId = hits[k].doc; // d = searcher.doc(docId); // System.out.println(d.get(IndexTweets.StatusField.ID.name)); // System.out.println(d.get(IndexTweets.StatusField.PLACE.name)); // } // } // writer.deleteDocuments(term); // writer.commit(); // writer.addDocument(d); // writer.commit(); // System.out.println(reader.getDocCount("timeline")); // writer.updateDocument(term, d); // writer.commit(); } } } } reader.close(); writer.close(); }
From source file:eu.fbk.dkm.sectionextractor.PageClassMerger.java
public static void main(String args[]) throws IOException {

    CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("WikiData ID file").isRequired().withLongOpt("wikidata-id").create("i"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("Airpedia Person file").isRequired().withLongOpt("airpedia").create("a"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Output file")
            .isRequired().withLongOpt("output").create("o"));

    CommandLine commandLine = null;
    try {
        commandLine = commandLineWithLogger.getCommandLine(args);
        PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
    } catch (Exception e) {
        System.exit(1);
    }

    String wikiIDFileName = commandLine.getOptionValue("wikidata-id");
    String airpediaFileName = commandLine.getOptionValue("airpedia");
    String outputFileName = commandLine.getOptionValue("output");

    HashMap<Integer, String> wikiIDs = new HashMap<>();
    HashSet<Integer> airpediaClasses = new HashSet<>();

    List<String> strings;

    logger.info("Loading file " + wikiIDFileName);
    strings = Files.readLines(new File(wikiIDFileName), Charsets.UTF_8);
    for (String line : strings) {
        line = line.trim();
        if (line.length() == 0) {
            continue;
        }
        if (line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }
        int id;
        try {
            id = Integer.parseInt(parts[0]);
        } catch (Exception e) {
            continue;
        }
        wikiIDs.put(id, parts[1]);
    }

    logger.info("Loading file " + airpediaFileName);
    strings = Files.readLines(new File(airpediaFileName), Charsets.UTF_8);
    for (String line : strings) {
        line = line.trim();
        if (line.length() == 0) {
            continue;
        }
        if (line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }
        int id;
        try {
            id = Integer.parseInt(parts[0]);
        } catch (Exception e) {
            continue;
        }
        airpediaClasses.add(id);
    }

    logger.info("Saving information");
    BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName));
    for (int i : wikiIDs.keySet()) {
        if (!airpediaClasses.contains(i)) {
            continue;
        }
        writer.append(wikiIDs.get(i)).append("\n");
    }
    writer.close();
}
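The final loop above writes only the wikiIDs entries whose key also appears in airpediaClasses. Because keySet() is a live view of the map, the same key intersection can be done in one call with retainAll; a minimal sketch with illustrative entries:

import java.util.HashMap;
import java.util.HashSet;

public class ClassMergeSketch {
    public static void main(String[] args) {
        HashMap<Integer, String> wikiIDs = new HashMap<>();
        wikiIDs.put(1, "Person");
        wikiIDs.put(2, "Place");
        HashSet<Integer> airpediaClasses = new HashSet<>();
        airpediaClasses.add(1);
        // keySet() is a live view: retainAll drops every entry whose key
        // is not in the filter set
        wikiIDs.keySet().retainAll(airpediaClasses);
        System.out.println(wikiIDs); // {1=Person}
    }
}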
From source file:mujava.cli.genmutes.java
public static void main(String[] args) throws Exception {
    // System.out.println("test");
    genmutesCom jct = new genmutesCom();
    String[] argv = { "-all", "-debug", "Flower" }; // development use; comment out this line for release
    JCommander jCommander = new JCommander(jct, args);

    // check session name
    if (jct.getParameters().size() > 1) {
        Util.Error("Has more parameters than needed.");
        return;
    }

    // set session name
    String sessionName = jct.getParameters().get(0);
    muJavaHomePath = Util.loadConfig();

    // check if debug mode
    if (jct.isDebug()) {
        Util.debug = true;
    }

    // get all existing session names
    File folder = new File(muJavaHomePath);
    // check if the config file has defined the correct folder
    if (!folder.isDirectory()) {
        Util.Error("ERROR: cannot locate the folder specified in mujava.config");
        return;
    }
    File[] listOfFiles = folder.listFiles();
    // null check: make sure the specified folder has files
    if (listOfFiles == null) {
        Util.Error("ERROR: no files in the muJava home folder: " + muJavaHomePath);
        return;
    }
    List<String> fileNameList = new ArrayList<>();
    for (File file : listOfFiles) {
        fileNameList.add(file.getName());
    }

    // check if the session has already been created
    if (!fileNameList.contains(sessionName)) {
        Util.Error("Session does not exist.");
        return;
    }

    // get all files in the session
    String[] file_list = new String[1];
    // if (jct.getD())
    // {
    File sessionFolder = new File(muJavaHomePath + "/" + sessionName + "/src");
    File[] listOfFilesInSession = sessionFolder.listFiles();
    file_list = new String[listOfFilesInSession.length];
    for (int i = 0; i < listOfFilesInSession.length; i++) {
        file_list[i] = listOfFilesInSession[i].getName();
    }

    // get all mutation operators selected; the two parameters are used to
    // add random percentage and maximum
    HashMap<String, List<String>> ops = new HashMap<String, List<String>>();
    String[] paras = new String[] { "1", "0" };
    if (jct.getAll()) { // -all is selected, add all operators
        ops.put("AORB", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AORS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AOIU", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AOIS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AODU", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AODS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ROR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COD", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COI", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("SOR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOI", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOD", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ASRS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("SDL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ODL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("VDL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("CDL", new ArrayList<String>(Arrays.asList(paras)));
        // ops.put("SDL", jct.getAll());
    } else {
        // if not all, add the selected ops to the list
        if (jct.getAORB()) { ops.put("AORB", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getAORS()) { ops.put("AORS", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getAOIU()) { ops.put("AOIU", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getAOIS()) { ops.put("AOIS", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getAODU()) { ops.put("AODU", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getAODS()) { ops.put("AODS", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getROR()) { ops.put("ROR", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getCOR()) { ops.put("COR", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getCOD()) { ops.put("COD", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getCOI()) { ops.put("COI", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getSOR()) { ops.put("SOR", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getLOR()) { ops.put("LOR", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getLOI()) { ops.put("LOI", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getLOD()) { ops.put("LOD", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getASRS()) { ops.put("ASRS", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getSDL()) { ops.put("SDL", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getVDL()) { ops.put("VDL", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getODL()) { ops.put("ODL", new ArrayList<String>(Arrays.asList(paras))); }
        if (jct.getCDL()) { ops.put("CDL", new ArrayList<String>(Arrays.asList(paras))); }
    }

    // add default option "all"
    if (ops.size() == 0) {
        ops.put("AORB", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AORS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AOIU", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AOIS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AODU", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("AODS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ROR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COD", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("COI", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("SOR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOR", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOI", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("LOD", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ASRS", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("SDL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("ODL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("VDL", new ArrayList<String>(Arrays.asList(paras)));
        ops.put("CDL", new ArrayList<String>(Arrays.asList(paras)));
    }

    // String[] tradional_ops = ops.toArray(new String[0]);
    // set system
    setJMutationStructureAndSession(sessionName);
    // MutationSystem.setJMutationStructureAndSession(sessionName);
    MutationSystem.recordInheritanceRelation();
    // generate mutants
    generateMutants(file_list, ops);
    //System.exit(0);
}