Usage examples for java.util.concurrent.ForkJoinPool.getActiveThreadCount()
public int getActiveThreadCount()
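getActiveThreadCount() returns an estimate of the number of threads that are currently stealing or executing tasks; the value is approximate because thread state can change while the count is taken. Every example on this page pairs it with isQuiescent() in a polling loop to wait for submitted work to drain. A minimal self-contained sketch of the call itself (DoubleTask, the pool size of 4, and the array length are illustrative, not taken from the examples below):

import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveAction;

public class ActiveCountDemo {
    // illustrative task: doubles every element of an array slice, splitting recursively
    static class DoubleTask extends RecursiveAction {
        final int[] data;
        final int lo, hi;
        DoubleTask(int[] data, int lo, int hi) { this.data = data; this.lo = lo; this.hi = hi; }
        @Override
        protected void compute() {
            if (hi - lo <= 1024) {
                for (int i = lo; i < hi; i++) { data[i] *= 2; }
            } else {
                int mid = (lo + hi) >>> 1;
                invokeAll(new DoubleTask(data, lo, mid), new DoubleTask(data, mid, hi));
            }
        }
    }

    public static void main(String[] args) throws InterruptedException {
        ForkJoinPool fjp = new ForkJoinPool(4); // 4 workers, chosen only for the demo
        fjp.execute(new DoubleTask(new int[1 << 20], 0, 1 << 20));
        // poll while work is in flight; the count may briefly lag the true value
        while (!fjp.isQuiescent()) {
            System.out.println("active threads: " + fjp.getActiveThreadCount());
            Thread.sleep(10);
        }
        System.out.println("quiescent; active threads now: " + fjp.getActiveThreadCount());
        fjp.shutdown();
    }
}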
From source file:com.hygenics.parser.ParseJSoup.java
/** Runs the program. */
public void run() {
    int its = 0;
    this.select = Properties.getProperty(this.select);
    this.extracondition = Properties.getProperty(this.extracondition);
    this.column = Properties.getProperty(this.column);
    createTables();
    log.info("Starting Parse via JSoup @ " + Calendar.getInstance().getTime().toString());
    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procs);
    Set<Callable<ArrayList<String>>> collection;
    List<Future<ArrayList<String>>> futures;
    ArrayList<String> data = new ArrayList<String>((commitsize + 10));
    ArrayList<String> outdata = new ArrayList<String>(((commitsize + 10) * 3));
    int offenderhash = offset;
    boolean run = true;
    int iteration = 0;
    int currpos = 0;
    do {
        collection = new HashSet<Callable<ArrayList<String>>>(qnums);
        log.info("Getting Data");
        currpos = iteration * commitsize + offset;
        iteration += 1;
        String query = select;
        if (extracondition != null) {
            query += " WHERE " + extracondition + " AND ";
        } else {
            query += " WHERE ";
        }
        // split the pull into qnums roughly equal id ranges
        for (int i = 0; i < qnums; i++) {
            if (currpos + (Math.round(commitsize / qnums * (i + 1))) < currpos + commitsize) {
                collection.add(new SplitQuery(query + pullid + " >= "
                        + Integer.toString(currpos + (Math.round(commitsize / qnums * (i)))) + " AND " + pullid
                        + " < " + Integer.toString(currpos + (Math.round(commitsize / qnums * (i + 1))))));
            } else {
                collection.add(new SplitQuery(query + pullid + " >= "
                        + Integer.toString(currpos + (Math.round(commitsize / qnums * (i)))) + " AND " + pullid
                        + " < " + Integer.toString(currpos + commitsize)));
            }
        }
        if (collection.size() > 0) {
            futures = fjp.invokeAll(collection);
            int w = 0;
            // busy-wait until the pool quiesces
            while (!fjp.isQuiescent() && fjp.getActiveThreadCount() > 0) { w++; }
            for (Future<ArrayList<String>> f : futures) {
                try {
                    data.addAll(f.get());
                } catch (NullPointerException e) {
                    log.info("Some Data Returned Null");
                } catch (InterruptedException | ExecutionException e) {
                    e.printStackTrace();
                }
            }
        }
        collection = new HashSet<Callable<ArrayList<String>>>(data.size());
        // if nothing came back, rerun the check query for dropped records
        if (data.size() == 0 && checkstring != null && its <= maxchecks) {
            its++;
            collection.add(new SplitQuery(checkstring));
            futures = fjp.invokeAll(collection);
            int w = 0;
            while (!fjp.isQuiescent() && fjp.getActiveThreadCount() > 0) { w++; }
            for (Future<ArrayList<String>> f : futures) {
                try {
                    data.addAll(f.get());
                } catch (NullPointerException e) {
                    log.info("Some Data Returned Null");
                } catch (InterruptedException | ExecutionException e) {
                    e.printStackTrace();
                }
            }
        }
        if (data.size() == 0) {
            // stop when no pages remain
            log.info("No Pages to Parse. Will Terminate");
            run = false;
        } else {
            // parse
            log.info("Starting JSoup Parse @ " + Calendar.getInstance().getTime().toString());
            for (String json : data) {
                // minimal-json reads faster, but Simple JSON parses faster
                Map<String, Json> jMap = Json.read(json).asJsonMap();
                if (jMap.containsKey("offenderhash")) {
                    // parse to int in case the value is a string with extra whitespace
                    offenderhash = Integer.parseInt(jMap.get("offenderhash").asString().trim());
                }
                boolean allow = true;
                if (mustcontain != null && !jMap.get(column).asString().contains(mustcontain)) {
                    allow = false;
                }
                if (cannotcontain != null && jMap.get(column).asString().contains(cannotcontain)) {
                    allow = false;
                }
                // fastest approach available here; written early on and slated for a rewrite
                if (allow && jMap.containsKey("offenderhash")) {
                    if (this.singlepaths != null) {
                        collection.add(new ParseSingle(Integer.toString(offenderhash), header, footer, pagenarrow,
                                singlepaths, StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                replaceSequence));
                    }
                    if (this.multipaths != null) {
                        collection.add(new ParseRows(Integer.toString(offenderhash), header, footer, pagenarrow,
                                multipaths, StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                replaceSequence));
                    }
                    if (this.recordpaths != null) {
                        collection.add(new ParseLoop(Integer.toString(offenderhash), header, footer, pagenarrow,
                                recordpaths, StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                replaceSequence));
                    }
                }
                offenderhash += 1;
            }
            // complete parse
            log.info("Waiting for Parsing to Complete.");
            if (collection.size() > 0) {
                futures = fjp.invokeAll(collection);
                int w = 0;
                while (!fjp.isQuiescent() && fjp.getActiveThreadCount() > 0) { w++; }
                log.info("Waited for " + Integer.toString(w) + " Cycles!");
                for (Future<ArrayList<String>> f : futures) {
                    try {
                        outdata.addAll(f.get());
                    } catch (InterruptedException | ExecutionException e) {
                        e.printStackTrace();
                    }
                }
            }
            log.info("Finished Parsing @ " + Calendar.getInstance().getTime().toString());
            int cp = 0;
            // post data
            log.info("Posting Data @ " + Calendar.getInstance().getTime().toString());
            if (outdata.size() > 0) {
                for (int i = 0; i < qnums; i++) {
                    ArrayList<String> od = new ArrayList<String>(Math.round(outdata.size() / qnums));
                    if (cp + (Math.round(outdata.size() / qnums)) < outdata.size()) {
                        od.addAll(outdata.subList(cp, (cp + (Math.round(outdata.size() / qnums)))));
                    } else {
                        od.addAll(outdata.subList(cp, outdata.size()));
                    }
                    fjp.execute(new SplitPost(template, od));
                    cp += Math.round(outdata.size() / qnums);
                }
                int w = 0;
                while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
                log.info("Waited for " + Integer.toString(w) + " cycles!");
            }
            log.info("Finished Posting to DB @ " + Calendar.getInstance().getTime().toString());
            // size should remain the same, with a 10-slot buffer
            data.clear();
            outdata.clear();
        }
        // desperate attempt to actually invoke garbage collection because of the massive strings
        System.gc();
        Runtime.getRuntime().gc();
    } while (run);
    log.info("Shutting Down FJP");
    if (!fjp.isShutdown()) {
        fjp.shutdownNow();
    }
    log.info("Finished Parsing @ " + Calendar.getInstance().getTime().toString());
}
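Every wait in the example above is a spin loop of the form while (!fjp.isQuiescent() && fjp.getActiveThreadCount() > 0) { w++; }, which keeps a core busy counting cycles. When only the blocking matters, ForkJoinPool.awaitQuiescence(long, TimeUnit) gives the same effect without spinning; a sketch, assuming the same fjp variable and logger (the 10-minute timeout is arbitrary):

// block until the pool quiesces instead of busy-waiting; returns false on timeout
if (!fjp.awaitQuiescence(10, TimeUnit.MINUTES)) {
    log.warn("Pool still had " + fjp.getActiveThreadCount() + " active threads at timeout");
}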
From source file:com.hygenics.parser.ParseDispatcher.java
/**
 * Fork/Join pool solution that maximizes speed; JSON increases ease of use.
 */
public void run() {
    log.info("Starting Clock and Parsing @" + Calendar.getInstance().getTime().toString());
    long t = Calendar.getInstance().getTimeInMillis();
    int pid = 0;
    int checkattempts = 0;
    String add = null;
    this.schema = Properties.getProperty(this.schema);
    this.select = Properties.getProperty(this.select);
    this.extracondition = Properties.getProperty(this.extracondition);
    this.column = Properties.getProperty(this.column);
    ArrayList<String> parsedrows = new ArrayList<String>();
    Set<Callable<String>> collect = new HashSet<Callable<String>>();
    List<Future<String>> futures;
    List<Future<ArrayList<String>>> qfutures;
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4);
    ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum));
    if (schema != null) {
        createTables();
    }
    String condition;
    int w = 0;
    int start = offset;
    int chunksize = (int) Math.ceil(pullsize / qnum);
    // query the database from multiple threads
    do {
        // query for pages
        pages = new ArrayList<String>(pullsize);
        log.info("Looking for Pages.");
        for (int conn = 0; conn < qnum; conn++) {
            condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND " + pullid + " < "
                    + Integer.toString(start + (chunksize * (conn + 1)));
            if (extracondition != null) {
                condition += " " + extracondition.trim();
            }
            qcollect.add(new SplitQuery(template, (select + condition)));
            log.info("Fetching " + select + condition);
        }
        start += (chunksize * qnum);
        qfutures = fjp.invokeAll(qcollect);
        w = 0;
        // busy-wait until the pool quiesces
        while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
        log.info("Waited for " + w + " cycles");
        for (Future<ArrayList<String>> f : qfutures) {
            try {
                ArrayList<String> test = f.get();
                if (test != null && test.size() > 0) {
                    pages.addAll(test);
                }
                if (!f.isDone()) {
                    f.cancel(true);
                }
                f = null;
            } catch (Exception e) {
                log.warn("Encoding Error!");
                e.printStackTrace();
            }
        }
        qcollect = new HashSet<Callable<ArrayList<String>>>(4);
        qfutures = null;
        log.info("Finished Getting Pages");
        // if no records were found, look for records that may have been dropped
        if (pages.size() == 0 && checkstring != null && checkstring.trim().length() > 0
                && checkattempts < reattempts) {
            checkattempts += 1;
            log.info("Checking for Drops");
            qcollect.add(new SplitQuery(template, (checkstring)));
            qfutures = fjp.invokeAll(qcollect);
            w = 0;
            while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
            log.info("Waited for " + w + " cycles");
            for (Future<ArrayList<String>> f : qfutures) {
                try {
                    ArrayList<String> test = f.get();
                    if (test != null && test.size() > 0) {
                        pages.addAll(test);
                    }
                    if (!f.isDone()) {
                        f.cancel(true);
                    }
                    f = null;
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }
            qfutures = null;
            qcollect = new HashSet<Callable<ArrayList<String>>>(4);
        } else if (checkattempts >= reattempts) {
            pages.clear();
        }
        log.info("Found " + pages.size() + " records!");
        // hash the records if requested
        if (getHash) {
            log.info("Hashing " + pages.size() + " Records");
            ArrayList<String> hashedrows = new ArrayList<String>();
            for (String row : pages) {
                collect.add(new CreateHash(row, pid));
                pid++;
            }
            log.info("Invoking");
            futures = fjp.invokeAll(collect);
            w = 0;
            while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
            log.info("Waited " + w + " Cycles!");
            for (Future<String> f : futures) {
                if (f != null) {
                    String json;
                    try {
                        json = f.get(termtime, TimeUnit.MILLISECONDS);
                        if (json != null) {
                            hashedrows.add(json);
                        }
                    } catch (Exception e) {
                        log.warn("Encoding Error!");
                        e.printStackTrace();
                    }
                }
            }
            log.info("Hashed " + hashedrows.size() + " Records!");
            pages = hashedrows;
            collect = new HashSet<Callable<String>>(pullsize);
            futures.clear();
            log.info("Completed Hashing");
        }
        log.info("Performing Regex");
        // handle single patterns
        int i = 0;
        if (singlepats != null) {
            log.info("Found Singlepats");
            int subs = 0;
            int rows = 0;
            for (String row : pages) {
                rows += 1;
                String inrow = row;
                try {
                    inrow = inrow.replaceAll("\t|\r|\r\n|\n", "");
                    Map<String, Json> jmap = Json.read(inrow).asJsonMap();
                    if (singlepats.containsKey("table")) {
                        subs += 1;
                        if (fjp.isShutdown()) {
                            fjp = new ForkJoinPool((Runtime.getRuntime().availableProcessors() * procnum));
                        }
                        if (jmap.get(column) != null) {
                            String html = jmap.get(column).asString();
                            if (test) {
                                System.out.println("//////////////////////HTML////////////////////////\n" + html
                                        + "\n///////////////////////////////END///////////////////////////\n\n");
                            }
                            // apply the mustcontain / cannotcontain filters
                            if ((mustcontain == null || html.contains(mustcontain))
                                    && (cannotcontain == null || !html.contains(cannotcontain))) {
                                collect.add(new ParsePage(unescape, replacementPattern, singlepats.get("table"),
                                        html.replaceAll("\\s\\s", " "), singlepats,
                                        Calendar.getInstance().getTime().toString(),
                                        jmap.get("offenderhash").asString()));
                            }
                        }
                    }
                    i++;
                    if (((i % commit_size) == 0 && i != 0) || i == pages.size()
                            || (pages.size() == 1 && singlepats != null)) {
                        log.info("Getting Regex Results");
                        log.info("Getting Tasks");
                        futures = fjp.invokeAll(collect);
                        w = 0;
                        while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
                        log.info("Waited for " + w + " cycles");
                        for (Future<String> r : futures) {
                            try {
                                add = r.get();
                                if (!add.contains("No Data")) {
                                    parsedrows.add(add);
                                }
                                add = null;
                            } catch (Exception e) {
                                log.warn("Encoding Error!");
                                e.printStackTrace();
                            }
                        }
                        futures = null;
                        collect = new HashSet<Callable<String>>();
                        if (parsedrows.size() >= commit_size) {
                            log.info("INSERTING " + parsedrows.size() + " records!");
                            sendToDb(parsedrows, parsedrows.size() >= SPLITSIZE);
                            parsedrows = new ArrayList<String>(pullsize);
                        }
                        // hint to the gc in case it actually pays off
                        System.gc();
                        Runtime.getRuntime().gc();
                    }
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }
            log.info("Submitted " + subs + " records. Found " + rows + " rows");
        }
        log.info("REMAINING ROWS TO COMMIT " + parsedrows.size());
        if (parsedrows.size() > 0) {
            sendToDb(parsedrows, parsedrows.size() >= SPLITSIZE);
            parsedrows = new ArrayList<String>();
        }
        // handle multi patterns
        if (multipats != null) {
            // parse multiple pages for the run
            int subs = 0;
            for (String row : pages) {
                try {
                    for (String k : multipats.keySet()) {
                        if (fjp.isShutdown()) {
                            fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
                        }
                        Map<String, Json> jmap = Json.read(row).asJsonMap();
                        if (jmap.get(column) != null) {
                            subs += 1;
                            String html = jmap.get(column).asString();
                            if (test) {
                                System.out.println("//////////////////////HTML////////////////////////\n" + html
                                        + "\n///////////////////////////////END///////////////////////////\n\n");
                            }
                            // apply the mustcontain / cannotcontain filters
                            if ((mustcontain == null || html.contains(mustcontain))
                                    && (cannotcontain == null || !html.contains(cannotcontain))) {
                                collect.add(new ParseMultiPage(unescape, replacementPattern, k,
                                        html.replaceAll("\\s\\s", " "), jmap.get("offenderhash").asString(),
                                        Calendar.getInstance().getTime().toString(), multipats.get(k)));
                            }
                        }
                        i++;
                        if (((i % commit_size) == 0 && i != 0) || i == pages.size()
                                || (pages.size() == 1 && multipats != null)) {
                            futures = fjp.invokeAll(collect);
                            w = 0;
                            while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
                            log.info("Waited " + w + " Cycles");
                            for (Future<String> r : futures) {
                                try {
                                    add = r.get();
                                    if (!add.contains("No Data")) {
                                        for (String js : add.split("~")) {
                                            parsedrows.add(js);
                                        }
                                    }
                                    add = null;
                                    if (!r.isDone()) {
                                        r.cancel(true);
                                    }
                                    r = null;
                                } catch (InterruptedException | ExecutionException e) {
                                    e.printStackTrace();
                                }
                            }
                            futures = null;
                            collect = new HashSet<Callable<String>>();
                            if (parsedrows.size() >= commit_size) {
                                log.info("INSERTING " + parsedrows.size() + " records!");
                                sendToDb(parsedrows, parsedrows.size() >= SPLITSIZE);
                                parsedrows = new ArrayList<String>(pullsize);
                            }
                            // hint to the gc in case it actually pays off
                            System.gc();
                            Runtime.getRuntime().gc();
                        }
                    }
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                }
            }
            log.info("Submitted " + subs + " records.");
        }
        // handle looped patterns
        if (loopedpats != null) {
            log.info("Looped Patterns Found");
            int subs = 0;
            if (fjp.isShutdown()) {
                fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);
            }
            for (String row : pages) {
                try {
                    for (String k : loopedpats.keySet()) {
                        if (fjp.isShutdown()) {
                            fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);
                        }
                        Map<String, Json> jmap = Json.read(row).asJsonMap();
                        if (jmap.get(column) != null) {
                            subs += 1;
                            String html = jmap.get(column).asString();
                            // apply the mustcontain / cannotcontain filters
                            if ((mustcontain == null || html.contains(mustcontain))
                                    && (cannotcontain == null || !html.contains(cannotcontain))) {
                                collect.add(new LoopRegex(unescape, html.replaceAll("\\s\\s", " "),
                                        jmap.get("offenderhash").asString(),
                                        Calendar.getInstance().getTime().toString(), k, replacementPattern,
                                        loopedpats.get(k), test));
                            }
                            jmap.remove(k);
                        }
                        i++;
                        if (((i % commit_size) == 0 && i != 0) || pages.size() == 1
                                || (i % (pages.size() - 1)) == 0) {
                            futures = fjp.invokeAll(collect);
                            w = 0;
                            while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
                            log.info("Waited " + w + " Cycles");
                            for (Future<String> r : futures) {
                                try {
                                    add = r.get();
                                    if (!add.contains("No Data")) {
                                        for (String toarr : add.split("~")) {
                                            parsedrows.add(toarr);
                                        }
                                    }
                                    if (!r.isDone()) {
                                        r.cancel(true);
                                    }
                                    add = null;
                                } catch (Exception e) {
                                    log.warn("Encoding Error!");
                                    e.printStackTrace();
                                }
                            }
                            futures = null;
                            collect = new HashSet<Callable<String>>();
                            // hint to the gc in case it actually pays off
                            System.gc();
                            Runtime.getRuntime().gc();
                        }
                    }
                    if (parsedrows.size() >= this.commit_size) {
                        log.info("INSERTING " + parsedrows.size() + " records!");
                        sendToDb(parsedrows, parsedrows.size() >= SPLITSIZE);
                        parsedrows = new ArrayList<String>(pullsize);
                    }
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                }
            }
            log.info("Submitted " + subs + " records.");
        }
        // flush any tasks still queued for this iteration
        if (collect.size() > 0) {
            log.info("Getting Last Regex Results for Iteration");
            log.info("Getting Tasks");
            futures = fjp.invokeAll(collect);
            w = 0;
            while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
            log.info("Waited for " + w + " cycles");
            for (Future<String> r : futures) {
                try {
                    add = r.get();
                    if (!add.contains("No Data")) {
                        parsedrows.add(add);
                    }
                    add = null;
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }
            futures = null;
            collect = new HashSet<Callable<String>>(pullsize);
            // hint to the gc in case it actually pays off
            System.gc();
            Runtime.getRuntime().gc();
        }
        log.info("REMAINING ROWS TO COMMIT " + parsedrows.size());
        if (parsedrows.size() > 0) {
            sendToDb(parsedrows, parsedrows.size() >= SPLITSIZE);
            parsedrows = new ArrayList<String>();
        }
    } while (pages != null && pages.size() > 0);
    // final pass to ensure that nothing is left in limbo
    if (collect.size() > 0) {
        log.info("More Rows Caught in FJP, Completing Process");
        futures = fjp.invokeAll(collect);
        w = 0;
        while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
        log.info("Waited " + w + " Cycles");
        for (Future<String> r : futures) {
            try {
                add = r.get();
                if (!add.contains("No Data")) {
                    for (String js : add.split("~")) {
                        parsedrows.add(js);
                    }
                }
                add = null;
                if (!r.isDone()) {
                    r.cancel(true);
                }
                r = null;
            } catch (InterruptedException | ExecutionException e) {
                e.printStackTrace();
            }
        }
        futures = null;
        collect = null;
    }
    // send any remaining parsed rows to the db
    if (parsedrows.size() > 0) {
        sendToDb(parsedrows, parsedrows.size() >= SPLITSIZE);
        parsedrows = new ArrayList<String>();
    }
    log.info("Shutting Down Fork Join Pool");
    if (!fjp.isShutdown()) {
        fjp.shutdownNow();
    }
    fjp = null;
    log.info("Complete @" + Calendar.getInstance().getTime().toString());
    log.info("Total Runtime(seconds): "
            + Double.toString((double) (Calendar.getInstance().getTimeInMillis() - t) / 1000));
    // hint to the gc in case it actually pays off
    System.gc();
    Runtime.getRuntime().gc();
}
From source file:com.hygenics.parser.GetImages.java
private void addFromFile() {
    File f = new File(fpath);
    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
    ArrayList<String> imageData = new ArrayList<String>();
    int size = (int) Math.ceil(commitsize / numqueries);
    if (f.exists()) {
        // gather the image data
        File[] list = f.listFiles();
        int curr = 0;
        if (list != null) {
            for (File img : list) {
                curr += 1;
                if (!img.isDirectory() && (img.getName().contains(".bmp")
                        || img.getName().toLowerCase().contains(".jpg")
                        || img.getName().toLowerCase().contains(".png")
                        || img.getName().toLowerCase().contains("jpeg"))) {
                    try {
                        if (dbcondition == null || template.getJsonData(dbcondition.replace("$IMAGE$",
                                img.getName().replaceAll("(?mis)" + imagepostfix, ""))).size() > 0) {
                            BufferedImage bi = ImageIO.read(img); // only used to verify that the file is an image
                            JsonObject jobj = new JsonObject();
                            // assumes the hash is the file name plus a postfix
                            jobj.add("offenderhash", img.getName().replaceAll("(?mis)" + imagepostfix, ""));
                            jobj.add("image", img.getName().replaceAll("(?mis)" + imagepostfix, ""));
                            jobj.add("image_path", img.getName());
                            jobj.add("table", table);
                            jobj.add("date", Calendar.getInstance().getTime().toString());
                            imageData.add(jobj.toString());
                        }
                    } catch (IOException e) {
                        log.info(img.getName() + " is not an Image!");
                        e.printStackTrace();
                    } catch (Exception e) {
                        log.info("Error in Posting to Database.");
                        e.printStackTrace();
                    }
                }
                // post when the batch reaches commitsize or the directory is exhausted
                if (imageData.size() >= commitsize || curr == list.length) {
                    log.info("Posting to DB @ " + Calendar.getInstance().getTime().toString());
                    for (int i = 0; i < numqueries; i++) {
                        if (((i + 1) * size) < imageData.size()) {
                            fjp.execute(new ImagePost(imageData.subList((i * size), ((i + 1) * size))));
                        } else {
                            fjp.execute(new ImagePost(imageData.subList((i * size), imageData.size())));
                        }
                    }
                    int w = 0;
                    // busy-wait until the pool quiesces
                    while (!fjp.isQuiescent() && fjp.getActiveThreadCount() > 0) { w++; }
                    log.info("Waited for " + w + " cycles");
                    imageData.clear();
                    log.info("Finished Posting to DB @ " + Calendar.getInstance().getTime().toString());
                }
            }
        }
    } else {
        log.error("File Path does Not Exist. Please Check Image Pull!");
    }
    fjp.shutdown();
    fjp = null;
}
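The batch post above splits imageData into numqueries slices with subList() and hands each slice to the pool via execute(), then spins until the pool drains. The slicing logic in isolation, with a guard added so an undersized final batch cannot produce an out-of-range subList (names reuse the example's; ImagePost stands for any Runnable over a slice):

// dispatch imageData in roughly equal slices, then wait for the pool to drain
int slice = (int) Math.ceil((double) imageData.size() / numqueries);
for (int i = 0; i < numqueries; i++) {
    int from = i * slice;
    int to = Math.min(from + slice, imageData.size());
    if (from < to) {
        fjp.execute(new ImagePost(imageData.subList(from, to)));
    }
}
while (!fjp.isQuiescent() && fjp.getActiveThreadCount() > 0) {
    Thread.onSpinWait(); // Java 9+: tells the runtime this is a spin loop
}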
From source file:com.hygenics.parser.JDump.java
private void toFile() {
    List<Future<ArrayList<String>>> qfutures;
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4);
    ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum));
    int dumped = 0;
    if (archive) {
        log.info("Cleaning");
        for (String k : fpaths.keySet()) {
            String fpath = "";
            for (String ofp : fpaths.get(k).keySet()) {
                fpath = ofp;
            }
            if (fpath.length() > 0) {
                String[] barr = fpath.split("\\/");
                String basefile = "";
                Archiver zip = new Archiver();
                // build the base directory from every path element except the file name
                for (int i = 0; i < barr.length - 1; i++) {
                    basefile += (i == 0) ? barr[i] : "/" + barr[i];
                }
                if (basefile.trim().length() > 0) {
                    zip.setBasedirectory(basefile);
                    zip.setZipDirectory(basefile + "archive.zip");
                    zip.setAvoidanceString(".zip|archive");
                    zip.setDelFiles(true);
                    zip.run();
                }
            }
        }
    }
    log.info("Dumping");
    for (String table : fpaths.keySet()) {
        int offset = 0;
        if (template.checkTable(table, table.split("\\.")[0])) {
            if (template.getCount(table) > 0) {
                log.info("Dumping for " + table);
                String select = "SELECT * FROM " + table;
                String fpath = null;
                ArrayList<String> jsons;
                String condition;
                int w = 0;
                int start = offset;
                int chunksize = (int) Math.ceil(pullsize / qnum);
                // get the output path and starting id
                for (String ofp : fpaths.get(table).keySet()) {
                    start = fpaths.get(table).get(ofp);
                    fpath = ofp;
                }
                // perform the write
                if (headers != null && fpath != null) {
                    List<String> headersList = headers.get(table);
                    String output = null;
                    boolean existed = true;
                    if (addFileDate) {
                        fpath = fpath
                                + Calendar.getInstance().getTime().toString().trim().replaceAll(":|\\s", "")
                                + ".txt";
                    }
                    // create the file if it does not exist
                    if (!new File(fpath).exists()) {
                        try {
                            new File(fpath).createNewFile();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                        existed = false;
                    }
                    // recreate the file when not appending
                    if (!append) {
                        File f = new File(fpath);
                        f.delete();
                        try {
                            f.createNewFile();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                    // build the delimited header line
                    if (headersList != null && (!append || !existed)) {
                        for (String header : headersList) {
                            output = (output == null) ? StringEscapeUtils.unescapeXml(header)
                                    : output + delimeter + StringEscapeUtils.unescapeXml(header);
                        }
                    }
                    final String headerLine = output;
                    do {
                        // get records
                        jsons = new ArrayList<String>(pullsize);
                        log.info("Looking for Pages.");
                        for (int conn = 0; conn < qnum; conn++) {
                            condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND "
                                    + pullid + " < " + Integer.toString(start + (chunksize * (conn + 1)));
                            if (extracondition != null) {
                                condition += " " + extracondition.trim();
                            }
                            qcollect.add(new SplitQuery(template, (select + condition)));
                            log.info("Fetching " + select + condition);
                        }
                        start += (chunksize * qnum);
                        qfutures = fjp.invokeAll(qcollect);
                        w = 0;
                        // busy-wait until the pool quiesces
                        while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
                        log.info("Waited for " + w + " cycles");
                        for (Future<ArrayList<String>> f : qfutures) {
                            try {
                                ArrayList<String> test = f.get();
                                if (test != null && test.size() > 0) {
                                    jsons.addAll(test);
                                }
                                if (!f.isDone()) {
                                    f.cancel(true);
                                }
                                f = null;
                            } catch (Exception e) {
                                log.warn("Encoding Error!");
                                e.printStackTrace();
                            }
                        }
                        qcollect = new HashSet<Callable<ArrayList<String>>>(4);
                        qfutures = null;
                        log.info("Finished Getting Pages");
                        // post records to the file
                        try (FileWriter fw = new FileWriter(new File(fpath), true)) {
                            if (jsons.size() > 0) {
                                if (headerLine != null) {
                                    fw.write(headerLine + "\n");
                                }
                                // write data
                                for (String json : jsons) {
                                    output = null;
                                    JsonObject jo = JsonObject.readFrom(json);
                                    // allows trimming of the table to key aspects
                                    if (jo.size() >= headersList.size()) {
                                        for (String key : headers.get(table)) {
                                            if (jo.get(key.toLowerCase()) != null) {
                                                String data = StringEscapeUtils
                                                        .unescapeXml(jo.get(key.toLowerCase()).asString());
                                                if (replacementPattern != null) {
                                                    data = data.replaceAll(replacementPattern, "");
                                                    data = data.replace(delimeter, delimreplace);
                                                }
                                                output = (output == null)
                                                        ? data.replaceAll("[^\u0020-\u007E ]+", "")
                                                        : output + delimeter
                                                                + data.replaceAll("[^\u0020-\u007E ]+", "");
                                            } else {
                                                output += delimeter;
                                            }
                                        }
                                        if (output != null && output.trim().length() > headersList.size()) {
                                            fw.write(output + "\n");
                                        }
                                    } else if (jsons.size() == 0) {
                                        log.info("Number of Headers and Keys from Json Array and Headers List Impossible to Match");
                                        try {
                                            throw new MismatchException("Number of Headers: " + headersList.size()
                                                    + " && Number of Keys: " + jo.size());
                                        } catch (MismatchException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                    output = null;
                                }
                            } else {
                                log.info("EOF FOUND! No New Records in This Iteration....Stopping.");
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    } while (jsons.size() > 0);
                } else {
                    try {
                        throw new NullPointerException("No Headers Input to Class. Please Create the Requisite Map.");
                    } catch (NullPointerException e) {
                        e.printStackTrace();
                    }
                }
                dumped += 1;
            } else {
                try {
                    throw new NoDataException("No Data Found in Table " + table);
                } catch (NoDataException e) {
                    e.printStackTrace();
                }
            }
        } else {
            log.info("Missing Table " + table);
            try {
                throw new NullPointerException("Table " + table + " Does Not Exist!!!");
            } catch (NullPointerException e) {
                e.printStackTrace();
            }
        }
    }
    if (!fjp.isShutdown()) {
        fjp.shutdownNow();
    }
    if (dumped == 0) {
        log.error("No Data Found in Any Table");
        System.exit(-1);
    }
}
From source file:com.hygenics.parser.JDumpWithReference.java
private void toFile() {
    List<Future<ArrayList<String>>> qfutures;
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4);
    ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum));
    int dumped = 0;
    if (archive) {
        log.info("Cleaning");
        for (String k : fpaths.keySet()) {
            String fpath = "";
            for (String ofp : fpaths.get(k).keySet()) {
                fpath = ofp;
            }
            if (fpath.length() > 0) {
                String[] barr = fpath.split("\\/");
                String basefile = "";
                Archiver zip = new Archiver();
                // build the base directory from every path element except the file name
                for (int i = 0; i < barr.length - 1; i++) {
                    basefile += (i == 0) ? barr[i] : "/" + barr[i];
                }
                if (basefile.trim().length() > 0) {
                    zip.setBasedirectory(basefile);
                    zip.setZipDirectory(basefile + "archive.zip");
                    zip.setAvoidanceString(".zip|archive");
                    zip.setDelFiles(true);
                    zip.run();
                }
            }
        }
    }
    log.info("Dumping");
    for (String table : fpaths.keySet()) {
        int offset = 0;
        if (template.checkTable(this.baseschema + "." + table, this.baseschema)) {
            if (template.getCount(this.baseschema + "." + table) > 0) {
                log.info("Dumping for " + table);
                String select = "SELECT * FROM " + this.baseschema + "." + table;
                String fpath = null;
                ArrayList<String> jsons;
                String condition;
                int w = 0;
                int start = offset;
                int chunksize = (int) Math.ceil(pullsize / qnum);
                // get the output path and starting id
                for (String ofp : fpaths.get(table).keySet()) {
                    start = fpaths.get(table).get(ofp);
                    fpath = ofp;
                }
                // perform the write
                if (headers != null && fpath != null) {
                    List<String> headersList = headers.get(table);
                    String output = null;
                    boolean existed = true;
                    if (addFileDate) {
                        fpath = fpath
                                + Calendar.getInstance().getTime().toString().trim().replaceAll(":|\\s", "")
                                + ".txt";
                    }
                    // create the file if it does not exist
                    if (!new File(this.baseFilePath + fpath).exists()) {
                        try {
                            new File(this.baseFilePath + fpath).createNewFile();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                        existed = false;
                    }
                    // recreate the file when not appending
                    if (!append) {
                        File f = new File(this.baseFilePath + fpath);
                        f.delete();
                        try {
                            f.createNewFile();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                    // build the delimited header line
                    if (headersList != null && (!append || !existed)) {
                        for (String header : headersList) {
                            output = (output == null) ? StringEscapeUtils.unescapeXml(header)
                                    : output + delimeter + StringEscapeUtils.unescapeXml(header);
                        }
                    }
                    final String headerLine = output;
                    do {
                        // get records
                        jsons = new ArrayList<String>(pullsize);
                        log.info("Looking for Pages.");
                        for (int conn = 0; conn < qnum; conn++) {
                            condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND "
                                    + pullid + " < " + Integer.toString(start + (chunksize * (conn + 1)));
                            if (extracondition != null) {
                                condition += " " + extracondition.trim();
                            }
                            qcollect.add(new SplitQuery(template, (select + condition)));
                            log.info("Fetching " + select + condition);
                        }
                        start += (chunksize * qnum);
                        qfutures = fjp.invokeAll(qcollect);
                        w = 0;
                        // busy-wait until the pool quiesces
                        while (fjp.getActiveThreadCount() > 0 && !fjp.isQuiescent()) { w++; }
                        log.info("Waited for " + w + " cycles");
                        for (Future<ArrayList<String>> f : qfutures) {
                            try {
                                ArrayList<String> test = f.get();
                                if (test != null && test.size() > 0) {
                                    jsons.addAll(test);
                                }
                                if (!f.isDone()) {
                                    f.cancel(true);
                                }
                                f = null;
                            } catch (Exception e) {
                                log.warn("Encoding Error!");
                                e.printStackTrace();
                            }
                        }
                        qcollect = new HashSet<Callable<ArrayList<String>>>(4);
                        qfutures = null;
                        log.info("Finished Getting Pages");
                        // post records to the file
                        try (FileWriter fw = new FileWriter(new File(this.baseFilePath + fpath), true)) {
                            if (jsons.size() > 0) {
                                if (headerLine != null) {
                                    fw.write(headerLine + "\n");
                                }
                                // write data
                                for (String json : jsons) {
                                    output = null;
                                    JsonObject jo = JsonObject.readFrom(json);
                                    // allows trimming of the table to key aspects
                                    if (jo.size() >= headersList.size()) {
                                        for (String key : headers.get(table)) {
                                            if (jo.get(key.toLowerCase()) != null) {
                                                String data = StringEscapeUtils
                                                        .unescapeXml(jo.get(key.toLowerCase()).asString());
                                                if (replacementPattern != null) {
                                                    data = data.replaceAll(replacementPattern, "");
                                                    data = data.replace(delimeter, delimreplace);
                                                }
                                                output = (output == null)
                                                        ? data.replaceAll("[^\u0020-\u007E ]+", "")
                                                        : output + delimeter
                                                                + data.replaceAll("[^\u0020-\u007E ]+", "");
                                            } else {
                                                output += delimeter;
                                            }
                                        }
                                        if (output != null && output.trim().length() > headersList.size()) {
                                            fw.write(output + "\n");
                                        }
                                    } else if (jsons.size() == 0) {
                                        log.info("Number of Headers and Keys from Json Array and Headers List Impossible to Match");
                                        try {
                                            throw new MismatchException("Number of Headers: " + headersList.size()
                                                    + " && Number of Keys: " + jo.size());
                                        } catch (MismatchException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                    output = null;
                                }
                            } else {
                                log.info("EOF FOUND! No New Records in This Iteration....Stopping.");
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    } while (jsons.size() > 0);
                } else {
                    try {
                        throw new NullPointerException("No Headers Input to Class. Please Create the Requisite Map.");
                    } catch (NullPointerException e) {
                        e.printStackTrace();
                    }
                }
                dumped += 1;
            } else {
                try {
                    throw new NoDataException("No Data in Table " + table);
                } catch (NoDataException e) {
                    e.printStackTrace();
                }
            }
        } else {
            log.info("Missing Table " + table);
            try {
                throw new NullPointerException("Table " + table + " Does Not Exist!!!");
            } catch (NullPointerException e) {
                e.printStackTrace();
            }
        }
    }
    if (!fjp.isShutdown()) {
        fjp.shutdownNow();
    }
    if (dumped == 0) {
        log.error("No Data Found in Any Tables");
        System.exit(-1);
    }
}
From source file:MSUmpire.PeptidePeakClusterDetection.PDHandlerBase.java
protected void FindAllMzTracePeakCurves(ScanCollection scanCollection) throws IOException {
    Logger.getRootLogger().info("Processing all scans to detect possible m/z peak curves and");
    Logger.getRootLogger().info("Smoothing detected signals......");
    float preRT = 0f;
    // loop over each scan in the ScanCollection
    final ArrayList<ForkJoinTask<ArrayList<PeakCurve>>> ftemp = new ArrayList<>();
    final ForkJoinPool fjp = new ForkJoinPool(NoCPUs);
    final int idx_end = scanCollection.GetScanNoArray(MSlevel).size();
    // prefix sums of per-scan point counts give every peak a unique flat index
    final int[] ia = new int[idx_end + 1];
    ia[0] = 0;
    for (int idx = 0; idx < idx_end; idx++) {
        final int scanNO = scanCollection.GetScanNoArray(MSlevel).get(idx);
        final ScanData sd = scanCollection.GetScan(scanNO);
        ia[idx + 1] = sd.Data.size() + ia[idx];
    }
    final boolean[] included = new boolean[ia[ia.length - 1]];
    if (step == -1)
        step = fjp.getParallelism() * 32;
    long peakCurvesCount = 0;
    for (int idx = 0; idx < idx_end; idx++) {
        int scanNO = scanCollection.GetScanNoArray(MSlevel).get(idx);
        ScanData scanData = scanCollection.GetScan(scanNO);
        // for targeted peak detection, skip scans whose RT is outside the targeted list
        if (TargetedOnly && !FoundInInclusionRTList(scanData.RetentionTime)) {
            continue;
        }
        if (idx == 0) {
            preRT = scanData.RetentionTime - 0.01f;
        }
        for (int i = 0; i < scanData.PointCount(); i++) {
            XYData peak = scanData.Data.get(i);
            // for targeted peak detection, skip peaks whose RT and m/z are outside the targeted list
            if (TargetedOnly && !FoundInInclusionMZList(scanData.RetentionTime, peak.getX())) {
                continue;
            }
            if (peak.getX() < parameter.MinMZ) {
                continue;
            }
            // check whether this peak is already part of a previously built peak curve
            final int id_scanNO_peak = int_id(ia, idx, i);
            if (!included[id_scanNO_peak]) {
                // this peak becomes the starting peak of a new peak curve
                included[id_scanNO_peak] = true;
                float startmz = peak.getX();
                float startint = peak.getY();
                // find the maximum peak within the PPM window as the starting peak
                for (int j = i + 1; j < scanData.PointCount(); j++) {
                    XYData currentpeak = scanData.Data.get(j);
                    final int id_scanNO_currentpeak = int_id(ia, idx, j);
                    if (!included[id_scanNO_currentpeak]) {
                        if (InstrumentParameter.CalcPPM(currentpeak.getX(), startmz) <= PPM) {
                            included[id_scanNO_currentpeak] = true;
                            if (currentpeak.getY() >= startint) {
                                startmz = currentpeak.getX();
                                startint = currentpeak.getY();
                            }
                        } else {
                            break;
                        }
                    }
                }
                // initialize a new peak curve; add a background peak, then the starting peak
                PeakCurve Peakcurve = new PeakCurve(parameter);
                Peakcurve.AddPeak(preRT, startmz, scanData.background);
                Peakcurve.AddPeak(scanData.RetentionTime, startmz, startint);
                Peakcurve.StartScan = scanNO;
                int missedScan = 0;
                float endrt = scanData.RetentionTime;
                int endScan = scanData.ScanNum;
                float bk = 0f;
                // starting from the next scan, extend the curve from the starting peak
                for (int idx2 = idx + 1; idx2 < scanCollection.GetScanNoArray(MSlevel).size()
                        && missedScan < parameter.NoMissedScan; idx2++) {
                    int scanNO2 = scanCollection.GetScanNoArray(MSlevel).get(idx2);
                    ScanData scanData2 = scanCollection.GetScan(scanNO2);
                    endrt = scanData2.RetentionTime;
                    endScan = scanData2.ScanNum;
                    bk = scanData2.background;
                    float currentmz = 0f;
                    float currentint = 0f;
                    // if the scan is empty
                    if (scanData2.PointCount() == 0) {
                        if (parameter.FillGapByBK) {
                            Peakcurve.AddPeak(scanData2.RetentionTime, Peakcurve.TargetMz, scanData2.background);
                        }
                        missedScan++;
                        continue;
                    }
                    // find the m/z index
                    int mzidx = scanData2.GetLowerIndexOfX(Peakcurve.TargetMz);
                    for (int pkidx = mzidx; pkidx < scanData2.Data.size(); pkidx++) {
                        XYData currentpeak = scanData2.Data.get(pkidx);
                        if (currentpeak.getX() < parameter.MinMZ) {
                            continue;
                        }
                        // check whether the peak has already been included
                        final int int_id_scanNO2_currentpeak = int_id(ia, idx2, pkidx);
                        if (!included[int_id_scanNO2_currentpeak]) {
                            if (InstrumentParameter.CalcPPM(currentpeak.getX(), Peakcurve.TargetMz) > PPM) {
                                if (currentpeak.getX() > Peakcurve.TargetMz) {
                                    break;
                                }
                            } else {
                                // the peak is inside the PPM window; select the highest peak
                                included[int_id_scanNO2_currentpeak] = true;
                                if (currentint < currentpeak.getY()) {
                                    currentmz = currentpeak.getX();
                                    currentint = currentpeak.getY();
                                }
                            }
                        }
                    }
                    // no peak found in the PPM window
                    if (currentmz == 0f) {
                        if (parameter.FillGapByBK) {
                            Peakcurve.AddPeak(scanData2.RetentionTime, Peakcurve.TargetMz, scanData2.background);
                        }
                        missedScan++;
                    } else {
                        missedScan = 0;
                        Peakcurve.AddPeak(scanData2.RetentionTime, currentmz, currentint);
                    }
                }
                Peakcurve.AddPeak(endrt, Peakcurve.TargetMz, bk);
                Peakcurve.EndScan = endScan;
                // keep the curve if it is in the targeted list, or if it passes the SNR and length criteria
                if (FoundInInclusionList(Peakcurve.TargetMz, Peakcurve.StartRT(), Peakcurve.EndRT())) {
                    ++peakCurvesCount;
                    ftemp.add(fjp.submit(new PeakCurveSmoothingUnit(Peakcurve, parameter)));
                } else if (Peakcurve.GetRawSNR() > LCMSPeakBase.SNR
                        && Peakcurve.GetPeakList().size() >= parameter.MinPeakPerPeakCurve + 2) {
                    ++peakCurvesCount;
                    ftemp.add(fjp.submit(new PeakCurveSmoothingUnit(Peakcurve, parameter)));
                } else {
                    Peakcurve = null;
                }
            }
        }
        preRT = scanData.RetentionTime;
        if (ReleaseScans) {
            scanData.dispose();
        }
        // the block below collects the PeakCurveSmoothing() results and clears raw peaks,
        // adapting the batch size to the pool's load
        final boolean last_iter = idx + 1 == idx_end;
        if (ftemp.size() == step || last_iter) {
            final List<ForkJoinTask<ArrayList<PeakCurve>>> ftemp_sublist_view = last_iter ? ftemp
                    : ftemp.subList(0, step / 2);
            for (final Future<ArrayList<PeakCurve>> f : ftemp_sublist_view) {
                try {
                    LCMSPeakBase.UnSortedPeakCurves.addAll(f.get());
                } catch (InterruptedException | ExecutionException e) {
                    throw new RuntimeException(e);
                }
            }
            ftemp_sublist_view.clear();
            // if workers are idle, double the batch size so more tasks queue up between drains
            if (!last_iter && fjp.getActiveThreadCount() < fjp.getParallelism()) {
                step *= 2;
            }
        }
    }
    assert ftemp.isEmpty();
    // assign peak curve indices
    int i = 1;
    for (PeakCurve peakCurve : LCMSPeakBase.UnSortedPeakCurves) {
        peakCurve.Index = i++;
    }
    System.gc();
    Logger.getRootLogger()
            .info(peakCurvesCount + " Peak curves found (Memory usage:" + Math.round(
                    (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1048576) + "MB)");
}
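The MSUmpire example uses getActiveThreadCount() differently from the others: not to wait, but to adapt its batching. Submitted smoothing tasks accumulate in ftemp; whenever the batch reaches step, the older half is drained, and if the pool still has idle workers (getActiveThreadCount() < getParallelism()), step doubles so the pool stays fed. That feedback loop in isolation (the squaring lambda and the 10,000 work items are placeholders; the initial multiplier of 32 mirrors the example):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;

public class AdaptiveDrain {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        ForkJoinPool fjp = new ForkJoinPool();
        List<ForkJoinTask<Long>> pending = new ArrayList<>();
        int step = fjp.getParallelism() * 32; // initial batch size, as in the example above
        long total = 0;
        for (int unit = 0; unit < 10_000; unit++) {
            final int u = unit;
            pending.add(fjp.submit(() -> (long) u * u)); // placeholder work item
            boolean last = unit + 1 == 10_000;
            if (pending.size() == step || last) {
                // drain everything at the end, otherwise only the older half of the batch
                List<ForkJoinTask<Long>> view = last ? pending : pending.subList(0, step / 2);
                for (ForkJoinTask<Long> f : view) {
                    total += f.get();
                }
                view.clear(); // removes the drained prefix from `pending`
                // idle workers mean draining dominates; double the batch to keep the pool fed
                if (!last && fjp.getActiveThreadCount() < fjp.getParallelism()) {
                    step *= 2;
                }
            }
        }
        System.out.println("total = " + total);
        fjp.shutdown();
    }
}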