Example usage for java.util.concurrent ForkJoinPool ForkJoinPool

List of usage examples for java.util.concurrent ForkJoinPool ForkJoinPool

Introduction

On this page you can find example usages of the java.util.concurrent.ForkJoinPool constructor.

Prototype

private ForkJoinPool(byte forCommonPoolOnly) 

Source Link

Document

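Constructor for the common pool, using parameters possibly overridden by system properties. Note that this prototype is private; the usage examples below create their pools through the public ForkJoinPool(int parallelism) constructor.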

Usage

From source file:com.hygenics.parser.BreakMultiple.java

/**
 * run the class/*from  w  ww  .  j  a v  a  2  s.co  m*/
 */
public void run() {
    int j = 0;
    checkTable();
    rows = new ArrayList<String>();
    log.info("Starting Break");

    // the pool
    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);

    // for returned results
    List<Future<ArrayList<String>>> futures = new ArrayList<Future<ArrayList<String>>>();

    // for parsing
    Set<Callable<ArrayList<String>>> collect = new HashSet<Callable<ArrayList<String>>>();

    // for querying
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>();

    // results
    ArrayList<String> jsons = new ArrayList<String>();

    String condition = null;
    int size = (int) Math.ceil(pullsize / qnum);
    // get initial data from user
    for (int i = 0; i < qnum; i++) {
        condition = " WHERE " + idcolumn + " > " + Integer.toString(offset + (Math.round(pullsize / qnum) * i))
                + " AND " + idcolumn + " <= "
                + Integer.toString(offset + (Math.round(pullsize / qnum) * (i + 1)));

        if (extracondition != null) {
            condition += " " + extracondition.trim();
        }

        qcollect.add(new GetFromDB((select + condition), template));
        log.info("SELECTING " + select + " " + condition);
    }

    log.info("Getting From DB @" + Calendar.getInstance().getTime().toString());
    futures = fjp.invokeAll(qcollect);

    int w = 0;
    while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
        w++;
    }

    log.info("Waited for " + w + "Cycles");

    for (Future<ArrayList<String>> f : futures) {
        try {
            rows.addAll(f.get());
            f.cancel(true);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (ExecutionException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    qcollect = new HashSet<Callable<ArrayList<String>>>();
    futures = null;

    log.info("Breaking");
    // process while there is still data to process
    while (rows.size() > 0) {
        log.info("Iteration Contains " + rows.size() + " Rows");
        // add to the commit size for future processing
        offset += pullsize;
        log.info("Submitting Tasks");
        // submit for breaking apart

        for (String r : rows) {

            if (fjp.isShutdown()) {
                fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);
            }

            if (r != null) {

                if (mustcontain != null) {
                    if (r.contains(mustcontain)) {
                        if (cannotcontain != null) {
                            if (r.contains(cannotcontain) == false) {
                                Map<String, Json> rowmap = Json.read(r).asJsonMap();

                                // final getDAOTemplate template, final
                                // String row, final String token, final
                                // String replacementPattern, final
                                // Map<String,String> positions,final String
                                // date, final String table, final String
                                // offenderhash
                                if (rowmap.size() > 0) {
                                    collect.add(new Break(unescape, repeatkeys, template,
                                            rowmap.get(rowcolumn).asString(), token, replacementPattern,
                                            positions, (Calendar.getInstance().getTime().toString()),
                                            targettable, rowmap.get("offenderhash").asString(), maxpos,
                                            genhash));
                                }
                            }
                        } else {
                            Map<String, Json> rowmap = Json.read(r).asJsonMap();

                            // final getDAOTemplate template, final String
                            // row, final String token, final String
                            // replacementPattern, final Map<String,String>
                            // positions,final String date, final String
                            // table, final String offenderhash
                            if (rowmap.size() > 0) {
                                collect.add(new Break(unescape, repeatkeys, template,
                                        rowmap.get(rowcolumn).asString(), token, replacementPattern, positions,
                                        (Calendar.getInstance().getTime().toString()), targettable,
                                        rowmap.get("offenderhash").asString(), maxpos, genhash));
                            }
                        }
                    }
                } else {

                    if (cannotcontain != null) {
                        if (r.contains(cannotcontain) == false) {
                            Map<String, Json> rowmap = Json.read(r).asJsonMap();

                            // to ascend you must die, to die you must be
                            // crucified; so get off your -- cross so that
                            // we can nail down the nex martyr
                            // final getDAOTemplate template, final String
                            // row, final String token, final String
                            // replacementPattern, final Map<String,String>
                            // positions,final String date, final String
                            // table, final String offenderhash
                            if (rowmap.size() > 0) {
                                collect.add(new Break(unescape, repeatkeys, template,
                                        rowmap.get(rowcolumn).asString(), token, replacementPattern, positions,
                                        (Calendar.getInstance().getTime().toString()), targettable,
                                        rowmap.get("offenderhash").asString(), maxpos, genhash));
                            }
                        }
                    } else {
                        Map<String, Json> rowmap = Json.read(r).asJsonMap();

                        // final getDAOTemplate template, final String row,
                        // final String token, final String
                        // replacementPattern, final Map<String,String>
                        // positions,final String date, final String table,
                        // final String offenderhash
                        if (rowmap.size() > 0) {
                            collect.add(new Break(unescape, repeatkeys, template,
                                    rowmap.get(rowcolumn).asString(), token, replacementPattern, positions,
                                    (Calendar.getInstance().getTime().toString()), targettable,
                                    rowmap.get("offenderhash").asString(), maxpos, genhash));
                        }
                    }
                }
            }
        }

        log.info("SUBMITTED " + collect.size() + " tasks");

        futures = fjp.invokeAll(collect);

        w = 0;

        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
            w++;
        }

        log.info("Waited for " + w + " Cycles");

        jsons.clear();
        log.info("Getting Strings");
        try {

            for (Future<ArrayList<String>> p : futures) {
                ArrayList<String> retlist = p.get();

                if (retlist != null) {
                    if (retlist.size() > 0) {
                        jsons.addAll(retlist);
                    }

                    if (jsons.size() >= commit_size) {
                        // send to db
                        if (jsons.size() > SPLITSIZE) {
                            log.info("Split True: Sending to DB @ "
                                    + Calendar.getInstance().getTime().toString());

                            postToDb(jsons, true);
                            jsons = new ArrayList<String>();
                            log.info("Posted to DB @ " + Calendar.getInstance().getTime().toString());
                        } else {
                            log.info("Split False: Sending to DB @ "
                                    + Calendar.getInstance().getTime().toString());
                            postToDb(jsons, false);
                            jsons = new ArrayList<String>();
                            log.info("Posted to DB @ " + Calendar.getInstance().getTime().toString());
                        }
                    }
                }
                p.cancel(true);
            }
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (ExecutionException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        futures = null;
        collect = new HashSet<Callable<ArrayList<String>>>();

        // send to db
        if (jsons.size() > SPLITSIZE) {
            log.info("Split True: Sending to DB @" + Calendar.getInstance().getTime().toString());
            postToDb(jsons, true);
            jsons = new ArrayList<String>();
            log.info("Posted to DB @ " + Calendar.getInstance().getTime().toString());
        } else {
            log.info("Split False: Sending to DB @" + Calendar.getInstance().getTime().toString());
            postToDb(jsons, false);
            jsons = new ArrayList<String>();
            log.info("Posted to DB @ " + Calendar.getInstance().getTime().toString());
        }

        // get more information
        rows = new ArrayList<String>();

        if (Runtime.getRuntime().freeMemory() < 500000 | ((loops % waitloops) == 0 & waitloops != 0)) {
            log.info("Paused Free Memory Left: " + Runtime.getRuntime().freeMemory());
            System.gc();
            Runtime.getRuntime().gc();

            try {
                Thread.sleep(2000);
            } catch (InterruptedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

            while (Runtime.getRuntime().freeMemory() < 500000) {
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }

            log.info("Restart Free Memory Left: " + Runtime.getRuntime().freeMemory());
        }

        rows = new ArrayList<String>();

        // attempt to query the database from multiple threads
        for (int conn = 1; conn <= qnum; conn++) {
            // change condition
            condition = " WHERE " + idcolumn + " > "
                    + Integer.toString(offset + (Math.round(pullsize / qnum) * conn)) + " AND " + idcolumn
                    + " <= " + Integer.toString(offset + (Math.round(pullsize / qnum) * (conn + 1)));

            if (extracondition != null) {
                condition += " " + extracondition.trim();
            }

            qcollect.add(new GetFromDB((select + condition), template));
            log.info("SELECTING " + select + " " + condition);
        }

        futures = fjp.invokeAll(qcollect);

        w = 0;

        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
            w++;
        }

        log.info("Waited for " + w + " Cycles");

        for (Future<ArrayList<String>> f : futures) {
            try {

                ArrayList<String> test = f.get();

                if (test != null) {
                    if (test.size() > 0) {
                        rows.addAll(test);
                    }
                }

                f.cancel(true);

            } catch (InterruptedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (ExecutionException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }

        futures = null;
        qcollect = new HashSet<Callable<ArrayList<String>>>(4);

        j++;

        Runtime.getRuntime().gc();
        System.gc();

    }

    // send to db
    if (jsons.size() > SPLITSIZE) {
        log.info("Split True: Sending to DB @" + Calendar.getInstance().getTime().toString());
        postToDb(jsons, true);
        jsons = new ArrayList<String>();
    } else if (jsons.size() > 0) {
        log.info("Split False: Sending to DB @" + Calendar.getInstance().getTime().toString());
        postToDb(jsons, false);
        jsons = new ArrayList<String>();
    }

    Runtime.getRuntime().gc();
    System.gc();

    log.info("Shutting Down Forkjoin Pool");
    if (fjp.isShutdown() == false) {
        fjp.shutdownNow();
    }
}

From source file:com.hygenics.parser.GetImages.java

/**
 * Pulls image links from the database in id windows, downloads them with {@code ImageGrabber}
 * tasks on a {@link ForkJoinPool} and, when {@code addtoDB} is set, posts the results with
 * {@code ImagePost} tasks.
 *
 * <p>Each pass runs {@code numqueries} {@code QueryDatabase} queries of {@code chunksize} ids
 * starting at {@code offset}. Passes repeat while links are returned and {@code iter} stays
 * below {@code iterations}. Nothing is pulled unless {@code baseurl} or {@code baseurlcolumn}
 * is set. The pool is always shut down before the method returns.
 */
private void getImages() {
    // controls the web process from a removed method
    log.info("Setting Up Pull");
    String[] proxyarr = (proxies == null) ? null : proxies.split(",");
    // cleanup
    if (cleanup) {
        cleanupDir(fpath);
    }

    // image grab
    CookieManager cm = new CookieManager();
    cm.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
    CookieHandler.setDefault(cm);
    int numimages = 0;
    int iter = 0;
    int found = 0;

    // set proxy if needed
    if (proxyuser != null) {
        proxy(proxyhost, proxyport, https, proxyuser, proxypass);
    }

    ArrayList<String> postImages = new ArrayList<String>();
    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());

    try {
        Set<Callable<String>> pulls = new HashSet<Callable<String>>();
        ArrayList<String> images;
        int chunksize = (int) Math.ceil(commitsize / numqueries);
        log.info("Chunksize: " + chunksize);

        if (baseurl != null || baseurlcolumn != null) {
            do {
                log.info("Offset: " + offset);
                log.info("Getting Images");
                images = new ArrayList<String>(commitsize);
                log.info("Getting Columns");

                // a fresh set per pass, otherwise every earlier query would be run again
                Set<Callable<ArrayList<String>>> sqls = new HashSet<Callable<ArrayList<String>>>();

                for (int n = 0; n < numqueries; n++) {
                    String tempsql = sql + " WHERE " + idString + " >= " + offset + " AND " + idString + " < "
                            + (offset + chunksize);

                    if (conditions != null) {
                        tempsql += conditions;
                    }

                    sqls.add(new QueryDatabase(
                            ((extracondition != null) ? tempsql + " " + extracondition : tempsql)));

                    offset += chunksize;
                }

                // invokeAll only returns once every query has completed
                List<Future<ArrayList<String>>> futures = fjp.invokeAll(sqls);

                for (Future<ArrayList<String>> f : futures) {
                    try {
                        ArrayList<String> fjson = f.get();

                        if (fjson != null && fjson.size() > 0) {
                            images.addAll(fjson);
                        }
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                        // keep the interrupt visible to the caller and stop collecting
                        Thread.currentThread().interrupt();
                        break;
                    } catch (ExecutionException e) {
                        e.printStackTrace();
                    }
                }
                log.info(Integer.toString(images.size()) + " image links found. Pulling.");

                ArrayList<String> tempproxies = new ArrayList<String>();

                if (proxyarr != null) {
                    for (String proxy : proxyarr) {
                        tempproxies.add(proxy.trim());
                    }
                }

                if (maxproxies > 0) {
                    maxproxies -= 1;// 0 and 1 should be equivalent conditions
                    // --num is not like most 0 based still due
                    // to >=
                }

                // get images
                for (int num = 0; num < images.size(); num++) {
                    String icols = images.get(num);

                    // Take a random unused proxy. The parentheses matter: casting
                    // Math.random() on its own always yields 0.
                    String proxy = null;
                    if (!tempproxies.isEmpty()) {
                        proxy = tempproxies.remove((int) (Math.random() * tempproxies.size()));
                    }

                    // add grab
                    pulls.add(new ImageGrabber(icols, proxy));

                    // check for execution
                    // NOTE(review): with no proxies configured tempproxies is always empty, so
                    // every image is pulled in a batch of one - confirm that is intended.
                    if (num + 1 == images.size() || pulls.size() >= commitsize || tempproxies.size() == 0) {
                        if (tempproxies.size() == 0 && proxyarr != null) {
                            tempproxies = new ArrayList<String>(proxyarr.length);

                            for (String p : proxyarr) {
                                tempproxies.add(p.trim());
                            }
                        }

                        List<Future<String>> imageFutures = fjp.invokeAll(pulls);

                        for (Future<String> f : imageFutures) {
                            try {
                                String add = f.get();

                                if (add != null) {
                                    postImages.add(add);
                                }
                            } catch (InterruptedException e) {
                                e.printStackTrace();
                                Thread.currentThread().interrupt();
                                break;
                            } catch (ExecutionException e) {
                                e.printStackTrace();
                            }
                        }
                        pulls = new HashSet<Callable<String>>(commitsize);
                    }

                    if (postImages.size() >= commitsize && addtoDB) {
                        log.info("Posting to Database");
                        log.info("Found " + postImages.size() + " images");
                        numimages += postImages.size();
                        postImagesAndWait(fjp, postImages);
                        found += postImages.size();
                        postImages.clear();
                    }
                }

                if (postImages.size() > 0 && addtoDB) {
                    log.info("Posting to Database");
                    numimages += postImages.size();
                    postImagesAndWait(fjp, postImages);
                    found += postImages.size();
                    postImages.clear();
                }

                // handle iterations specs
                iter += 1;
                log.info("Iteration: " + iter);
                if ((iter < iterations && found < images.size()) || tillfound == true) {
                    log.info("Not All Images Obtained Trying Iteration " + iter + " of " + iterations);
                    offset -= commitsize;
                } else if ((iter < iterations && found >= images.size()) && tillfound == false) {
                    log.info("Images Obtained in " + iter + " iterations. Continuing.");
                    iter = 0;
                } else {
                    // precautionary
                    log.info("Images Obtained in " + iter + " iterations. Continuing");
                    iter = 0;
                }

            } while (images.size() > 0 && iter < iterations);
        }
    } finally {
        // release the worker threads on every exit path
        if (fjp.isShutdown() == false) {
            fjp.shutdownNow();
        }
    }

    log.info("Complete. Check for Errors \n " + numimages + " Images Found");
}

/**
 * Splits {@code batch} into up to {@code numqueries} consecutive slices, posts each slice with
 * its own {@code ImagePost} task and blocks until all of them have finished.
 *
 * <p>The last slice always runs to the end of the list so no element is dropped. Every task
 * gets a private copy of its slice, which lets the caller clear {@code batch} afterwards.
 * Empty slices are not submitted.
 *
 * @param fjp   pool that runs the post tasks
 * @param batch image records to post; not modified
 */
private void postImagesAndWait(ForkJoinPool fjp, List<String> batch) {
    int total = batch.size();
    int size = (int) Math.floor(total / numqueries);
    List<Future<?>> pending = new ArrayList<Future<?>>();

    for (int n = 0; n < numqueries; n++) {
        int from = n * size;
        int to = ((n + 1) < numqueries) ? (n + 1) * size : total;

        if (from < to) {
            pending.add(fjp.submit(new ImagePost(new ArrayList<String>(batch.subList(from, to)))));
        }
    }

    for (Future<?> p : pending) {
        try {
            p.get();
        } catch (InterruptedException e) {
            e.printStackTrace();
            Thread.currentThread().interrupt();
            break;
        } catch (ExecutionException e) {
            e.printStackTrace();
        }
    }
}

From source file:com.hygenics.parser.ParseJSoup.java

/**
 * Runs the program: repeatedly pulls pages from the database, parses them with the configured
 * {@code ParseSingle} / {@code ParseRows} / {@code ParseLoop} tasks on a {@link ForkJoinPool}
 * and posts the parsed records with {@code SplitPost} tasks.
 *
 * <p>Each pass covers the ids {@code [currpos, currpos + commitsize)} split over {@code qnums}
 * {@code SplitQuery} queries. When a pass returns nothing and {@code checkstring} is set, the
 * check query is run (while {@code its <= maxchecks}). The loop ends when no data is left or
 * the calling thread is interrupted. The pool is always shut down before the method returns.
 */
public void run() {
    int its = 0;

    this.select = Properties.getProperty(this.select);
    this.extracondition = Properties.getProperty(this.extracondition);
    this.column = Properties.getProperty(this.column);

    createTables();
    log.info("Starting Parse via JSoup @ " + Calendar.getInstance().getTime().toString());

    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procs);

    try {
        Set<Callable<ArrayList<String>>> collection;
        ArrayList<String> data = new ArrayList<String>((commitsize + 10));
        ArrayList<String> outdata = new ArrayList<String>(((commitsize + 10) * 3));
        int offenderhash = offset;

        boolean run = true;
        int iteration = 0;

        do {
            collection = new HashSet<Callable<ArrayList<String>>>(qnums);
            log.info("Getting Data");
            // get data
            int currpos = iteration * commitsize + offset;
            iteration += 1;
            String query = select;

            // NOTE(review): extracondition is appended both before and after " WHERE " -
            // confirm which of the two is intended; left unchanged here.
            if (extracondition != null) {
                query += " " + extracondition;
                query += " WHERE " + extracondition + " AND ";
            } else {
                query += " WHERE ";
            }

            for (int i = 0; i < qnums; i++) {
                int lower = currpos + (Math.round(commitsize / qnums * (i)));
                // the last window always closes at currpos + commitsize so that no ids are
                // skipped when commitsize is not divisible by qnums
                int upper = ((i + 1) < qnums)
                        ? Math.min(currpos + (Math.round(commitsize / qnums * (i + 1))), currpos + commitsize)
                        : currpos + commitsize;

                collection.add(new SplitQuery((query + pullid + " >= " + Integer.toString(lower) + " AND "
                        + pullid + " < " + Integer.toString(upper))));
            }

            if (collection.size() > 0) {
                // invokeAll only returns once every query has completed
                data.addAll(drainFutures(fjp.invokeAll(collection)));
            }

            collection = new HashSet<Callable<ArrayList<String>>>(data.size());
            // checkstring
            if (data.size() == 0 && checkstring != null && its <= maxchecks) {
                its++;
                collection.add(new SplitQuery(checkstring));
                data.addAll(drainFutures(fjp.invokeAll(collection)));
            }

            if (data.size() == 0) {
                // set to stop if size is 0
                log.info("No Pages to Parse. Will Terminate");
                run = false;
            } else {
                // parse
                log.info("Starting JSoup Parse @ " + Calendar.getInstance().getTime().toString());
                for (String json : data) {
                    Map<String, Json> jMap = Json.read(json).asJsonMap();
                    boolean hasHash = jMap.containsKey("offenderhash");

                    if (hasHash) {
                        // string to int in case it is a string and has some extra space
                        offenderhash = Integer.parseInt(jMap.get("offenderhash").asString().trim());
                    }

                    boolean allow = true;

                    if (mustcontain != null && jMap.get(column).asString().contains(mustcontain) == false) {
                        allow = false;
                    }

                    if (cannotcontain != null && jMap.get(column).asString().contains(cannotcontain)) {
                        allow = false;
                    }

                    if (allow && hasHash) {
                        String hash = Integer.toString(offenderhash);

                        if (this.singlepaths != null) {
                            collection.add(new ParseSingle(hash, header, footer, pagenarrow, singlepaths,
                                    StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                    replaceSequence));
                        }

                        if (this.multipaths != null) {
                            collection.add(new ParseRows(hash, header, footer, pagenarrow, multipaths,
                                    StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                    replaceSequence));
                        }

                        if (this.recordpaths != null) {
                            collection.add(new ParseLoop(hash, header, footer, pagenarrow, recordpaths,
                                    StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                    replaceSequence));
                        }
                    }
                    offenderhash += 1;
                }

                // complete parse
                log.info("Waiting for Parsing to Complete.");
                if (collection.size() > 0) {
                    outdata.addAll(drainFutures(fjp.invokeAll(collection)));
                }
                log.info("Finished Parsing @ " + Calendar.getInstance().getTime().toString());

                // post data
                log.info("Posting Data @ " + Calendar.getInstance().getTime().toString());
                if (outdata.size() > 0) {
                    submitPostsAndWait(fjp, outdata);
                }
                log.info("Finished Posting to DB @ " + Calendar.getInstance().getTime().toString());

                // size should remain same with 10 slot buffer room
                data.clear();
                outdata.clear();
            }

            // explicit collection hint because the page strings are very large
            System.gc();

        } while (run && !Thread.currentThread().isInterrupted());
    } finally {
        // release the worker threads on every exit path
        log.info("Shutting Down FJP");
        if (fjp.isShutdown() == false) {
            fjp.shutdownNow();
        }
    }

    log.info("Finished Parsing @ " + Calendar.getInstance().getTime().toString());
}

/**
 * Collects the lists produced by already-submitted tasks into one list.
 *
 * <p>A task that returned {@code null} is logged and skipped; a task that failed is reported
 * and skipped. Collection stops early if the calling thread is interrupted, and the interrupt
 * flag is restored.
 *
 * @param futures task handles as returned by {@code invokeAll}
 * @return the concatenated results, never {@code null}
 */
private ArrayList<String> drainFutures(List<Future<ArrayList<String>>> futures) {
    ArrayList<String> gathered = new ArrayList<String>();

    for (Future<ArrayList<String>> f : futures) {
        try {
            ArrayList<String> part = f.get();

            if (part == null) {
                log.info("Some Data Returned Null");
            } else {
                gathered.addAll(part);
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
            Thread.currentThread().interrupt();
            break;
        } catch (ExecutionException e) {
            e.printStackTrace();
        }
    }

    return gathered;
}

/**
 * Splits {@code records} into up to {@code qnums} consecutive slices, posts each slice with
 * its own {@code SplitPost} task and blocks until all of them have finished.
 *
 * <p>The last slice always runs to the end of the list so no record is dropped. Every task
 * gets a private copy of its slice. Empty slices are not submitted.
 *
 * @param fjp     pool that runs the post tasks
 * @param records parsed records to post; not modified
 */
private void submitPostsAndWait(ForkJoinPool fjp, List<String> records) {
    int total = records.size();
    int chunk = Math.round(total / qnums);
    List<Future<?>> pending = new ArrayList<Future<?>>();

    for (int i = 0; i < qnums; i++) {
        int from = i * chunk;
        int to = ((i + 1) < qnums) ? from + chunk : total;

        if (from < to) {
            ArrayList<String> od = new ArrayList<String>(records.subList(from, to));
            pending.add(fjp.submit(new SplitPost(template, od)));
        }
    }

    for (Future<?> p : pending) {
        try {
            p.get();
        } catch (InterruptedException e) {
            e.printStackTrace();
            Thread.currentThread().interrupt();
            break;
        } catch (ExecutionException e) {
            e.printStackTrace();
        }
    }
}

From source file:com.hygenics.parser.KVParser.java

/**
 * Pulls pages from the database in id windows of {@code commitsize}, builds one {@code Parser}
 * task per page and per key of {@code tags}, and posts the parsed records.
 *
 * <p>Each window is split into at most {@code qnums} contiguous, non-overlapping id ranges that
 * together cover the whole window. When a window yields no rows and {@code checkString} is set,
 * that query is run once as a fallback. The loop ends on the first window that yields no rows
 * from either source, or when the calling thread is interrupted.
 *
 * <p>Parser tasks are executed in batches of {@code commitsize}; every task is executed exactly
 * once and the remainder of a page batch is flushed before the next window is fetched.
 */
public void run() {
    log.info("Starting Parse @ " + Calendar.getInstance().getTime().toString());
    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procs);

    int currpos = 0;
    boolean run = true;

    try {
        while (run && !Thread.currentThread().isInterrupted()) {
            log.info("Getting Pages");
            String query = select;

            // NOTE(review): extracondition ends up in the statement twice (once before WHERE and
            // once inside it). The text is kept exactly as before because the intended SQL shape
            // cannot be determined from this method alone -- confirm and drop one occurrence.
            if (extracondition != null) {
                query += " " + extracondition + " WHERE " + extracondition + " AND ";
            } else {
                query += " WHERE ";
            }

            // Split [currpos, currpos + commitsize) into qnums ranges. Computing each bound from
            // the full product (instead of a floored per-chunk step) guarantees the last range
            // ends exactly at currpos + commitsize, so no ids are skipped.
            Set<Callable<ArrayList<String>>> collection = new HashSet<Callable<ArrayList<String>>>(qnums);
            for (int i = 0; i < qnums; i++) {
                int lower = currpos + (int) ((long) commitsize * i / qnums);
                int upper = currpos + (int) ((long) commitsize * (i + 1) / qnums);
                if (lower < upper) {
                    collection.add(new SplitQuery((query + pullid + " >= " + Integer.toString(lower) + " AND "
                            + pullid + " < " + Integer.toString(upper))));
                }
            }
            currpos += commitsize;

            ArrayList<String> data = collection.isEmpty() ? new ArrayList<String>()
                    : invokeAndCollect(fjp, collection);

            // fallback query when the id window produced nothing
            if (data.size() == 0 && checkString != null) {
                collection = new HashSet<Callable<ArrayList<String>>>(1);
                collection.add(new SplitQuery(checkString));
                data = invokeAndCollect(fjp, collection);
            }

            if (data.size() > 0) {
                log.info("Parsing " + Integer.toString(data.size()) + " Records");
                collection = new HashSet<Callable<ArrayList<String>>>(data.size());

                for (String json : data) {
                    Map<String, Object> jmap = Json.read(json).asMap();

                    // one parser task per table key in the tags map
                    for (String k : tags.keySet()) {
                        collection.add(new Parser(tags.get(k), jmap.get(htmlColumn).toString(), replacePattern,
                                replacement, jmap.get(hashColumn).toString(), hashColumn, k));

                        if (collection.size() >= commitsize) {
                            parseAndPost(fjp, collection);
                        }
                    }
                }

                // flush whatever did not fill a complete batch
                if (!collection.isEmpty()) {
                    parseAndPost(fjp, collection);
                }
            } else {
                log.info("No Records Found. Terminating!");
                run = false;
            }
        }
    } finally {
        // always release the worker threads, even when parsing throws
        log.info("Complete! Shutting Down FJP.");
        fjp.shutdownNow();
    }

    log.info("Finished Parse @ " + Calendar.getInstance().getTime().toString());
}

/**
 * Executes the given parser tasks, posts their combined output, and empties the task set so
 * that no task is ever executed twice.
 *
 * @param fjp   pool that runs the tasks
 * @param tasks pending parser tasks; cleared before this method returns
 */
private void parseAndPost(ForkJoinPool fjp, Set<Callable<ArrayList<String>>> tasks) {
    log.info("Waiting for Tasks to Complete");
    ArrayList<String> parsed = invokeAndCollect(fjp, tasks);
    tasks.clear();

    log.info("Parsed " + parsed.size() + " records!");
    if (parsed.size() > 0) {
        checkTables(parsed);
        this.sendToDb(parsed, true);
    }
}

/**
 * Runs all tasks on the pool and concatenates their non-null results.
 *
 * <p>{@code invokeAll} only returns once every task has completed, so no additional waiting is
 * required before reading the futures.
 *
 * @param fjp   pool that runs the tasks
 * @param tasks tasks to execute
 * @return the non-null strings returned by the tasks; never {@code null}
 */
private ArrayList<String> invokeAndCollect(ForkJoinPool fjp, Set<Callable<ArrayList<String>>> tasks) {
    ArrayList<String> results = new ArrayList<String>();

    for (Future<ArrayList<String>> future : fjp.invokeAll(tasks)) {
        try {
            ArrayList<String> part = future.get();
            if (part == null) {
                log.info("Some Data Returned Null");
                continue;
            }
            for (String record : part) {
                if (record != null) {
                    results.add(record);
                }
            }
        } catch (InterruptedException e) {
            // keep the interrupt visible to the caller and stop reading further results
            Thread.currentThread().interrupt();
            e.printStackTrace();
            break;
        } catch (ExecutionException e) {
            e.printStackTrace();
        }
    }

    return results;
}

From source file:com.hygenics.parser.GetImages.java

/**
 * Builds one JSON record per image file found directly under {@code fpath} and posts the
 * records through {@code ImagePost} tasks.
 *
 * <p>A file is considered when its lower-cased name contains {@code .bmp}, {@code .jpg},
 * {@code .png} or {@code jpeg}, when {@code dbcondition} is either unset or returns at least one
 * row for it, and when {@code ImageIO} can actually decode it. Records are posted whenever
 * {@code commitsize} of them have accumulated and once more after the last file.
 */
private void addFromFile() {
    File f = new File(fpath);
    if (!f.exists()) {
        log.error("File Path does Not Exist.Please Check Image Pull!");
        return;
    }

    File[] list = f.listFiles();
    if (list == null) {
        // listFiles() yields null when the path is not a directory or cannot be read
        return;
    }

    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
    ArrayList<String> imageData = new ArrayList<String>();
    try {
        int curr = 0;
        for (File img : list) {
            curr += 1;

            String lowerName = img.getName().toLowerCase();
            boolean imageName = lowerName.contains(".bmp") || lowerName.contains(".jpg")
                    || lowerName.contains(".png") || lowerName.contains("jpeg");

            if (img.isDirectory() == false && imageName) {
                try {
                    // assumes the hash is the file name without the configured postfix
                    String hash = img.getName().replaceAll("(?mis)" + imagepostfix, "");

                    if (dbcondition == null
                            || template.getJsonData(dbcondition.replace("$IMAGE$", hash)).size() > 0) {
                        // ImageIO.read returns null (rather than throwing) when no reader can
                        // decode the file, so the result has to be checked explicitly
                        if (ImageIO.read(img) == null) {
                            log.info(img.getName() + " is not an Image!");
                        } else {
                            JsonObject jobj = new JsonObject();
                            jobj.add("offenderhash", hash);
                            jobj.add("image", hash);
                            jobj.add("image_path", img.getName());
                            jobj.add("table", table);
                            jobj.add("date", Calendar.getInstance().getTime().toString());
                            imageData.add(jobj.toString());
                        }
                    }
                } catch (IOException e) {
                    log.info(img.getName() + " is not an Image!");
                    e.printStackTrace();
                } catch (Exception e) {
                    log.info("Error in Posting to Database.");
                    e.printStackTrace();
                }
            }

            // post when the batch is full or the directory listing is exhausted
            if (imageData.size() >= commitsize || curr == list.length) {
                postImageBatch(fjp, imageData);
                imageData.clear();
            }
        }
    } finally {
        fjp.shutdown();
    }
}

/**
 * Posts the pending records in at most {@code numqueries} chunks and blocks until every chunk
 * has been processed. Does nothing when there are no pending records.
 *
 * <p>Each task receives its own copy of its chunk, so the caller may clear {@code pending} as
 * soon as this method returns.
 *
 * @param fjp     pool that runs the {@code ImagePost} tasks
 * @param pending records waiting to be posted; not modified
 */
private void postImageBatch(ForkJoinPool fjp, ArrayList<String> pending) {
    if (pending.isEmpty()) {
        return;
    }
    log.info("Posting to DB @ " + Calendar.getInstance().getTime().toString());

    // size the chunks from the actual batch so that every record lands in exactly one chunk
    int chunk = Math.max(1, (int) Math.ceil(pending.size() / (double) numqueries));
    ArrayList<java.util.concurrent.Future<?>> posts = new ArrayList<java.util.concurrent.Future<?>>();
    for (int from = 0; from < pending.size(); from += chunk) {
        int to = Math.min(pending.size(), from + chunk);
        posts.add(fjp.submit(new ImagePost(new ArrayList<String>(pending.subList(from, to)))));
    }

    for (java.util.concurrent.Future<?> post : posts) {
        try {
            post.get();
        } catch (InterruptedException e) {
            // keep the interrupt visible to the caller and stop waiting
            Thread.currentThread().interrupt();
            log.error("Interrupted while waiting for image posts");
            return;
        } catch (java.util.concurrent.ExecutionException e) {
            log.error("Image post failed: " + e.getCause());
        }
    }
    log.info("Finished Posting to DB @ " + Calendar.getInstance().getTime().toString());
}

From source file:com.hygenics.parser.ParseDispatcher.java

/**
 * Posts the given JSON records to the database.
 *
 * <p>With {@code split} set, the records are cut into at most {@code qnum} contiguous chunks
 * and each chunk is handed to a {@code SplitPost} task on a dedicated pool; the method then
 * waits up to {@code termtime} milliseconds for the tasks to finish. Otherwise every record is
 * posted individually through {@code template.postSingleJson}.
 *
 * @param json  records to post
 * @param split {@code true} to post in parallel chunks, {@code false} to post one by one
 */
private void spl(ArrayList<String> json, boolean split) {
    if (json.size() > 0)
        log.info("Records to Add: " + json.size());

    if (split) {
        ForkJoinPool f2 = new ForkJoinPool(
                (Runtime.getRuntime().availableProcessors() + ((int) Math.ceil(procnum * sqlnum))));

        // real ceiling division: the chunk size never degenerates to 0 for small inputs
        int size = (int) Math.ceil(json.size() / (double) qnum);
        for (int conn = 0; conn < qnum; conn++) {
            int from = conn * size;
            if (from >= json.size()) {
                break;
            }
            // subList's upper bound is exclusive, so json.size() includes the last record
            int to = Math.min(from + size, json.size());
            f2.execute(new SplitPost(template, new ArrayList<String>(json.subList(from, to))));
        }

        // shutdown() has to come first: awaitTermination() can only return early once a
        // shutdown was requested; tasks that were already submitted still run to completion
        f2.shutdown();
        try {
            if (!f2.awaitTermination(termtime, TimeUnit.MILLISECONDS)) {
                log.error("Timed out waiting for split posts to finish");
            }
        } catch (InterruptedException e1) {
            // keep the interrupt visible to the caller
            Thread.currentThread().interrupt();
            e1.printStackTrace();
        }

    } else {
        for (String j : json) {

            // the record is still posted when it does not parse; this only reports it
            try {
                Json.read(j);
            } catch (Exception e) {
                log.info("ERROR: JSON NOT FORMATTED PROPERLY");
                System.out.println(j);
            }

            try {
                this.template.postSingleJson(j);
            } catch (Exception e) {
                log.info("Failed to Post");
                log.error(j);
                e.printStackTrace();
            }
        }
    }

}

From source file:com.hygenics.parser.ParseDispatcher.java

/**
 * Posts the given JSON records to the database.
 *
 * <p>With {@code split} set, the records are cut into at most {@code qnum} contiguous chunks
 * and each chunk is handed to a {@code SplitPost} task on a dedicated pool; the method then
 * waits up to {@code termtime} milliseconds for the tasks to finish. Otherwise every record is
 * posted individually through {@code template.postSingleJson}.
 *
 * @param json  records to post
 * @param split {@code true} to post in parallel chunks, {@code false} to post one by one
 */
private void sendToDb(ArrayList<String> json, boolean split) {
    if (json.size() > 0)
        log.info("Records to Add: " + json.size());

    if (split) {
        ForkJoinPool f2 = new ForkJoinPool(
                (Runtime.getRuntime().availableProcessors() + ((int) Math.ceil(procnum * sqlnum))));

        // real ceiling division: the chunk size never degenerates to 0 for small inputs
        int size = (int) Math.ceil(json.size() / (double) qnum);
        for (int conn = 0; conn < qnum; conn++) {
            int from = conn * size;
            if (from >= json.size()) {
                break;
            }
            // subList's upper bound is exclusive, so json.size() includes the last record
            int to = Math.min(from + size, json.size());
            f2.execute(new SplitPost(template, new ArrayList<String>(json.subList(from, to))));
        }

        // shutdown() has to come first: awaitTermination() can only return early once a
        // shutdown was requested; tasks that were already submitted still run to completion
        f2.shutdown();
        try {
            if (!f2.awaitTermination(termtime, TimeUnit.MILLISECONDS)) {
                log.error("Timed out waiting for split posts to finish");
            }
        } catch (InterruptedException e1) {
            // keep the interrupt visible to the caller
            Thread.currentThread().interrupt();
            e1.printStackTrace();
        }

    } else {
        for (String j : json) {

            // the record is still posted when it does not parse; this only reports it
            try {
                Json.read(j);
            } catch (Exception e) {
                log.info("ERROR: JSON NOT FORMATTED PROPERLY");
                System.out.println(j);
            }

            try {
                this.template.postSingleJson(j);
            } catch (Exception e) {
                log.info("Failed to Post");
                log.error(j);
                e.printStackTrace();
            }
        }
    }

}

From source file:com.hygenics.parser.ParseDispatcher.java

/**
 * Fork/Join pool solution maximizes speed; JSON increases ease of use.
 */
public void run() {
    log.info("Starting Clock and Parsing @" + Calendar.getInstance().getTime().toString());
    long t = Calendar.getInstance().getTimeInMillis();
    int pid = 0;
    int id = 0;
    int checkattempts = 0;
    String add = null;

    this.schema = Properties.getProperty(this.schema);
    this.select = Properties.getProperty(this.select);
    this.extracondition = Properties.getProperty(this.extracondition);
    this.column = Properties.getProperty(this.column);

    ArrayList<String> parsedrows = new ArrayList<String>();

    Set<Callable<String>> collect = new HashSet<Callable<String>>();
    List<Future<String>> futures;

    List<Future<ArrayList<String>>> qfutures;
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4);

    ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum));

    if (schema != null) {
        createTables();
    }

    boolean run = true;
    String condition;
    int w = 0;
    int start = offset;
    int chunksize = (int) Math.ceil(pullsize / qnum);

    // attempt to query the database from multiple threads
    do {
        // query for pages
        pages = new ArrayList<String>(pullsize);
        log.info("Looking for Pages.");
        for (int conn = 0; conn < qnum; conn++) {
            // create condition
            condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND " + pullid + " < "
                    + Integer.toString(start + (chunksize * (conn + 1)));

            if (extracondition != null) {
                condition += " " + extracondition.trim();
            }

            // get queries
            qcollect.add(new SplitQuery(template, (select + condition)));
            log.info("Fetching " + select + condition);
        }
        start += (chunksize * qnum);

        qfutures = fjp.invokeAll(qcollect);

        w = 0;
        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
            w++;
        }
        log.info("Waited for " + w + " cycles");

        for (Future<ArrayList<String>> f : qfutures) {
            try {

                ArrayList<String> test = f.get();
                if (test != null) {
                    if (test.size() > 0) {
                        pages.addAll(test);
                    }
                }

                if (f.isDone() == false) {
                    f.cancel(true);
                }

                f = null;
            } catch (Exception e) {
                log.warn("Encoding Error!");
                e.printStackTrace();
            }
        }
        qcollect = new HashSet<Callable<ArrayList<String>>>(4);
        qfutures = null;
        log.info("Finished Getting Pages");

        // if no records then get records that may have been dropped
        if (pages.size() == 0 && checkstring != null && checkstring.trim().length() > 0
                && checkattempts < reattempts) {
            checkattempts += 1;
            log.info("Checking for Drops");
            qcollect.add(new SplitQuery(template, (checkstring)));
            qfutures = fjp.invokeAll(qcollect);

            w = 0;
            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                w++;
            }
            log.info("Waited for " + w + " cycles");

            for (Future<ArrayList<String>> f : qfutures) {
                try {

                    ArrayList<String> test = f.get();
                    if (test != null) {
                        if (test.size() > 0) {
                            pages.addAll(test);
                        }
                    }

                    if (f.isDone() == false) {
                        f.cancel(true);
                    }

                    f = null;
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }
            qfutures = null;
            qcollect = new HashSet<Callable<ArrayList<String>>>(4);

        } else if (checkattempts >= reattempts) {
            pages.clear();
        }

        log.info("Found " + pages.size() + " records!");

        // get hashes if necessary
        if (getHash) {
            log.info("Hashing " + pages.size() + " Records");

            ArrayList<String> hashedrows = new ArrayList<String>();
            for (String row : pages) {

                collect.add(new CreateHash(row, pid));
                pid++;

            }

            log.info("Invoking");
            futures = fjp.invokeAll(collect);

            w = 0;
            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                w++;
            }

            log.info("Waited " + w + " Cycles!");

            for (Future<String> f : futures) {
                if (f != null) {
                    String json;
                    try {
                        json = f.get(termtime, TimeUnit.MILLISECONDS);

                        if (json != null) {
                            hashedrows.add(json);
                        }

                    } catch (Exception e) {
                        log.warn("Encoding Error!");
                        e.printStackTrace();
                    }
                }

            }
            log.info("Hashed " + hashedrows.size() + " Records!");
            pages = hashedrows;

            collect = new HashSet<Callable<String>>(pullsize);
            futures.clear();
            log.info("Completed Hashing");
        }

        log.info("Performing Regex");
        // handle single patterns
        int i = 0;
        if (singlepats != null) {

            log.info("Found Singlepats");
            int subs = 0;
            int rows = 0;
            for (String row : pages) {
                rows += 1;
                String inrow = row;
                try {

                    inrow = inrow.replaceAll("\t|\r|\r\n|\n", "");

                    Map<String, Json> jmap = Json.read(inrow).asJsonMap();

                    if (singlepats.containsKey("table")) {
                        subs += 1;

                        if (fjp.isShutdown()) {
                            fjp = new ForkJoinPool((Runtime.getRuntime().availableProcessors() * procnum));
                        }

                        if (jmap.get(column) != null) {

                            if (test) {
                                System.out.println("//////////////////////HTML////////////////////////\n"
                                        + jmap.get(column).asString()
                                        + "\n///////////////////////////////END///////////////////////////\n\n");
                            }

                            if (mustcontain != null) {
                                if (jmap.get(column).asString().contains(mustcontain)) {
                                    if (cannotcontain != null) {
                                        if (jmap.get(column).asString().contains(cannotcontain) == false)
                                            collect.add(new ParsePage(unescape, replacementPattern,
                                                    singlepats.get("table"),
                                                    jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                                    singlepats, Calendar.getInstance().getTime().toString(),
                                                    jmap.get("offenderhash").asString()));
                                    } else {
                                        collect.add(new ParsePage(unescape, replacementPattern,
                                                singlepats.get("table"),
                                                jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                                singlepats, Calendar.getInstance().getTime().toString(),
                                                jmap.get("offenderhash").asString()));
                                    }
                                }
                            } else if (cannotcontain != null) {
                                if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                    collect.add(
                                            new ParsePage(unescape, replacementPattern, singlepats.get("table"),
                                                    jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                                    singlepats, Calendar.getInstance().getTime().toString(),
                                                    jmap.get("offenderhash").asString()));
                                }
                            } else {
                                collect.add(new ParsePage(unescape, replacementPattern, singlepats.get("table"),
                                        jmap.get(column).asString().replaceAll("\\s\\s", " "), singlepats,
                                        Calendar.getInstance().getTime().toString(),
                                        jmap.get("offenderhash").asString()));
                            }
                        }
                    }
                    i++;

                    if (((i % commit_size) == 0 & i != 0) || i == pages.size()
                            || pages.size() == 1 && singlepats != null) {
                        log.info("Getting Regex Results");

                        log.info("Getting Tasks");

                        futures = fjp.invokeAll(collect);

                        w = 0;

                        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                            w++;
                        }

                        log.info("Waited for " + w + " cycles");

                        for (Future<String> r : futures) {
                            try {

                                add = r.get();
                                if (add.contains("No Data") == false) {
                                    parsedrows.add(add);
                                }

                                add = null;

                            } catch (Exception e) {
                                log.warn("Encoding Error!");
                                e.printStackTrace();
                            }
                        }

                        futures = null;
                        collect = new HashSet<Callable<String>>();

                        if (parsedrows.size() >= commit_size) {
                            log.info("INSERTING " + parsedrows.size() + " records!");
                            if (parsedrows.size() >= SPLITSIZE) {
                                sendToDb(parsedrows, true);
                            } else {
                                sendToDb(parsedrows, false);
                            }

                            parsedrows = new ArrayList<String>(pullsize);
                        }

                        // hint to the gc in case it actually pays off; use
                        // -X:compactexplicitgc to improve odds and
                        // -XX:UseConcMarkSweepGC for improving odds on
                        // older generation strings
                        // (think if i were a gambling man)
                        System.gc();
                        Runtime.getRuntime().gc();
                    }
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }
            log.info("Submitted " + subs + " records. Found " + rows + " rows");
        }

        log.info("REMAINING ROWS TO COMMIT " + parsedrows.size());
        log.info("Rows Left" + parsedrows.size());
        if (parsedrows.size() > 0) {

            if (parsedrows.size() >= SPLITSIZE) {
                sendToDb(parsedrows, true);
            } else {
                sendToDb(parsedrows, false);
            }

            parsedrows = new ArrayList<String>();
        }

        // handle multi patterns
        if (multipats != null) {
            // parse multiple pages for the run
            int subs = 0;
            for (String row : pages) {
                try {
                    for (String k : multipats.keySet()) {
                        if (fjp.isShutdown()) {

                            fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
                        }

                        Map<String, Json> jmap = Json.read(row).asJsonMap();

                        if (jmap.get(column) != null) {
                            subs += 1;
                            if (test) {
                                System.out.println("//////////////////////HTML////////////////////////\n"
                                        + jmap.get(column).asString()
                                        + "\n///////////////////////////////END///////////////////////////\n\n");
                            }

                            if (mustcontain != null) {
                                if (jmap.get(column).asString().contains(mustcontain)) {
                                    if (cannotcontain != null) {
                                        if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                            collect.add(
                                                    new ParseMultiPage(unescape, replacementPattern, k,
                                                            jmap.get(column).asString().replaceAll("\\s\\s",
                                                                    " "),
                                                            jmap.get("offenderhash").asString(),
                                                            Calendar.getInstance().getTime().toString(),
                                                            multipats.get(k)));
                                        }
                                    } else {
                                        collect.add(new ParseMultiPage(unescape, replacementPattern, k,
                                                jmap.get(column).asString(),
                                                jmap.get("offenderhash").asString().replaceAll("\\s\\s", " "),
                                                Calendar.getInstance().getTime().toString(), multipats.get(k)));
                                    }
                                }
                            } else if (cannotcontain != null) {
                                if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                    collect.add(new ParseMultiPage(unescape, replacementPattern, k,
                                            jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                            jmap.get("offenderhash").asString(),
                                            Calendar.getInstance().getTime().toString(), multipats.get(k)));
                                }

                            } else {
                                collect.add(new ParseMultiPage(unescape, replacementPattern, k,
                                        jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                        jmap.get("offenderhash").asString(),
                                        Calendar.getInstance().getTime().toString(), multipats.get(k)));
                            }
                        }

                        i++;
                        if (((i % commit_size) == 0 & i != 0) || i == pages.size()
                                || pages.size() == 1 && multipats != null) {
                            futures = fjp.invokeAll(collect);
                            w = 0;
                            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                                w++;
                            }

                            log.info("Waited " + w + " Cycles");

                            for (Future<String> r : futures) {
                                try {
                                    add = r.get();

                                    if (add.contains("No Data") == false) {

                                        for (String js : add.split("~")) {
                                            parsedrows.add(js);
                                        }
                                    }
                                    add = null;

                                    if (r.isDone() == false) {
                                        r.cancel(true);
                                    }
                                    r = null;

                                } catch (InterruptedException e) {
                                    // TODO Auto-generated catch block
                                    e.printStackTrace();
                                } catch (ExecutionException e) {
                                    // TODO Auto-generated catch block
                                    e.printStackTrace();
                                }
                            }

                            futures = null;
                            collect = new HashSet<Callable<String>>();

                            if (parsedrows.size() >= commit_size) {
                                log.info("INSERTING " + parsedrows.size() + " records!");
                                if (parsedrows.size() >= SPLITSIZE) {
                                    sendToDb(parsedrows, true);
                                } else {
                                    sendToDb(parsedrows, false);
                                }
                                parsedrows = new ArrayList<String>(pullsize);
                            }

                            // hint to the gc in case it actually pays off
                            System.gc();
                            Runtime.getRuntime().gc();
                        }
                    }

                } catch (Exception e) {
                    log.warn("Encoding Error!");
                }

            }
            log.info("Submitted " + subs + " records.");
        }

        // handle looped patterns
        if (loopedpats != null) {
            log.info("Looped Patterns Found");
            int subs = 0;
            if (fjp.isShutdown()) {
                fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);
            }

            for (String row : pages) {
                try {

                    for (String k : loopedpats.keySet()) {
                        if (fjp.isShutdown()) {
                            fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);
                        }
                        Map<String, Json> jmap = Json.read(row).asJsonMap();

                        if (jmap.get(column) != null) {
                            subs += 1;
                            if (mustcontain != null) {
                                if (jmap.get(column).asString().contains(mustcontain)) {
                                    if (cannotcontain != null) {
                                        if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                            collect.add(
                                                    new LoopRegex(unescape,
                                                            jmap.get(column).asString().replaceAll("\\s\\s",
                                                                    " "),
                                                            jmap.get("offenderhash").asString(),
                                                            Calendar.getInstance().getTime().toString(), k,
                                                            replacementPattern, loopedpats.get(k), test));
                                        }
                                    } else {
                                        collect.add(new LoopRegex(unescape,
                                                jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                                jmap.get("offenderhash").asString(),
                                                Calendar.getInstance().getTime().toString(), k,
                                                replacementPattern, loopedpats.get(k), test));
                                    }
                                }
                            } else if (cannotcontain != null) {
                                if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                    collect.add(new LoopRegex(unescape,
                                            jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                            jmap.get("offenderhash").asString(),
                                            Calendar.getInstance().getTime().toString(), k, replacementPattern,
                                            loopedpats.get(k), test));
                                }
                            } else {
                                collect.add(new LoopRegex(unescape,
                                        jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                        jmap.get("offenderhash").asString(),
                                        Calendar.getInstance().getTime().toString(), k, replacementPattern,
                                        loopedpats.get(k), test));
                            }
                            jmap.remove(k);
                        }
                        i++;
                        if (((i % commit_size) == 0 & i != 0) || (i % (pages.size() - 1)) == 0
                                || pages.size() == 1) {

                            futures = fjp.invokeAll(collect);

                            w = 0;

                            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                                w++;
                            }
                            log.info("Waited " + w + " Cycles");

                            for (Future<String> r : futures) {
                                try {
                                    add = r.get();
                                    if (add.contains("No Data") == false) {
                                        for (String toarr : add.split("~")) {
                                            parsedrows.add(toarr);
                                        }
                                    }

                                    if (r.isDone() == false) {
                                        r.cancel(true);
                                    }
                                    add = null;

                                } catch (Exception e) {
                                    log.warn("Encoding Error!");
                                    e.printStackTrace();
                                }
                            }

                            futures = null;
                            collect = new HashSet<Callable<String>>();

                            // hint to the gc in case it actually pays off
                            System.gc();
                            Runtime.getRuntime().gc();
                        }
                    }

                    if (parsedrows.size() >= this.commit_size) {
                        log.info("INSERTING " + parsedrows.size() + " records!");
                        if (parsedrows.size() >= SPLITSIZE) {
                            sendToDb(parsedrows, true);
                        } else {
                            sendToDb(parsedrows, false);
                        }

                        parsedrows = new ArrayList<String>(pullsize);
                    }

                } catch (Exception e) {
                    log.warn("Encoding Error!");
                }
            }
            log.info("Submitted " + subs + " records.");
        }

        if (collect.size() > 0) {
            log.info("Getting Last Regex Results for Iteration");

            log.info("Getting Tasks");

            futures = fjp.invokeAll(collect);

            w = 0;

            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                w++;
            }

            log.info("Waited for " + w + " cycles");

            for (Future<String> r : futures) {
                try {

                    add = r.get();
                    if (add.contains("No Data") == false) {
                        parsedrows.add(add);
                    }

                    add = null;

                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }

            futures = null;
            collect = new HashSet<Callable<String>>(pullsize);
            // hint to the gc in case it actually pays off; use
            // -X:compactexplicitgc to improve odds and
            // -XX:UseConcMarkSweepGC for improving odds on older generation
            // strings
            // (think if i were a gambling man)
            System.gc();
            Runtime.getRuntime().gc();
        }

        log.info("REMAINING ROWS TO COMMIT " + parsedrows.size());
        log.info("Rows Left" + parsedrows.size());
        if (parsedrows.size() > 0) {

            if (parsedrows.size() >= SPLITSIZE) {
                sendToDb(parsedrows, true);
            } else {
                sendToDb(parsedrows, false);
            }

            parsedrows = new ArrayList<String>();
        }

    } while (pages != null && pages.size() > 0);

    // ensure that nothing is still caught in limbo
    // final parser to ensure that nothing is left out
    if (collect.size() > 0) {
        log.info("More Rows Caught in FJP, Completing Process");
        futures = fjp.invokeAll(collect);

        w = 0;

        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
            w++;
        }
        log.info("Waited " + w + " Cycles");

        for (Future<String> r : futures) {
            try {
                add = r.get();

                if (add.contains("No Data") == false) {

                    for (String js : add.split("~")) {
                        parsedrows.add(js);
                    }
                }
                add = null;

                if (r.isDone() == false) {
                    r.cancel(true);
                }
                r = null;

            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ExecutionException e) {
                e.printStackTrace();
            }
        }

        futures = null;
        collect = null;
    }

    // send any remaining parsed rows to the db
    if (parsedrows.size() > 0) {

        if (parsedrows.size() >= SPLITSIZE) {
            sendToDb(parsedrows, true);
        } else {
            sendToDb(parsedrows, false);
        }

        parsedrows = new ArrayList<String>();
    }

    log.info("Shutting Down Fork Join Pool");
    if (fjp.isShutdown() == false) {
        fjp.shutdownNow();
    }

    fjp = null;

    log.info("Complete @" + Calendar.getInstance().getTime().toString());
    log.info("Total Runtime(seconds): "
            + Double.toString((double) (Calendar.getInstance().getTimeInMillis() - t) / 1000));

    // hint to the gc in case it actually pays off
    System.gc();
    Runtime.getRuntime().gc();
}