Example usage for java.util.concurrent ExecutorService isTerminated

List of usage examples for java.util.concurrent ExecutorService isTerminated


On this page you can find example usages of java.util.concurrent.ExecutorService.isTerminated().


boolean isTerminated();

Source Link


Returns true if all tasks have completed following shut down. Note that isTerminated is never true unless either shutdown or shutdownNow was called first.


From source file:Main.java

public static void main(String[] args) {
    ExecutorService pool = Executors.newCachedThreadPool();
    Main app = new Main();
    for (int i = 0; i < 1000; i++) {
        pool.execute(app.new Adding());
    }/*from   w w  w .j av  a  2  s.  c o m*/
    while (!pool.isTerminated()) {
        System.out.println(" Is it done? : " + pool.isTerminated());
    System.out.println(" Is it done? : " + pool.isTerminated());
    System.out.println("Sum is " + app.sum);

From source file:WordLengthCallable.java

public static void main(String[] args) throws Exception {
    int THREAD_COUNT = 4;
    ExecutorService execService = Executors.newFixedThreadPool(THREAD_COUNT);
    CompletionService<Integer> completionService = new ExecutorCompletionService<>(execService);

    for (int i = 0; i < THREAD_COUNT; i++) {
        completionService.submit(new WordLengthCallable());
    }/* w  w w . j av  a2 s. c o  m*/
    while (!execService.isTerminated()) {
        int result = completionService.take().get().intValue();
        System.out.println("Result is: " + result);

From source file:fr.inria.edelweiss.kgdqp.core.CentralizedInferrencing.java

/**
 * Command-line entry point of the centralized rule-engine experiment: parses
 * the options, prepares a data graph and a separate rules graph, runs a rule
 * selection query on the rules graph, and then polls
 * {@code threadPool.isTerminated()} while printing elapsed time and graph size.
 *
 * NOTE(review): this excerpt is truncated. Closing braces are missing and
 * several statements are not visible (no {@code options.addOption(...)} call,
 * no actual load call, no submission of {@code ruleThread} to the pool, no
 * {@code shutdown()}), so it does not compile as shown. Confirm against the
 * full source before relying on it.
 *
 * @param args command-line arguments; the visible code reads -h, -o, -r, -l and -v
 */
public static void main(String args[])
        throws ParseException, EngineException, InterruptedException, IOException {

    List<String> endpoints = new ArrayList<String>();
    String queryPath = null;
    boolean rulesSelection = false;
    File rulesDir = null;
    File ontDir = null;

    Graph graph = Graph.create();
    QueryProcess exec = QueryProcess.create(graph);

    Options options = new Options();
    Option helpOpt = new Option("h", "help", false, "print this message");
    //        Option queryOpt = new Option("q", "query", true, "specify the sparql query file");
    //        Option endpointOpt = new Option("e", "endpoint", true, "a federated sparql endpoint URL");
    Option versionOpt = new Option("v", "version", false, "print the version information and exit");
    Option rulesOpt = new Option("r", "rulesDir", true, "directory containing the inference rules");
    Option ontOpt = new Option("o", "ontologiesDir", true,
            "directory containing the ontologies for rules selection");
    //        Option locOpt = new Option("c", "centralized", false, "performs centralized inferences");
    Option dataOpt = new Option("l", "load", true, "data file or directory to be loaded");
    //        Option selOpt = new Option("s", "rulesSelection", false, "if set to true, only the applicable rules are run");
    //        options.addOption(queryOpt);
    //        options.addOption(endpointOpt);
    //        options.addOption(selOpt);
    //        options.addOption(locOpt);

    String header = "Corese/KGRAM rule engine experiment command line interface";
    String footer = "\nPlease report any issue to alban.gaignard@cnrs.fr, olivier.corby@inria.fr";

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);
    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("kgdqp", header, options, footer, true);
    // -o switches rule selection on and names the ontologies directory.
    if (cmd.hasOption("o")) {
        rulesSelection = true;
        String ontDirPath = cmd.getOptionValue("o");
        ontDir = new File(ontDirPath);
        if (!ontDir.isDirectory()) {
            logger.warn(ontDirPath + " is not a valid directory path.");
    if (!cmd.hasOption("r")) {
        logger.info("You must specify a path for inference rules directory !");

    if (cmd.hasOption("l")) {
        String[] dataPaths = cmd.getOptionValues("l");
        for (String path : dataPaths) {
            // NOTE(review): only the loader creation and the log line are visible;
            // the call that actually loads `path` is not shown in this excerpt.
            Load ld = Load.create(graph);
            logger.info("Loaded " + path);

    if (cmd.hasOption("v")) {
        logger.info("version 3.0.4-SNAPSHOT");

    // NOTE(review): when -r is absent the visible code above only logs; rulesDirPath
    // would presumably be null here. Confirm the full source exits earlier.
    String rulesDirPath = cmd.getOptionValue("r");
    rulesDir = new File(rulesDirPath);
    if (!rulesDir.isDirectory()) {
        logger.warn(rulesDirPath + " is not a valid directory path.");

    // Local rules graph initialization
    Graph rulesG = Graph.create();
    Load ld = Load.create(rulesG);

    if (rulesSelection) {
        // Ontology loading
        if (ontDir.isDirectory()) {
            for (File o : ontDir.listFiles()) {
                logger.info("Loading " + o.getAbsolutePath());

    // Rules loading
    if (rulesDir.isDirectory()) {
        for (File r : rulesDir.listFiles()) {
            logger.info("Loading " + r.getAbsolutePath());

    // Rule engine initialization
    RuleEngine ruleEngine = RuleEngine.create(graph);

    // NOTE(review): no sw.start() is visible before sw.getTime() is read below.
    StopWatch sw = new StopWatch();
    logger.info("Federated graph size : " + graph.size());
    logger.info("Rules graph size : " + rulesG.size());

    // Rule selection
    logger.info("Rules selection");
    QueryProcess localKgram = QueryProcess.create(rulesG);
    ArrayList<String> applicableRules = new ArrayList<String>();
    String rulesSelQuery = "";
    // pertinentRulesQuery and allRulesQuery are defined outside this excerpt.
    if (rulesSelection) {
        rulesSelQuery = pertinentRulesQuery;
    } else {
        rulesSelQuery = allRulesQuery;
    Mappings maps = localKgram.query(rulesSelQuery);
    logger.info("Rules selected in " + sw.getTime() + " ms");
    logger.info("Applicable rules : " + maps.size());

    // Selected rule loading
    for (Mapping map : maps) {
        IDatatype dt = (IDatatype) map.getValue("?res");
        String rule = dt.getLabel();
        //loading rule in the rule engine
        //            logger.info("Adding rule : ");
        //            System.out.println("-------");
        //            System.out.println(rule);
        //            System.out.println("");
        //            if (! rule.toLowerCase().contains("sameas")) {
        //            }

    // Rules application on distributed sparql endpoints
    // NOTE(review): nothing visible adds to applicableRules, so this would log 0 rules as shown.
    logger.info("Rules application (" + applicableRules.size() + " rules)");
    ExecutorService threadPool = Executors.newCachedThreadPool();
    RuleEngineThread ruleThread = new RuleEngineThread(ruleEngine);

    //        ruleEngine.process();

    //monitoring loop
    // NOTE(review): isTerminated() only returns true after shutdown()/shutdownNow();
    // neither that call nor the submission of ruleThread is visible in this excerpt.
    while (!threadPool.isTerminated()) {
        //            System.out.println("******************************");
        //            System.out.println(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter, QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));
        //            System.out.println("Rule engine running for " + sw.getTime() + " ms");
        //            System.out.println("Federated graph size : " + graph.size());
        System.out.println(sw.getTime() + " , " + graph.size());

    logger.info("Federated graph size : " + graph.size());
    //        logger.info(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter, QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));

    //        TripleFormat f = TripleFormat.create(graph, true);
    //        f.write("/tmp/gAll.ttl");


From source file:fr.inria.edelweiss.kgdqp.core.CentralizedInferrencingNoSpin.java

/**
 * Command-line entry point of the centralized rule-engine experiment, variant
 * that reads each {@code .rq} rule file, converts it with
 * {@code SPINProcess.toSpin} and loads the result into the rules graph as
 * Turtle. It then runs a rule selection query on the rules graph and polls
 * {@code threadPool.isTerminated()} while printing elapsed time and graph size.
 *
 * NOTE(review): this excerpt is truncated. Closing braces are missing and
 * several statements are not visible (no {@code options.addOption(...)} call,
 * no data load call, no submission of {@code ruleThread} to the pool, no
 * {@code shutdown()}), so it does not compile as shown. Confirm against the
 * full source before relying on it.
 *
 * @param args command-line arguments; the visible code reads -h, -o, -r, -l and -v
 */
public static void main(String args[])
        throws ParseException, EngineException, InterruptedException, IOException, LoadException {

    List<String> endpoints = new ArrayList<String>();
    String queryPath = null;
    boolean rulesSelection = false;
    File rulesDir = null;
    File ontDir = null;

    Graph graph = Graph.create();
    QueryProcess exec = QueryProcess.create(graph);

    Options options = new Options();
    Option helpOpt = new Option("h", "help", false, "print this message");
    //        Option queryOpt = new Option("q", "query", true, "specify the sparql query file");
    //        Option endpointOpt = new Option("e", "endpoint", true, "a federated sparql endpoint URL");
    Option versionOpt = new Option("v", "version", false, "print the version information and exit");
    Option rulesOpt = new Option("r", "rulesDir", true, "directory containing the inference rules");
    Option ontOpt = new Option("o", "ontologiesDir", true,
            "directory containing the ontologies for rules selection");
    //        Option locOpt = new Option("c", "centralized", false, "performs centralized inferences");
    Option dataOpt = new Option("l", "load", true, "data file or directory to be loaded");
    //        Option selOpt = new Option("s", "rulesSelection", false, "if set to true, only the applicable rules are run");
    //        options.addOption(queryOpt);
    //        options.addOption(endpointOpt);
    //        options.addOption(selOpt);
    //        options.addOption(locOpt);

    String header = "Corese/KGRAM rule engine experiment command line interface";
    String footer = "\nPlease report any issue to alban.gaignard@cnrs.fr, olivier.corby@inria.fr";

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);
    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("kgdqp", header, options, footer, true);
    // -o switches rule selection on and names the ontologies directory.
    if (cmd.hasOption("o")) {
        rulesSelection = true;
        String ontDirPath = cmd.getOptionValue("o");
        ontDir = new File(ontDirPath);
        if (!ontDir.isDirectory()) {
            logger.warn(ontDirPath + " is not a valid directory path.");
    if (!cmd.hasOption("r")) {
        logger.info("You must specify a path for inference rules directory !");

    if (cmd.hasOption("l")) {
        String[] dataPaths = cmd.getOptionValues("l");
        for (String path : dataPaths) {
            // NOTE(review): only the loader creation and the log line are visible;
            // the call that actually loads `path` is not shown in this excerpt.
            Load ld = Load.create(graph);
            logger.info("Loaded " + path);

    if (cmd.hasOption("v")) {
        logger.info("version 3.0.4-SNAPSHOT");

    // NOTE(review): when -r is absent the visible code above only logs; rulesDirPath
    // would presumably be null here. Confirm the full source exits earlier.
    String rulesDirPath = cmd.getOptionValue("r");
    rulesDir = new File(rulesDirPath);
    if (!rulesDir.isDirectory()) {
        logger.warn(rulesDirPath + " is not a valid directory path.");

    // Local rules graph initialization
    Graph rulesG = Graph.create();
    Load ld = Load.create(rulesG);

    if (rulesSelection) {
        // Ontology loading
        if (ontDir.isDirectory()) {
            for (File o : ontDir.listFiles()) {
                logger.info("Loading " + o.getAbsolutePath());

    // Rules loading
    if (rulesDir.isDirectory()) {
        for (File r : rulesDir.listFiles()) {
            // Only files whose path ends in ".rq" are treated as rules.
            if (r.getAbsolutePath().endsWith(".rq")) {
                logger.info("Loading " + r.getAbsolutePath());
                //                ld.load(r.getAbsolutePath());

                //                    byte[] encoded = Files.readAllBytes(Paths.get(r.getAbsolutePath()));
                //                    String construct = new String(encoded, "UTF-8"); //StandardCharsets.UTF_8);

                // NOTE(review): no close() of this stream is visible in the excerpt.
                FileInputStream f = new FileInputStream(r);
                QueryLoad ql = QueryLoad.create();
                String construct = ql.read(f);

                // Convert the query text to its SPIN form before loading it.
                SPINProcess sp = SPINProcess.create();
                String spinConstruct = sp.toSpin(construct);

                // NOTE(review): getBytes() without a charset uses the platform default.
                ld.load(new ByteArrayInputStream(spinConstruct.getBytes()), Load.TURTLE_FORMAT);
                logger.info("Rules graph size : " + rulesG.size());


    // Rule engine initialization
    RuleEngine ruleEngine = RuleEngine.create(graph);

    // NOTE(review): no sw.start() is visible before sw.getTime() is read below.
    StopWatch sw = new StopWatch();
    logger.info("Federated graph size : " + graph.size());
    logger.info("Rules graph size : " + rulesG.size());

    // Rule selection
    logger.info("Rules selection");
    QueryProcess localKgram = QueryProcess.create(rulesG);
    ArrayList<String> applicableRules = new ArrayList<String>();
    String rulesSelQuery = "";
    // pertinentRulesQuery and allRulesQuery are defined outside this excerpt.
    if (rulesSelection) {
        rulesSelQuery = pertinentRulesQuery;
    } else {
        rulesSelQuery = allRulesQuery;
    Mappings maps = localKgram.query(rulesSelQuery);
    logger.info("Rules selected in " + sw.getTime() + " ms");
    logger.info("Applicable rules : " + maps.size());

    // Selected rule loading
    for (Mapping map : maps) {
        IDatatype dt = (IDatatype) map.getValue("?res");
        String rule = dt.getLabel();
        //loading rule in the rule engine
        //            logger.info("Adding rule : ");
        //            System.out.println("-------");
        //            System.out.println(rule);
        //            System.out.println("");
        //            if (! rule.toLowerCase().contains("sameas")) {
        //            }

    // Rules application on distributed sparql endpoints
    // NOTE(review): nothing visible adds to applicableRules, so this would log 0 rules as shown.
    logger.info("Rules application (" + applicableRules.size() + " rules)");
    ExecutorService threadPool = Executors.newCachedThreadPool();
    RuleEngineThread ruleThread = new RuleEngineThread(ruleEngine);

    //        ruleEngine.process();

    //monitoring loop
    // NOTE(review): isTerminated() only returns true after shutdown()/shutdownNow();
    // neither that call nor the submission of ruleThread is visible in this excerpt.
    while (!threadPool.isTerminated()) {
        //            System.out.println("******************************");
        //            System.out.println(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter, QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));
        //            System.out.println("Rule engine running for " + sw.getTime() + " ms");
        //            System.out.println("Federated graph size : " + graph.size());
        System.out.println(sw.getTime() + " , " + graph.size());

    logger.info("Federated graph size : " + graph.size());
    //        logger.info(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter, QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));

    //        TripleFormat f = TripleFormat.create(graph, true);
    //        f.write("/tmp/gAll.ttl");

From source file:fr.inria.edelweiss.kgdqp.core.FedInferrencingCLI.java

/**
 * Command-line entry point of the distributed rule engine: parses the
 * options, registers each endpoint URL as a remote source on a
 * {@code QueryProcessDQP}, runs a rule selection query on a local rules graph,
 * and then polls {@code threadPool.isTerminated()} while printing the DQP
 * cost counters, the elapsed time and the graph size.
 *
 * NOTE(review): this excerpt is truncated. Closing braces are missing and
 * several statements are not visible (no {@code options.addOption(...)} call,
 * no rule/ontology load call, no submission of {@code ruleThread} to the pool,
 * no {@code shutdown()}), so it does not compile as shown. Confirm against the
 * full source before relying on it.
 *
 * @param args command-line arguments; the visible code reads -h, -e, -o, -r and -v
 */
public static void main(String args[]) throws ParseException, EngineException, InterruptedException {

    List<String> endpoints = new ArrayList<String>();
    String queryPath = null;
    boolean rulesSelection = false;
    File rulesDir = null;
    File ontDir = null;

    Options options = new Options();
    Option helpOpt = new Option("h", "help", false, "print this message");
    Option queryOpt = new Option("q", "query", true, "specify the sparql query file");
    Option endpointOpt = new Option("e", "endpoint", true, "a federated sparql endpoint URL");
    Option versionOpt = new Option("v", "version", false, "print the version information and exit");
    Option rulesOpt = new Option("r", "rulesDir", true, "directory containing the inference rules");
    Option ontOpt = new Option("o", "ontologiesDir", true,
            "directory containing the ontologies for rules selection");
    //        Option selOpt = new Option("s", "rulesSelection", false, "if set to true, only the applicable rules are run");
    //        options.addOption(selOpt);

    String header = "Corese/KGRAM distributed rule engine command line interface";
    String footer = "\nPlease report any issue to alban.gaignard@cnrs.fr, olivier.corby@inria.fr";

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);
    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("kgdqp", header, options, footer, true);
    if (!cmd.hasOption("e")) {
        logger.info("You must specify at least the URL of one sparql endpoint !");
    } else {
        // Every -e value becomes one endpoint URL.
        endpoints = new ArrayList<String>(Arrays.asList(cmd.getOptionValues("e")));
    // -o switches rule selection on and names the ontologies directory.
    if (cmd.hasOption("o")) {
        rulesSelection = true;
        String ontDirPath = cmd.getOptionValue("o");
        ontDir = new File(ontDirPath);
        if (!ontDir.isDirectory()) {
            logger.warn(ontDirPath + " is not a valid directory path.");
    if (!cmd.hasOption("r")) {
        logger.info("You must specify a path for inference rules directory !");
    } else if (rulesSelection) {


    if (cmd.hasOption("v")) {
        logger.info("version 3.0.4-SNAPSHOT");

    // NOTE(review): when -r is absent the visible code above only logs; rulesDirPath
    // would presumably be null here. Confirm the full source exits earlier.
    String rulesDirPath = cmd.getOptionValue("r");
    rulesDir = new File(rulesDirPath);
    if (!rulesDir.isDirectory()) {
        logger.warn(rulesDirPath + " is not a valid directory path.");

    Graph graph = Graph.create();
    QueryProcessDQP execDQP = QueryProcessDQP.create(graph);
    // Register each endpoint as a remote source; malformed URLs are logged.
    for (String url : endpoints) {
        try {
            execDQP.addRemote(new URL(url), WSImplem.REST);
        } catch (MalformedURLException ex) {
            logger.error(url + " is not a well-formed URL");

    // Local rules graph initialization
    Graph rulesG = Graph.create();
    Load ld = Load.create(rulesG);

    if (rulesSelection) {
        // Ontology loading
        if (ontDir.isDirectory()) {
            for (File o : ontDir.listFiles()) {
                logger.info("Loading " + o.getAbsolutePath());

    // Rules loading
    if (rulesDir.isDirectory()) {
        for (File r : rulesDir.listFiles()) {
            logger.info("Loading " + r.getAbsolutePath());

    // Rule engine initialization
    RuleEngine ruleEngine = RuleEngine.create(graph);

    // NOTE(review): no sw.start() is visible before sw.getTime() is read below.
    StopWatch sw = new StopWatch();
    logger.info("Federated graph size : " + graph.size());
    logger.info("Rules graph size : " + rulesG.size());

    // Rule selection
    logger.info("Rules selection");
    QueryProcess localKgram = QueryProcess.create(rulesG);
    ArrayList<String> applicableRules = new ArrayList<String>();
    String rulesSelQuery = "";
    // pertinentRulesQuery and allRulesQuery are defined outside this excerpt.
    if (rulesSelection) {
        rulesSelQuery = pertinentRulesQuery;
    } else {
        rulesSelQuery = allRulesQuery;
    Mappings maps = localKgram.query(rulesSelQuery);
    logger.info("Rules selected in " + sw.getTime() + " ms");
    logger.info("Applicable rules : " + maps.size());

    // Selected rule loading
    for (Mapping map : maps) {
        IDatatype dt = (IDatatype) map.getValue("?res");
        String rule = dt.getLabel();
        //loading rule in the rule engine
        //            logger.info("Adding rule : " + rule);

    // Rules application on distributed sparql endpoints
    // NOTE(review): nothing visible adds to applicableRules, so this would log 0 rules as shown.
    logger.info("Rules application (" + applicableRules.size() + " rules)");
    ExecutorService threadPool = Executors.newCachedThreadPool();
    RuleEngineThread ruleThread = new RuleEngineThread(ruleEngine);

    //        ruleEngine.process();

    //monitoring loop
    // NOTE(review): isTerminated() only returns true after shutdown()/shutdownNow();
    // neither that call nor the submission of ruleThread is visible in this excerpt.
    while (!threadPool.isTerminated()) {
        System.out.println(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter,
                QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));
        System.out.println("Rule engine running for " + sw.getTime() + " ms");
        System.out.println("Federated graph size : " + graph.size());

    logger.info("Federated graph size : " + graph.size());
    logger.info(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter,
            QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));

    ///////////// Query file processing
    //        StringBuffer fileData = new StringBuffer(1000);
    //        BufferedReader reader = null;
    //        try {
    //            reader = new BufferedReader(new FileReader(queryPath));
    //        } catch (FileNotFoundException ex) {
    //             logger.error("Query file "+queryPath+" not found !");
    //             System.exit(1);
    //        }
    //        char[] buf = new char[1024];
    //        int numRead = 0;
    //        try {
    //            while ((numRead = reader.read(buf)) != -1) {
    //                String readData = String.valueOf(buf, 0, numRead);
    //                fileData.append(readData);
    //                buf = new char[1024];
    //            }
    //            reader.close();
    //        } catch (IOException ex) {
    //           logger.error("Error while reading query file "+queryPath);
    //           System.exit(1);
    //        }
    //        String sparqlQuery = fileData.toString();
    //        Query q = exec.compile(sparqlQuery,null);
    //        System.out.println(q);
    //        StopWatch sw = new StopWatch();
    //        sw.start();
    //        Mappings map = exec.query(sparqlQuery);
    //        int dqpSize = map.size();
    //        System.out.println("--------");
    //        long time = sw.getTime();
    //        System.out.println(time + " " + dqpSize);

From source file:io.bfscan.clueweb12.BuildWarcTrecIdMapping.java

/**
 * Command-line entry point: parses the input/index/max-docs/threads options,
 * opens a Lucene {@code IndexWriter} on the index path, reads the
 * bzip2-compressed input line by line, creates one {@code AddDocumentRunnable}
 * per line on a fixed-size pool, and waits for {@code executor.isTerminated()}.
 *
 * NOTE(review): this excerpt is truncated and partly scrambled. The
 * option-building statements are out of order, closing braces are missing,
 * and neither the submission of {@code worker} to the executor, the
 * {@code cnt} increment nor {@code executor.shutdown()} is visible. It does
 * not compile as shown.
 *
 * @param args command-line arguments
 * @throws Exception declared by the original signature
 */
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file")
            .create(INPUT_OPTION));
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
            .withDescription("maximum number of documents to index").create(MAX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads")

    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());

    // NOTE(review): if parsing failed, cmdline is still null here unless the
    // (not visible) remainder of the catch block exits. Confirm.
    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(BuildWarcTrecIdMapping.class.getCanonicalName(), options);

    String indexPath = cmdline.getOptionValue(INDEX_OPTION);
    // Optional limits fall back to "no limit" and DEFAULT_NUM_THREADS.
    int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION))
            : Integer.MAX_VALUE;
    int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION))
            : DEFAULT_NUM_THREADS;

    long startTime = System.currentTimeMillis();

    String path = cmdline.getOptionValue(INPUT_OPTION);
    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER);

    IndexWriter writer = new IndexWriter(dir, config);
    LOG.info("Creating index at " + indexPath);
    LOG.info("Indexing with " + threads + " threads");

    FileInputStream fis = null;
    BufferedReader br = null;

    try {
        fis = new FileInputStream(new File(path));
        byte[] ignoreBytes = new byte[2];
        // Skip the two-byte "BZ" magic header before handing the stream to CBZip2InputStream.
        fis.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fis), "UTF8"));

        ExecutorService executor = Executors.newFixedThreadPool(threads);
        int cnt = 0;
        String s;
        while ((s = br.readLine()) != null) {
            Runnable worker = new AddDocumentRunnable(writer, s);

            // Progress message every 1,000,000 lines.
            if (cnt % 1000000 == 0) {
                LOG.info(cnt + " articles added");
            if (cnt >= maxdocs) {

        // Wait until all threads are finish
        // NOTE(review): isTerminated() only returns true after shutdown()/shutdownNow();
        // that call is not visible in this excerpt.
        while (!executor.isTerminated()) {

        LOG.info("Total of " + cnt + " articles indexed.");

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
    } finally {

From source file:general.Main.java

/**
 * Selects the files to be processed and specifies the files to write to.
 * @param args Arguments to specify runtime behavior.
 */
public static void main(String[] args) throws InvocationTargetException, NoSuchMethodException,
        InstantiationException, IllegalAccessException {
    // NOTE(review): this excerpt is truncated. Closing braces are missing, two
    // println calls have lost their first line (only the string literal
    // remains), and neither the submission of the workers to the executor nor
    // executor.shutdown() is visible. It does not compile as shown.
    Options options = new Options();
    options.addOption("l", "logging", false, "enables file logging");
    options.addOption("j", "jena", false, "uses the Jena SPARQL Parser");
    options.addOption("o", "openrdf", false, "uses the OpenRDF SPARQL Parser");
    options.addOption("f", "file", true, "defines the input file prefix");
    options.addOption("h", "help", false, "displays this help");
    options.addOption("t", "tsv", false, "reads from .tsv-files");
    // options.addOption("p", "parquet", false, "read from .parquet-files");
    options.addOption("n", "numberOfThreads", true, "number of used threads, default 1");
    options.addOption("b", "withBots", false, "enables metric calculation for bot queries+");
    options.addOption("p", "readPreprocessed", false, "enables reading of preprocessed files");

    //some parameters which can be changed through parameters
    //QueryHandler queryHandler = new OpenRDFQueryHandler();
    String inputFilePrefix;
    String inputFileSuffix = ".tsv";
    String queryParserName = "OpenRDF";
    Class inputHandlerClass = null;
    Class queryHandlerClass = null;
    int numberOfThreads = 1;

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd;
    try {
        cmd = parser.parse(options, args);
        if (cmd.hasOption("help")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("help", options);
        if (cmd.hasOption("openrdf")) {
            queryHandlerClass = OpenRDFQueryHandler.class;
        if (cmd.hasOption("tsv")) {
            inputFileSuffix = ".tsv";
            inputHandlerClass = InputHandlerTSV.class;
        // NOTE(review): the "parquet" option is commented out above and "-p" is
        // registered as readPreprocessed, so this branch and the "-p for parquet"
        // hint below look stale. Confirm.
        if (cmd.hasOption("parquet")) {
            inputFileSuffix = ".parquet";
            SparkConf conf = new SparkConf().setAppName("SPARQLQueryAnalyzer").setMaster("local");
            JavaSparkContext sc = new JavaSparkContext(conf);
            inputHandlerClass = InputHandlerParquet.class;
        if (inputHandlerClass == null) {
            System.out.println("Please specify which parser to use, either -t for TSV or -p for parquet.");
        if (cmd.hasOption("file")) {
            inputFilePrefix = cmd.getOptionValue("file").trim();
        } else {
                    "Please specify at least the file which we should work on using the option '--file PREFIX' or 'f PREFIX'");
        if (cmd.hasOption("logging")) {
            LoggingHandler.initFileLog(queryParserName, inputFilePrefix);
        if (cmd.hasOption("numberOfThreads")) {
            numberOfThreads = Integer.parseInt(cmd.getOptionValue("numberOfThreads"));
        // withBots and readPreprocessed are declared outside this excerpt.
        if (cmd.hasOption("withBots")) {
            withBots = true;
        if (cmd.hasOption("readPreprocessed")) {
            readPreprocessed = true;
    } catch (UnrecognizedOptionException e) {
        System.out.println("Unrecognized commandline option: " + e.getOption());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("help", options);
    } catch (ParseException e) {
                "There was an error while parsing your command line input. Did you rechecked your syntax before running?");
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("help", options);



    long startTime = System.nanoTime();

    ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);

    // One worker per day of the month; input files are named <prefix><two-digit day><suffix>.
    for (int day = 1; day <= 31; day++) {
        String inputFile = inputFilePrefix + String.format("%02d", day) + inputFileSuffix;
        Runnable parseOneMonthWorker = new ParseOneMonthWorker(inputFile, inputFilePrefix, inputHandlerClass,
                queryParserName, queryHandlerClass, day);

    // NOTE(review): isTerminated() only returns true after shutdown()/shutdownNow();
    // that call is not visible in this excerpt.
    while (!executor.isTerminated()) {
        //wait until all workers are finished


    long stopTime = System.nanoTime();
    long millis = TimeUnit.MILLISECONDS.convert(stopTime - startTime, TimeUnit.NANOSECONDS);
    // NOTE(review): an elapsed duration is formatted as a calendar Date, and in
    // SimpleDateFormat "mm" is minute-of-hour (month is "MM"). Confirm the intended output.
    Date date = new Date(millis);
    System.out.println("Finished executing with all threads: "
            + new SimpleDateFormat("mm-dd HH:mm:ss:SSSSSSS").format(date));

From source file:cc.wikitools.lucene.IndexWikipediaDump.java

/**
 * Command-line entry point: parses the input/index/max-docs/threads options,
 * opens a Lucene {@code IndexWriter} on the index path, reads pages from a
 * {@code WikipediaBz2DumpInputStream}, creates one {@code AddDocumentRunnable}
 * per page on a fixed-size pool, and waits for {@code executor.isTerminated()}.
 *
 * NOTE(review): this excerpt is truncated and partly scrambled. The
 * option-building statements are out of order, closing braces and the bodies
 * of the filter conditions are missing, and neither the submission of
 * {@code worker} to the executor, the {@code cnt} increment nor
 * {@code executor.shutdown()} is visible. It does not compile as shown.
 *
 * @param args command-line arguments
 * @throws Exception declared by the original signature
 */
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file")
            .create(INPUT_OPTION));
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
            .withDescription("maximum number of documents to index").create(MAX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads")

    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());

    // NOTE(review): if parsing failed, cmdline is still null here unless the
    // (not visible) remainder of the catch block exits. Confirm.
    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexWikipediaDump.class.getCanonicalName(), options);

    String indexPath = cmdline.getOptionValue(INDEX_OPTION);
    // Optional limits fall back to "no limit" and DEFAULT_NUM_THREADS.
    int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION))
            : Integer.MAX_VALUE;
    int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION))
            : DEFAULT_NUM_THREADS;

    long startTime = System.currentTimeMillis();

    String path = cmdline.getOptionValue(INPUT_OPTION);
    PrintStream out = new PrintStream(System.out, true, "UTF-8");
    WikiClean cleaner = new WikiCleanBuilder().withTitle(true).build();

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER);

    IndexWriter writer = new IndexWriter(dir, config);
    LOG.info("Creating index at " + indexPath);
    LOG.info("Indexing with " + threads + " threads");

    try {
        WikipediaBz2DumpInputStream stream = new WikipediaBz2DumpInputStream(path);

        ExecutorService executor = Executors.newFixedThreadPool(threads);
        int cnt = 0;
        String page;
        while ((page = stream.readNext()) != null) {
            String title = cleaner.getTitle(page);

            // These are heuristic specifically for filtering out non-articles in enwiki-20120104.
            // NOTE(review): the bodies of the two filters below are not visible;
            // presumably they skip the page. Confirm.
            if (title.startsWith("Wikipedia:") || title.startsWith("Portal:") || title.startsWith("File:")) {

            if (page.contains("#REDIRECT") || page.contains("#redirect") || page.contains("#Redirect")) {

            Runnable worker = new AddDocumentRunnable(writer, cleaner, page);

            // Progress message every 10,000 pages.
            if (cnt % 10000 == 0) {
                LOG.info(cnt + " articles added");
            if (cnt >= maxdocs) {

        // Wait until all threads are finish
        // NOTE(review): isTerminated() only returns true after shutdown()/shutdownNow();
        // that call is not visible in this excerpt.
        while (!executor.isTerminated()) {

        LOG.info("Total of " + cnt + " articles indexed.");

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
    } finally {

From source file:com.aerospike.load.AerospikeLoad.java

public static void main(String[] args) throws IOException {

    Thread statPrinter = new Thread(new PrintStat(counters));
    try {//from   w ww  .j a v a2  s  .  co  m
        log.info("Aerospike loader started");
        Options options = new Options();
        options.addOption("h", "host", true, "Server hostname (default: localhost)");
        options.addOption("p", "port", true, "Server port (default: 3000)");
        options.addOption("n", "namespace", true, "Namespace (default: test)");
        options.addOption("s", "set", true, "Set name. (default: null)");
        options.addOption("c", "config", true, "Column definition file name");
        options.addOption("wt", "write-threads", true,
                "Number of writer threads (default: Number of cores * 5)");
        options.addOption("rt", "read-threads", true,
                "Number of reader threads (default: Number of cores * 1)");
        options.addOption("l", "rw-throttle", true, "Throttling of reader to writer(default: 10k) ");
        options.addOption("tt", "transaction-timeout", true,
                "write transaction timeout in miliseconds(default: No timeout)");
        options.addOption("et", "expiration-time", true,
                "Expiration time of records in seconds (default: never expire)");
        options.addOption("T", "timezone", true,
                "Timezone of source where data dump is taken (default: local timezone)");
        options.addOption("ec", "abort-error-count", true, "Error count to abort (default: 0)");
        options.addOption("wa", "write-action", true, "Write action if key already exists (default: update)");
        options.addOption("v", "verbose", false, "Logging all");
        options.addOption("u", "usage", false, "Print usage.");

        CommandLineParser parser = new PosixParser();
        CommandLine cl = parser.parse(options, args, false);

        if (args.length == 0 || cl.hasOption("u")) {

        if (cl.hasOption("l")) {
            rwThrottle = Integer.parseInt(cl.getOptionValue("l"));
        } else {
            rwThrottle = Constants.READLOAD;
        // Get all command line options
        params = Utils.parseParameters(cl);

        //Get client instance
        AerospikeClient client = new AerospikeClient(params.host, params.port);
        if (!client.isConnected()) {
            log.error("Client is not able to connect:" + params.host + ":" + params.port);

        if (params.verbose) {

        // Get available processors to calculate default number of threads
        int cpus = Runtime.getRuntime().availableProcessors();
        nWriterThreads = cpus * scaleFactor;
        nReaderThreads = cpus;

        // Get writer thread count
        if (cl.hasOption("wt")) {
            nWriterThreads = Integer.parseInt(cl.getOptionValue("wt"));
            nWriterThreads = (nWriterThreads > 0
                    ? (nWriterThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nWriterThreads)
                    : 1);
            log.debug("Using writer Threads: " + nWriterThreads);
        writerPool = Executors.newFixedThreadPool(nWriterThreads);

        // Get reader thread count
        if (cl.hasOption("rt")) {
            nReaderThreads = Integer.parseInt(cl.getOptionValue("rt"));
            nReaderThreads = (nReaderThreads > 0
                    ? (nReaderThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nReaderThreads)
                    : 1);
            log.debug("Using reader Threads: " + nReaderThreads);

        String columnDefinitionFileName = cl.getOptionValue("c", null);

        log.debug("Column definition files/directory: " + columnDefinitionFileName);
        if (columnDefinitionFileName == null) {
            log.error("Column definition files/directory not specified. use -c <file name>");

        File columnDefinitionFile = new File(columnDefinitionFileName);
        if (!columnDefinitionFile.exists()) {
            log.error("Column definition files/directory does not exist: "
                    + Utils.getFileName(columnDefinitionFileName));

        // Get data file list
        String[] files = cl.getArgs();
        if (files.length == 0) {
            log.error("No data file Specified: add <file/dir name> to end of the command ");
        List<String> allFileNames = new ArrayList<String>();
        allFileNames = Utils.getFileNames(files);
        if (allFileNames.size() == 0) {
            log.error("Given datafiles/directory does not exist");
        for (int i = 0; i < allFileNames.size(); i++) {
            log.debug("File names:" + Utils.getFileName(allFileNames.get(i)));
            File file = new File(allFileNames.get(i));
            counters.write.recordTotal = counters.write.recordTotal + file.length();

        //remove column definition file from list

        log.info("Number of data files:" + allFileNames.size());

         * Process column definition file to get meta data and bin mapping.
        metadataColumnDefs = new ArrayList<ColumnDefinition>();
        binColumnDefs = new ArrayList<ColumnDefinition>();
        metadataConfigs = new HashMap<String, String>();

        if (Parser.processJSONColumnDefinitions(columnDefinitionFile, metadataConfigs, metadataColumnDefs,
                binColumnDefs, params)) {
            log.info("Config file processed.");
        } else {
            throw new Exception("Config file parsing Error");

        // Add metadata of config to parameters
        String metadata;
        if ((metadata = metadataConfigs.get(Constants.INPUT_TYPE)) != null) {
            params.fileType = metadata;
            if (params.fileType.equals(Constants.CSV_FILE)) {

                // Version check
                metadata = metadataConfigs.get(Constants.VERSION);
                String[] vNumber = metadata.split("\\.");
                int v1 = Integer.parseInt(vNumber[0]);
                int v2 = Integer.parseInt(vNumber[1]);
                if ((v1 <= Constants.MajorV) && (v2 <= Constants.MinorV)) {
                    log.debug("Config version used:" + metadata);
                } else
                    throw new Exception("\"" + Constants.VERSION + ":" + metadata + "\" is not Supported");

                // Set delimiter 
                if ((metadata = metadataConfigs.get(Constants.DELIMITER)) != null && metadata.length() == 1) {
                    params.delimiter = metadata.charAt(0);
                } else {
                    log.warn("\"" + Constants.DELIMITER + ":" + metadata
                            + "\" is not properly specified in config file. Default is ','");

                if ((metadata = metadataConfigs.get(Constants.IGNORE_FIRST_LINE)) != null) {
                    params.ignoreFirstLine = metadata.equals("true");
                } else {
                    log.warn("\"" + Constants.IGNORE_FIRST_LINE + ":" + metadata
                            + "\" is not properly specified in config file. Default is false");

                if ((metadata = metadataConfigs.get(Constants.COLUMNS)) != null) {
                    counters.write.colTotal = Integer.parseInt(metadata);
                } else {
                    throw new Exception("\"" + Constants.COLUMNS + ":" + metadata
                            + "\" is not properly specified in config file");
            } else {
                throw new Exception("\"" + params.fileType + "\" is not supported in config file");
        } else {
            throw new Exception("\"" + Constants.INPUT_TYPE + "\" is not specified in config file");

        // add config input to column definitions
        if (params.fileType.equals(Constants.CSV_FILE)) {
            List<String> binName = null;
            if (params.ignoreFirstLine) {
                String line;
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(allFileNames.get(0)), "UTF8"));
                if ((line = br.readLine()) != null) {
                    binName = Parser.getCSVRawColumns(line, params.delimiter);
                    if (binName.size() != counters.write.colTotal) {
                        throw new Exception("Number of column in config file and datafile are mismatch."
                                + " Datafile: " + Utils.getFileName(allFileNames.get(0)) + " Configfile: "
                                + Utils.getFileName(columnDefinitionFileName));

            //update columndefs for metadata
            for (int i = 0; i < metadataColumnDefs.size(); i++) {
                if (metadataColumnDefs.get(i).staticValue) {

                } else {
                    if (metadataColumnDefs.get(i).binValuePos < 0) {
                        if (metadataColumnDefs.get(i).columnName == null) {
                            if (metadataColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic metadata having improper info"
                                        + metadataColumnDefs.toString()); //TODO
                            } else {
                                //TODO check for json_path   
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(metadataColumnDefs.get(i).binValueHeader) != -1) {
                                    metadataColumnDefs.get(i).binValuePos = binName
                                } else {
                                    throw new Exception("binName missing in data file:"
                                            + metadataColumnDefs.get(i).binValueHeader);
                    } else {
                        if (params.ignoreFirstLine)
                            metadataColumnDefs.get(i).binValueHeader = binName
                if ((!metadataColumnDefs.get(i).staticValue) && (metadataColumnDefs.get(i).binValuePos < 0)) {
                    throw new Exception("Information for bin mapping is missing in config file:"
                            + metadataColumnDefs.get(i));

                if (metadataColumnDefs.get(i).srcType == null) {
                    throw new Exception(
                            "Source data type is not properly mentioned:" + metadataColumnDefs.get(i));

                if (metadataColumnDefs.get(i).binNameHeader == Constants.SET
                        && !metadataColumnDefs.get(i).srcType.equals(SrcColumnType.STRING)) {
                    throw new Exception("Set name should be string type:" + metadataColumnDefs.get(i));

                if (metadataColumnDefs.get(i).binNameHeader.equalsIgnoreCase(Constants.SET)
                        && params.set != null) {
                    throw new Exception(
                            "Set name is given both in config file and commandline. Provide only once.");

            //update columndefs for bins
            for (int i = 0; i < binColumnDefs.size(); i++) {
                if (binColumnDefs.get(i).staticName) {

                } else {
                    if (binColumnDefs.get(i).binNamePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic bin having improper info"); //TODO
                            } else {
                                //TODO check for json_path
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(binColumnDefs.get(i).binNameHeader) != -1) {
                                    binColumnDefs.get(i).binNamePos = binName
                                } else {
                                    throw new Exception("binName missing in data file:"
                                            + binColumnDefs.get(i).binNameHeader);
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binNameHeader = binName.get(binColumnDefs.get(i).binNamePos);

                if (binColumnDefs.get(i).staticValue) {

                } else {
                    if (binColumnDefs.get(i).binValuePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic bin having improper info"); //TODO
                            } else {
                                //TODO check for json_path
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.contains(binColumnDefs.get(i).binValueHeader)) {
                                    binColumnDefs.get(i).binValuePos = binName
                                } else if (!binColumnDefs.get(i).binValueHeader.toLowerCase()
                                        .equals(Constants.SYSTEM_TIME)) {
                                    throw new Exception("Wrong column name mentioned in config file:"
                                            + binColumnDefs.get(i).binValueHeader);
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binValueHeader = binName.get(binColumnDefs.get(i).binValuePos);

                    //check for missing entries in config file
                    if (binColumnDefs.get(i).binValuePos < 0 && binColumnDefs.get(i).binValueHeader == null) {
                        throw new Exception("Information missing(Value header or bin mapping) in config file:"
                                + binColumnDefs.get(i));

                    //check for proper data type in config file.
                    if (binColumnDefs.get(i).srcType == null) {
                        throw new Exception(
                                "Source data type is not properly mentioned:" + binColumnDefs.get(i));

                    //check for valid destination type
                    if ((binColumnDefs.get(i).srcType.equals(SrcColumnType.TIMESTAMP)
                            || binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB))
                            && binColumnDefs.get(i).dstType == null) {
                        throw new Exception("Destination type is not mentioned: " + binColumnDefs.get(i));

                    //check for encoding
                    if (binColumnDefs.get(i).dstType != null && binColumnDefs.get(i).encoding == null) {
                        throw new Exception(
                                "Encoding is not given for src-dst type conversion:" + binColumnDefs.get(i));

                    //check for valid encoding
                    if (binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB)
                            && !binColumnDefs.get(i).encoding.equals(Constants.HEX_ENCODING)) {
                        throw new Exception("Wrong encoding for blob data:" + binColumnDefs.get(i));

                //Check static bin name mapped to dynamic bin value
                if ((binColumnDefs.get(i).binNamePos == binColumnDefs.get(i).binValuePos)
                        && (binColumnDefs.get(i).binNamePos != -1)) {
                    throw new Exception("Static bin name mapped to dynamic bin value:" + binColumnDefs.get(i));

                //check for missing entries in config file
                if (binColumnDefs.get(i).binNameHeader == null
                        && binColumnDefs.get(i).binNameHeader.length() > Constants.BIN_NAME_LENGTH) {
                    throw new Exception("Information missing binName or large binName in config file:"
                            + binColumnDefs.get(i));

        log.debug("MetadataConfig:" + metadataColumnDefs);
        log.debug("BinColumnDefs:" + binColumnDefs);

        // Start PrintStat thread

        // Reader pool size
        ExecutorService readerPool = Executors.newFixedThreadPool(
                nReaderThreads > allFileNames.size() ? allFileNames.size() : nReaderThreads);
        log.info("Reader pool size : " + nReaderThreads);

        // Submit all tasks to writer threadpool.
        for (String aFile : allFileNames) {
            log.debug("Submitting task for: " + aFile);
            readerPool.submit(new AerospikeLoad(aFile, client, params));

        // Wait for reader pool to complete
        log.info("Shutdown down reader thread pool");

        while (!readerPool.isTerminated())
        //readerPool.awaitTermination(20, TimeUnit.MINUTES);
        log.info("Reader thread pool terminated");

        // Wait for writer pool to complete after getting all tasks from reader pool
        log.info("Shutdown down writer thread pool");

        while (!writerPool.isTerminated())
        log.info("Writer thread pool terminated");

        // Print final statistic of aerospike-loader.
        log.info("Final Statistics of importer: (Succesfull Writes = " + counters.write.writeCount.get() + ", "
                + "Errors="
                + (counters.write.writeErrors.get() + counters.write.readErrors.get()
                        + counters.write.processingErrors.get())
                + "(" + (counters.write.writeErrors.get()) + "-Write," + counters.write.readErrors.get()
                + "-Read," + counters.write.processingErrors.get() + "-Processing)");
    } catch (Exception e) {
        if (log.isDebugEnabled()) {
    } finally {
        // Stop statistic printer thread.
        log.info("Aerospike loader completed");

From source file:accumulo.AccumuloStuff.java

public static void main(String[] args) throws Exception {
    File tmp = new File(System.getProperty("user.dir") + "/target/mac-test");
    if (tmp.exists()) {
        FileUtils.deleteDirectory(tmp);/*from   w  ww .  ja v  a 2  s.  c  om*/
    String passwd = "password";

    MiniAccumuloConfigImpl cfg = new MiniAccumuloConfigImpl(tmp, passwd);
    //    cfg.useMiniDFS(true);

    final MiniAccumuloClusterImpl cluster = cfg.build();

    ExecutorService svc = Executors.newFixedThreadPool(2);

    try {
        Connector conn = cluster.getConnector("root", passwd);
        String table = "table";

        final BatchWriter bw = conn.createBatchWriter(table, new BatchWriterConfig());
        final AtomicBoolean flushed = new AtomicBoolean(false);

        Runnable writer = new Runnable() {
            public void run() {
                try {
                    Mutation m = new Mutation("row");
                    m.put("colf", "colq", "value");
                } catch (Exception e) {
                    log.error("Got exception trying to flush mutation", e);

                log.info("Exiting batchwriter thread");

        Runnable restarter = new Runnable() {
            public void run() {
                try {
                    for (ProcessReference proc : cluster.getProcesses().get(ServerType.TABLET_SERVER)) {
                        cluster.killProcess(ServerType.TABLET_SERVER, proc);
                } catch (Exception e) {
                    log.error("Caught exception restarting tabletserver", e);
                log.info("Exiting restart thread");


        log.info("Waiting for shutdown");
        if (!svc.awaitTermination(120, TimeUnit.SECONDS)) {
            log.info("Timeout on shutdown exceeded");
        } else {
            log.info("Cleanly shutdown");
            log.info("Threadpool is terminated? " + svc.isTerminated());

        if (flushed.get()) {
            log.info("****** BatchWriter was flushed *********");
        } else {
            log.info("****** BatchWriter was NOT flushed *********");


        log.info("Got record {}", Iterables.getOnlyElement(conn.createScanner(table, Authorizations.EMPTY)));
    } finally {