List of usage examples for java.util.concurrent.ExecutorService.isTerminated()
boolean isTerminated();

Returns true if all tasks have completed following shut down. Note that isTerminated is never true unless either shutdown() or shutdownNow() was called first.
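Before the longer examples, here is a minimal, self-contained sketch (the class name IsTerminatedDemo and the task body are ours, not from any of the source files below) of the method's contract: isTerminated() stays false until shutdown() is called and every submitted task has finished.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class IsTerminatedDemo {
    public static void main(String[] args) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(2);
        pool.execute(() -> System.out.println("working..."));

        System.out.println(pool.isTerminated()); // false: shutdown() has not been called

        pool.shutdown(); // no new tasks accepted; already-submitted tasks still run
        pool.awaitTermination(5, TimeUnit.SECONDS); // block instead of polling

        System.out.println(pool.isTerminated()); // true once every task has completed
    }
}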
From source file:Main.java
public static void main(String[] args) {
    ExecutorService pool = Executors.newCachedThreadPool();
    Main app = new Main();
    for (int i = 0; i < 1000; i++) {
        pool.execute(app.new Adding()); // Adding is an inner Runnable that increments app.sum
    }
    pool.shutdown();
    // Busy-wait: isTerminated() only becomes true after shutdown() once all tasks finish
    while (!pool.isTerminated()) {
        System.out.println(" Is it done? : " + pool.isTerminated());
    }
    System.out.println(" Is it done? : " + pool.isTerminated());
    System.out.println("Sum is " + app.sum);
}
From source file:WordLengthCallable.java
public static void main(String[] args) throws Exception {
    final int THREAD_COUNT = 4;
    ExecutorService execService = Executors.newFixedThreadPool(THREAD_COUNT);
    CompletionService<Integer> completionService = new ExecutorCompletionService<>(execService);
    for (int i = 0; i < THREAD_COUNT; i++) {
        completionService.submit(new WordLengthCallable());
    }
    execService.shutdown();
    // Drain results while the pool winds down; note that the isTerminated() check
    // races with task completion, so a result arriving after the pool terminates
    // would not be printed
    while (!execService.isTerminated()) {
        int result = completionService.take().get().intValue();
        System.out.println("Result is: " + result);
    }
    Thread.sleep(1000);
    System.out.println("done!");
}
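Both examples above poll isTerminated() in a tight loop, which keeps a CPU core busy. Where blocking is acceptable, a common alternative (a sketch of our own, not taken from any of the source files listed here) is awaitTermination() with a shutdownNow() fallback; isTerminated() then serves only as a final check.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class GracefulShutdown {
    public static void main(String[] args) {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        for (int i = 0; i < 100; i++) {
            final int id = i;
            pool.execute(() -> System.out.println("task " + id));
        }
        pool.shutdown();
        try {
            // Block up to 30 s instead of spinning on isTerminated()
            if (!pool.awaitTermination(30, TimeUnit.SECONDS)) {
                pool.shutdownNow(); // cancel tasks that are still running
            }
        } catch (InterruptedException e) {
            pool.shutdownNow();
            Thread.currentThread().interrupt();
        }
        System.out.println("Terminated: " + pool.isTerminated());
    }
}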
From source file:fr.inria.edelweiss.kgdqp.core.CentralizedInferrencing.java
public static void main(String args[]) throws ParseException, EngineException, InterruptedException, IOException {
    List<String> endpoints = new ArrayList<String>();
    String queryPath = null;
    boolean rulesSelection = false;
    File rulesDir = null;
    File ontDir = null;

    Graph graph = Graph.create();
    QueryProcess exec = QueryProcess.create(graph);

    Options options = new Options();
    Option helpOpt = new Option("h", "help", false, "print this message");
    // Option queryOpt = new Option("q", "query", true, "specify the sparql query file");
    // Option endpointOpt = new Option("e", "endpoint", true, "a federated sparql endpoint URL");
    Option versionOpt = new Option("v", "version", false, "print the version information and exit");
    Option rulesOpt = new Option("r", "rulesDir", true, "directory containing the inference rules");
    Option ontOpt = new Option("o", "ontologiesDir", true, "directory containing the ontologies for rules selection");
    // Option locOpt = new Option("c", "centralized", false, "performs centralized inferences");
    Option dataOpt = new Option("l", "load", true, "data file or directory to be loaded");
    // Option selOpt = new Option("s", "rulesSelection", false, "if set to true, only the applicable rules are run");
    // options.addOption(queryOpt);
    // options.addOption(endpointOpt);
    options.addOption(helpOpt);
    options.addOption(versionOpt);
    options.addOption(rulesOpt);
    options.addOption(ontOpt);
    // options.addOption(selOpt);
    // options.addOption(locOpt);
    options.addOption(dataOpt);

    String header = "Corese/KGRAM rule engine experiment command line interface";
    String footer = "\nPlease report any issue to alban.gaignard@cnrs.fr, olivier.corby@inria.fr";

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);

    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("kgdqp", header, options, footer, true);
        System.exit(0);
    }
    if (cmd.hasOption("o")) {
        rulesSelection = true;
        String ontDirPath = cmd.getOptionValue("o");
        ontDir = new File(ontDirPath);
        if (!ontDir.isDirectory()) {
            logger.warn(ontDirPath + " is not a valid directory path.");
            System.exit(0);
        }
    }
    if (!cmd.hasOption("r")) {
        logger.info("You must specify a path for the inference rules directory!");
        System.exit(0);
    }
    if (cmd.hasOption("l")) {
        String[] dataPaths = cmd.getOptionValues("l");
        for (String path : dataPaths) {
            Load ld = Load.create(graph);
            ld.load(path);
            logger.info("Loaded " + path);
        }
    }
    if (cmd.hasOption("v")) {
        logger.info("version 3.0.4-SNAPSHOT");
        System.exit(0);
    }

    String rulesDirPath = cmd.getOptionValue("r");
    rulesDir = new File(rulesDirPath);
    if (!rulesDir.isDirectory()) {
        logger.warn(rulesDirPath + " is not a valid directory path.");
        System.exit(0);
    }

    // Local rules graph initialization
    Graph rulesG = Graph.create();
    Load ld = Load.create(rulesG);

    if (rulesSelection) {
        // Ontology loading
        if (ontDir.isDirectory()) {
            for (File o : ontDir.listFiles()) {
                logger.info("Loading " + o.getAbsolutePath());
                ld.load(o.getAbsolutePath());
            }
        }
    }

    // Rules loading
    if (rulesDir.isDirectory()) {
        for (File r : rulesDir.listFiles()) {
            logger.info("Loading " + r.getAbsolutePath());
            ld.load(r.getAbsolutePath());
        }
    }

    // Rule engine initialization
    RuleEngine ruleEngine = RuleEngine.create(graph);
    ruleEngine.set(exec);
    ruleEngine.setOptimize(true);
    ruleEngine.setConstructResult(true);
    ruleEngine.setTrace(true);

    StopWatch sw = new StopWatch();
    logger.info("Federated graph size : " + graph.size());
    logger.info("Rules graph size : " + rulesG.size());

    // Rule selection
    logger.info("Rules selection");
    QueryProcess localKgram = QueryProcess.create(rulesG);
    ArrayList<String> applicableRules = new ArrayList<String>();
    sw.start();
    String rulesSelQuery = "";
    if (rulesSelection) {
        rulesSelQuery = pertinentRulesQuery;
    } else {
        rulesSelQuery = allRulesQuery;
    }
    Mappings maps = localKgram.query(rulesSelQuery);
    logger.info("Rules selected in " + sw.getTime() + " ms");
    logger.info("Applicable rules : " + maps.size());

    // Selected rule loading
    for (Mapping map : maps) {
        IDatatype dt = (IDatatype) map.getValue("?res");
        String rule = dt.getLabel();
        // Load the rule into the rule engine
        // logger.info("Adding rule : ");
        // System.out.println(rule);
        // if (! rule.toLowerCase().contains("sameas")) {
        applicableRules.add(rule);
        ruleEngine.addRule(rule);
        // }
    }

    // Rules application on distributed sparql endpoints
    logger.info("Rules application (" + applicableRules.size() + " rules)");
    ExecutorService threadPool = Executors.newCachedThreadPool();
    RuleEngineThread ruleThread = new RuleEngineThread(ruleEngine);
    sw.reset();
    sw.start();

    // ruleEngine.process();
    threadPool.execute(ruleThread);
    threadPool.shutdown();

    // Monitoring loop: poll isTerminated() and report progress every 5 s
    while (!threadPool.isTerminated()) {
        // System.out.println(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter,
        //         QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));
        // System.out.println("Rule engine running for " + sw.getTime() + " ms");
        System.out.println(sw.getTime() + " , " + graph.size());
        Thread.sleep(5000);
    }
    logger.info("Federated graph size : " + graph.size());
    // logger.info(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter,
    //         QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));
    // TripleFormat f = TripleFormat.create(graph, true);
    // f.write("/tmp/gAll.ttl");
}
From source file:fr.inria.edelweiss.kgdqp.core.CentralizedInferrencingNoSpin.java
public static void main(String args[]) throws ParseException, EngineException, InterruptedException, IOException, LoadException {
    List<String> endpoints = new ArrayList<String>();
    String queryPath = null;
    boolean rulesSelection = false;
    File rulesDir = null;
    File ontDir = null;

    Graph graph = Graph.create();
    QueryProcess exec = QueryProcess.create(graph);

    Options options = new Options();
    Option helpOpt = new Option("h", "help", false, "print this message");
    // Option queryOpt = new Option("q", "query", true, "specify the sparql query file");
    // Option endpointOpt = new Option("e", "endpoint", true, "a federated sparql endpoint URL");
    Option versionOpt = new Option("v", "version", false, "print the version information and exit");
    Option rulesOpt = new Option("r", "rulesDir", true, "directory containing the inference rules");
    Option ontOpt = new Option("o", "ontologiesDir", true, "directory containing the ontologies for rules selection");
    // Option locOpt = new Option("c", "centralized", false, "performs centralized inferences");
    Option dataOpt = new Option("l", "load", true, "data file or directory to be loaded");
    // Option selOpt = new Option("s", "rulesSelection", false, "if set to true, only the applicable rules are run");
    // options.addOption(queryOpt);
    // options.addOption(endpointOpt);
    options.addOption(helpOpt);
    options.addOption(versionOpt);
    options.addOption(rulesOpt);
    options.addOption(ontOpt);
    // options.addOption(selOpt);
    // options.addOption(locOpt);
    options.addOption(dataOpt);

    String header = "Corese/KGRAM rule engine experiment command line interface";
    String footer = "\nPlease report any issue to alban.gaignard@cnrs.fr, olivier.corby@inria.fr";

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);

    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("kgdqp", header, options, footer, true);
        System.exit(0);
    }
    if (cmd.hasOption("o")) {
        rulesSelection = true;
        String ontDirPath = cmd.getOptionValue("o");
        ontDir = new File(ontDirPath);
        if (!ontDir.isDirectory()) {
            logger.warn(ontDirPath + " is not a valid directory path.");
            System.exit(0);
        }
    }
    if (!cmd.hasOption("r")) {
        logger.info("You must specify a path for the inference rules directory!");
        System.exit(0);
    }
    if (cmd.hasOption("l")) {
        String[] dataPaths = cmd.getOptionValues("l");
        for (String path : dataPaths) {
            Load ld = Load.create(graph);
            ld.load(path);
            logger.info("Loaded " + path);
        }
    }
    if (cmd.hasOption("v")) {
        logger.info("version 3.0.4-SNAPSHOT");
        System.exit(0);
    }

    String rulesDirPath = cmd.getOptionValue("r");
    rulesDir = new File(rulesDirPath);
    if (!rulesDir.isDirectory()) {
        logger.warn(rulesDirPath + " is not a valid directory path.");
        System.exit(0);
    }

    // Local rules graph initialization
    Graph rulesG = Graph.create();
    Load ld = Load.create(rulesG);

    if (rulesSelection) {
        // Ontology loading
        if (ontDir.isDirectory()) {
            for (File o : ontDir.listFiles()) {
                logger.info("Loading " + o.getAbsolutePath());
                ld.load(o.getAbsolutePath());
            }
        }
    }

    // Rules loading: each .rq file is read, converted to SPIN, and loaded as Turtle
    if (rulesDir.isDirectory()) {
        for (File r : rulesDir.listFiles()) {
            if (r.getAbsolutePath().endsWith(".rq")) {
                logger.info("Loading " + r.getAbsolutePath());
                // ld.load(r.getAbsolutePath());
                // byte[] encoded = Files.readAllBytes(Paths.get(r.getAbsolutePath()));
                // String construct = new String(encoded, "UTF-8"); // StandardCharsets.UTF_8
                FileInputStream f = new FileInputStream(r);
                QueryLoad ql = QueryLoad.create();
                String construct = ql.read(f);
                f.close();

                SPINProcess sp = SPINProcess.create();
                String spinConstruct = sp.toSpin(construct);
                ld.load(new ByteArrayInputStream(spinConstruct.getBytes()), Load.TURTLE_FORMAT);
                logger.info("Rules graph size : " + rulesG.size());
            }
        }
    }

    // Rule engine initialization
    RuleEngine ruleEngine = RuleEngine.create(graph);
    ruleEngine.set(exec);
    ruleEngine.setOptimize(true);
    ruleEngine.setConstructResult(true);
    ruleEngine.setTrace(true);

    StopWatch sw = new StopWatch();
    logger.info("Federated graph size : " + graph.size());
    logger.info("Rules graph size : " + rulesG.size());

    // Rule selection
    logger.info("Rules selection");
    QueryProcess localKgram = QueryProcess.create(rulesG);
    ArrayList<String> applicableRules = new ArrayList<String>();
    sw.start();
    String rulesSelQuery = "";
    if (rulesSelection) {
        rulesSelQuery = pertinentRulesQuery;
    } else {
        rulesSelQuery = allRulesQuery;
    }
    Mappings maps = localKgram.query(rulesSelQuery);
    logger.info("Rules selected in " + sw.getTime() + " ms");
    logger.info("Applicable rules : " + maps.size());

    // Selected rule loading
    for (Mapping map : maps) {
        IDatatype dt = (IDatatype) map.getValue("?res");
        String rule = dt.getLabel();
        // Load the rule into the rule engine
        // logger.info("Adding rule : ");
        // System.out.println(rule);
        // if (! rule.toLowerCase().contains("sameas")) {
        applicableRules.add(rule);
        ruleEngine.addRule(rule);
        // }
    }

    // Rules application on distributed sparql endpoints
    logger.info("Rules application (" + applicableRules.size() + " rules)");
    ExecutorService threadPool = Executors.newCachedThreadPool();
    RuleEngineThread ruleThread = new RuleEngineThread(ruleEngine);
    sw.reset();
    sw.start();

    // ruleEngine.process();
    threadPool.execute(ruleThread);
    threadPool.shutdown();

    // Monitoring loop: poll isTerminated() and report progress every 5 s
    while (!threadPool.isTerminated()) {
        // System.out.println(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter,
        //         QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));
        // System.out.println("Rule engine running for " + sw.getTime() + " ms");
        System.out.println(sw.getTime() + " , " + graph.size());
        Thread.sleep(5000);
    }
    logger.info("Federated graph size : " + graph.size());
    // logger.info(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter,
    //         QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));
    // TripleFormat f = TripleFormat.create(graph, true);
    // f.write("/tmp/gAll.ttl");
}
From source file:fr.inria.edelweiss.kgdqp.core.FedInferrencingCLI.java
public static void main(String args[]) throws ParseException, EngineException, InterruptedException {
    List<String> endpoints = new ArrayList<String>();
    String queryPath = null;
    boolean rulesSelection = false;
    File rulesDir = null;
    File ontDir = null;

    Options options = new Options();
    Option helpOpt = new Option("h", "help", false, "print this message");
    Option queryOpt = new Option("q", "query", true, "specify the sparql query file");
    Option endpointOpt = new Option("e", "endpoint", true, "a federated sparql endpoint URL");
    Option versionOpt = new Option("v", "version", false, "print the version information and exit");
    Option rulesOpt = new Option("r", "rulesDir", true, "directory containing the inference rules");
    Option ontOpt = new Option("o", "ontologiesDir", true, "directory containing the ontologies for rules selection");
    // Option selOpt = new Option("s", "rulesSelection", false, "if set to true, only the applicable rules are run");
    options.addOption(queryOpt);
    options.addOption(endpointOpt);
    options.addOption(helpOpt);
    options.addOption(versionOpt);
    options.addOption(rulesOpt);
    options.addOption(ontOpt);
    // options.addOption(selOpt);

    String header = "Corese/KGRAM distributed rule engine command line interface";
    String footer = "\nPlease report any issue to alban.gaignard@cnrs.fr, olivier.corby@inria.fr";

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);

    if (cmd.hasOption("h")) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("kgdqp", header, options, footer, true);
        System.exit(0);
    }
    if (!cmd.hasOption("e")) {
        logger.info("You must specify at least the URL of one sparql endpoint!");
        System.exit(0);
    } else {
        endpoints = new ArrayList<String>(Arrays.asList(cmd.getOptionValues("e")));
    }
    if (cmd.hasOption("o")) {
        rulesSelection = true;
        String ontDirPath = cmd.getOptionValue("o");
        ontDir = new File(ontDirPath);
        if (!ontDir.isDirectory()) {
            logger.warn(ontDirPath + " is not a valid directory path.");
            System.exit(0);
        }
    }
    if (!cmd.hasOption("r")) {
        logger.info("You must specify a path for the inference rules directory!");
        System.exit(0);
    }
    if (cmd.hasOption("v")) {
        logger.info("version 3.0.4-SNAPSHOT");
        System.exit(0);
    }

    String rulesDirPath = cmd.getOptionValue("r");
    rulesDir = new File(rulesDirPath);
    if (!rulesDir.isDirectory()) {
        logger.warn(rulesDirPath + " is not a valid directory path.");
        System.exit(0);
    }

    Graph graph = Graph.create();
    QueryProcessDQP execDQP = QueryProcessDQP.create(graph);
    for (String url : endpoints) {
        try {
            execDQP.addRemote(new URL(url), WSImplem.REST);
        } catch (MalformedURLException ex) {
            logger.error(url + " is not a well-formed URL");
            System.exit(1);
        }
    }

    // Local rules graph initialization
    Graph rulesG = Graph.create();
    Load ld = Load.create(rulesG);

    if (rulesSelection) {
        // Ontology loading
        if (ontDir.isDirectory()) {
            for (File o : ontDir.listFiles()) {
                logger.info("Loading " + o.getAbsolutePath());
                ld.load(o.getAbsolutePath());
            }
        }
    }

    // Rules loading
    if (rulesDir.isDirectory()) {
        for (File r : rulesDir.listFiles()) {
            logger.info("Loading " + r.getAbsolutePath());
            ld.load(r.getAbsolutePath());
        }
    }

    // Rule engine initialization
    RuleEngine ruleEngine = RuleEngine.create(graph);
    ruleEngine.set(execDQP);

    StopWatch sw = new StopWatch();
    logger.info("Federated graph size : " + graph.size());
    logger.info("Rules graph size : " + rulesG.size());

    // Rule selection
    logger.info("Rules selection");
    QueryProcess localKgram = QueryProcess.create(rulesG);
    ArrayList<String> applicableRules = new ArrayList<String>();
    sw.start();
    String rulesSelQuery = "";
    if (rulesSelection) {
        rulesSelQuery = pertinentRulesQuery;
    } else {
        rulesSelQuery = allRulesQuery;
    }
    Mappings maps = localKgram.query(rulesSelQuery);
    logger.info("Rules selected in " + sw.getTime() + " ms");
    logger.info("Applicable rules : " + maps.size());

    // Selected rule loading
    for (Mapping map : maps) {
        IDatatype dt = (IDatatype) map.getValue("?res");
        String rule = dt.getLabel();
        // logger.info("Adding rule : " + rule);
        applicableRules.add(rule);
        ruleEngine.addRule(rule);
    }

    // Rules application on distributed sparql endpoints
    logger.info("Rules application (" + applicableRules.size() + " rules)");
    ExecutorService threadPool = Executors.newCachedThreadPool();
    RuleEngineThread ruleThread = new RuleEngineThread(ruleEngine);
    sw.reset();
    sw.start();

    // ruleEngine.process();
    threadPool.execute(ruleThread);
    threadPool.shutdown();

    // Monitoring loop: report DQP costs every 10 s until the rule engine thread finishes
    while (!threadPool.isTerminated()) {
        System.out.println("******************************");
        System.out.println(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter,
                QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));
        System.out.println("Rule engine running for " + sw.getTime() + " ms");
        System.out.println("Federated graph size : " + graph.size());
        Thread.sleep(10000);
    }
    logger.info("Federated graph size : " + graph.size());
    logger.info(Util.jsonDqpCost(QueryProcessDQP.queryCounter, QueryProcessDQP.queryVolumeCounter,
            QueryProcessDQP.sourceCounter, QueryProcessDQP.sourceVolumeCounter));

    ///////////// Query file processing
    // StringBuffer fileData = new StringBuffer(1000);
    // BufferedReader reader = null;
    // try {
    //     reader = new BufferedReader(new FileReader(queryPath));
    // } catch (FileNotFoundException ex) {
    //     logger.error("Query file " + queryPath + " not found !");
    //     System.exit(1);
    // }
    // char[] buf = new char[1024];
    // int numRead = 0;
    // try {
    //     while ((numRead = reader.read(buf)) != -1) {
    //         String readData = String.valueOf(buf, 0, numRead);
    //         fileData.append(readData);
    //         buf = new char[1024];
    //     }
    //     reader.close();
    // } catch (IOException ex) {
    //     logger.error("Error while reading query file " + queryPath);
    //     System.exit(1);
    // }
    //
    // String sparqlQuery = fileData.toString();
    // Query q = exec.compile(sparqlQuery, null);
    // System.out.println(q);
    //
    // StopWatch sw = new StopWatch();
    // sw.start();
    // Mappings map = exec.query(sparqlQuery);
    // int dqpSize = map.size();
    // System.out.println("--------");
    // long time = sw.getTime();
    // System.out.println(time + " " + dqpSize);
}
From source file:io.bfscan.clueweb12.BuildWarcTrecIdMapping.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file") .create(INPUT_OPTION));// w w w . j a va2s . c o m options.addOption( OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg() .withDescription("maximum number of documents to index").create(MAX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads") .create(THREADS_OPTION)); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(BuildWarcTrecIdMapping.class.getCanonicalName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION)) : Integer.MAX_VALUE; int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION)) : DEFAULT_NUM_THREADS; long startTime = System.currentTimeMillis(); String path = cmdline.getOptionValue(INPUT_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); Directory dir = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); LOG.info("Creating index at " + indexPath); LOG.info("Indexing with " + threads + " threads"); FileInputStream fis = null; BufferedReader br = null; try { fis = new FileInputStream(new File(path)); byte[] ignoreBytes = new byte[2]; fis.read(ignoreBytes); // "B", "Z" bytes from commandline tools br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fis), "UTF8")); ExecutorService executor = Executors.newFixedThreadPool(threads); int cnt = 0; String s; while ((s = br.readLine()) != null) { Runnable worker = new AddDocumentRunnable(writer, s); executor.execute(worker); cnt++; if (cnt % 1000000 == 0) { LOG.info(cnt + " articles added"); } if (cnt >= maxdocs) { break; } } executor.shutdown(); // Wait until all threads are finish while (!executor.isTerminated()) { } LOG.info("Total of " + cnt + " articles indexed."); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); out.close(); br.close(); fis.close(); } }
From source file:general.Main.java
/**
 * Selects the files to be processed and specifies the files to write to.
 *
 * @param args Arguments to specify runtime behavior.
 */
public static void main(String[] args) throws InvocationTargetException, NoSuchMethodException,
        InstantiationException, IllegalAccessException {
    Options options = new Options();
    options.addOption("l", "logging", false, "enables file logging");
    options.addOption("j", "jena", false, "uses the Jena SPARQL Parser");
    options.addOption("o", "openrdf", false, "uses the OpenRDF SPARQL Parser");
    options.addOption("f", "file", true, "defines the input file prefix");
    options.addOption("h", "help", false, "displays this help");
    options.addOption("t", "tsv", false, "reads from .tsv-files");
    // options.addOption("p", "parquet", false, "read from .parquet-files");
    options.addOption("n", "numberOfThreads", true, "number of used threads, default 1");
    options.addOption("b", "withBots", false, "enables metric calculation for bot queries+");
    options.addOption("p", "readPreprocessed", false, "enables reading of preprocessed files");

    // Some parameters that can be changed through command line options
    // QueryHandler queryHandler = new OpenRDFQueryHandler();
    String inputFilePrefix;
    String inputFileSuffix = ".tsv";
    String queryParserName = "OpenRDF";
    Class inputHandlerClass = null;
    Class queryHandlerClass = null;
    int numberOfThreads = 1;

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd;
    try {
        cmd = parser.parse(options, args);
        if (cmd.hasOption("help")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("help", options);
            return;
        }
        if (cmd.hasOption("openrdf")) {
            queryHandlerClass = OpenRDFQueryHandler.class;
        }
        if (cmd.hasOption("tsv")) {
            inputFileSuffix = ".tsv";
            inputHandlerClass = InputHandlerTSV.class;
        }
        if (cmd.hasOption("parquet")) {
            inputFileSuffix = ".parquet";
            Logger.getLogger("org").setLevel(Level.WARN);
            Logger.getLogger("akka").setLevel(Level.WARN);
            SparkConf conf = new SparkConf().setAppName("SPARQLQueryAnalyzer").setMaster("local");
            JavaSparkContext sc = new JavaSparkContext(conf);
            inputHandlerClass = InputHandlerParquet.class;
        }
        if (inputHandlerClass == null) {
            System.out.println("Please specify which parser to use, either -t for TSV or -p for parquet.");
        }
        if (cmd.hasOption("file")) {
            inputFilePrefix = cmd.getOptionValue("file").trim();
        } else {
            System.out.println("Please specify at least the file which we should work on"
                    + " using the option '--file PREFIX' or '-f PREFIX'");
            return;
        }
        if (cmd.hasOption("logging")) {
            LoggingHandler.initFileLog(queryParserName, inputFilePrefix);
        }
        if (cmd.hasOption("numberOfThreads")) {
            numberOfThreads = Integer.parseInt(cmd.getOptionValue("numberOfThreads"));
        }
        if (cmd.hasOption("withBots")) {
            withBots = true;
        }
        if (cmd.hasOption("readPreprocessed")) {
            readPreprocessed = true;
        }
    } catch (UnrecognizedOptionException e) {
        System.out.println("Unrecognized commandline option: " + e.getOption());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("help", options);
        return;
    } catch (ParseException e) {
        System.out.println("There was an error while parsing your command line input."
                + " Did you recheck your syntax before running?");
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("help", options);
        return;
    }

    LoggingHandler.initConsoleLog();
    loadPreBuildQueryTypes();

    long startTime = System.nanoTime();

    ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
    for (int day = 1; day <= 31; day++) {
        String inputFile = inputFilePrefix + String.format("%02d", day) + inputFileSuffix;
        Runnable parseOneMonthWorker = new ParseOneMonthWorker(inputFile, inputFilePrefix,
                inputHandlerClass, queryParserName, queryHandlerClass, day);
        executor.execute(parseOneMonthWorker);
    }
    executor.shutdown();
    while (!executor.isTerminated()) {
        // wait until all workers are finished
    }

    writeQueryTypes(inputFilePrefix);

    long stopTime = System.nanoTime();
    long millis = TimeUnit.MILLISECONDS.convert(stopTime - startTime, TimeUnit.NANOSECONDS);
    Date date = new Date(millis);
    // "MM" is the month pattern; the original used "mm" (minutes) here
    System.out.println("Finished executing with all threads: "
            + new SimpleDateFormat("MM-dd HH:mm:ss:SSSSSSS").format(date));
}
From source file:cc.wikitools.lucene.IndexWikipediaDump.java
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("bz2 Wikipedia XML dump file") .create(INPUT_OPTION));//from ww w .j a v a 2 s . c om options.addOption( OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg() .withDescription("maximum number of documents to index").create(MAX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of indexing threads") .create(THREADS_OPTION)); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(IndexWikipediaDump.class.getCanonicalName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); int maxdocs = cmdline.hasOption(MAX_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_OPTION)) : Integer.MAX_VALUE; int threads = cmdline.hasOption(THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(THREADS_OPTION)) : DEFAULT_NUM_THREADS; long startTime = System.currentTimeMillis(); String path = cmdline.getOptionValue(INPUT_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); WikiClean cleaner = new WikiCleanBuilder().withTitle(true).build(); Directory dir = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); LOG.info("Creating index at " + indexPath); LOG.info("Indexing with " + threads + " threads"); try { WikipediaBz2DumpInputStream stream = new WikipediaBz2DumpInputStream(path); ExecutorService executor = Executors.newFixedThreadPool(threads); int cnt = 0; String page; while ((page = stream.readNext()) != null) { String title = cleaner.getTitle(page); // These are heuristic specifically for filtering out non-articles in enwiki-20120104. if (title.startsWith("Wikipedia:") || title.startsWith("Portal:") || title.startsWith("File:")) { continue; } if (page.contains("#REDIRECT") || page.contains("#redirect") || page.contains("#Redirect")) { continue; } Runnable worker = new AddDocumentRunnable(writer, cleaner, page); executor.execute(worker); cnt++; if (cnt % 10000 == 0) { LOG.info(cnt + " articles added"); } if (cnt >= maxdocs) { break; } } executor.shutdown(); // Wait until all threads are finish while (!executor.isTerminated()) { } LOG.info("Total of " + cnt + " articles indexed."); if (cmdline.hasOption(OPTIMIZE_OPTION)) { LOG.info("Merging segments..."); writer.forceMerge(1); LOG.info("Done!"); } LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } catch (Exception e) { e.printStackTrace(); } finally { writer.close(); dir.close(); out.close(); } }
From source file:com.aerospike.load.AerospikeLoad.java
public static void main(String[] args) throws IOException {
    Thread statPrinter = new Thread(new PrintStat(counters));
    try {
        log.info("Aerospike loader started");
        Options options = new Options();
        options.addOption("h", "host", true, "Server hostname (default: localhost)");
        options.addOption("p", "port", true, "Server port (default: 3000)");
        options.addOption("n", "namespace", true, "Namespace (default: test)");
        options.addOption("s", "set", true, "Set name. (default: null)");
        options.addOption("c", "config", true, "Column definition file name");
        options.addOption("wt", "write-threads", true, "Number of writer threads (default: Number of cores * 5)");
        options.addOption("rt", "read-threads", true, "Number of reader threads (default: Number of cores * 1)");
        options.addOption("l", "rw-throttle", true, "Throttling of reader to writer (default: 10k)");
        options.addOption("tt", "transaction-timeout", true, "Write transaction timeout in milliseconds (default: no timeout)");
        options.addOption("et", "expiration-time", true, "Expiration time of records in seconds (default: never expire)");
        options.addOption("T", "timezone", true, "Timezone of source where data dump is taken (default: local timezone)");
        options.addOption("ec", "abort-error-count", true, "Error count to abort (default: 0)");
        options.addOption("wa", "write-action", true, "Write action if key already exists (default: update)");
        options.addOption("v", "verbose", false, "Logging all");
        options.addOption("u", "usage", false, "Print usage.");

        CommandLineParser parser = new PosixParser();
        CommandLine cl = parser.parse(options, args, false);

        if (args.length == 0 || cl.hasOption("u")) {
            logUsage(options);
            return;
        }
        if (cl.hasOption("l")) {
            rwThrottle = Integer.parseInt(cl.getOptionValue("l"));
        } else {
            rwThrottle = Constants.READLOAD;
        }

        // Get all command line options
        params = Utils.parseParameters(cl);

        // Get client instance
        AerospikeClient client = new AerospikeClient(params.host, params.port);
        if (!client.isConnected()) {
            log.error("Client is not able to connect:" + params.host + ":" + params.port);
            return;
        }
        if (params.verbose) {
            log.setLevel(Level.DEBUG);
        }

        // Get available processors to calculate default number of threads
        int cpus = Runtime.getRuntime().availableProcessors();
        nWriterThreads = cpus * scaleFactor;
        nReaderThreads = cpus;

        // Get writer thread count, clamped to [1, Constants.MAX_THREADS]
        if (cl.hasOption("wt")) {
            nWriterThreads = Integer.parseInt(cl.getOptionValue("wt"));
            nWriterThreads = (nWriterThreads > 0
                    ? (nWriterThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nWriterThreads) : 1);
            log.debug("Using writer threads: " + nWriterThreads);
        }
        writerPool = Executors.newFixedThreadPool(nWriterThreads);

        // Get reader thread count, clamped to [1, Constants.MAX_THREADS]
        if (cl.hasOption("rt")) {
            nReaderThreads = Integer.parseInt(cl.getOptionValue("rt"));
            nReaderThreads = (nReaderThreads > 0
                    ? (nReaderThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nReaderThreads) : 1);
            log.debug("Using reader threads: " + nReaderThreads);
        }

        String columnDefinitionFileName = cl.getOptionValue("c", null);
        log.debug("Column definition file/directory: " + columnDefinitionFileName);
        if (columnDefinitionFileName == null) {
            log.error("Column definition file/directory not specified. Use -c <file name>");
            return;
        }
        File columnDefinitionFile = new File(columnDefinitionFileName);
        if (!columnDefinitionFile.exists()) {
            log.error("Column definition file/directory does not exist: "
                    + Utils.getFileName(columnDefinitionFileName));
            return;
        }

        // Get data file list
        String[] files = cl.getArgs();
        if (files.length == 0) {
            log.error("No data file specified: add <file/dir name> to the end of the command");
            return;
        }
        List<String> allFileNames = new ArrayList<String>();
        allFileNames = Utils.getFileNames(files);
        if (allFileNames.size() == 0) {
            log.error("Given data files/directory does not exist");
            return;
        }
        for (int i = 0; i < allFileNames.size(); i++) {
            log.debug("File names: " + Utils.getFileName(allFileNames.get(i)));
            File file = new File(allFileNames.get(i));
            counters.write.recordTotal = counters.write.recordTotal + file.length();
        }

        // Remove the column definition file from the list
        allFileNames.remove(columnDefinitionFileName);
        log.info("Number of data files: " + allFileNames.size());

        // Process column definition file to get metadata and bin mapping
        metadataColumnDefs = new ArrayList<ColumnDefinition>();
        binColumnDefs = new ArrayList<ColumnDefinition>();
        metadataConfigs = new HashMap<String, String>();
        if (Parser.processJSONColumnDefinitions(columnDefinitionFile, metadataConfigs, metadataColumnDefs,
                binColumnDefs, params)) {
            log.info("Config file processed.");
        } else {
            throw new Exception("Config file parsing error");
        }

        // Add metadata of config to parameters
        String metadata;
        if ((metadata = metadataConfigs.get(Constants.INPUT_TYPE)) != null) {
            params.fileType = metadata;
            if (params.fileType.equals(Constants.CSV_FILE)) {
                // Version check
                metadata = metadataConfigs.get(Constants.VERSION);
                String[] vNumber = metadata.split("\\.");
                int v1 = Integer.parseInt(vNumber[0]);
                int v2 = Integer.parseInt(vNumber[1]);
                if ((v1 <= Constants.MajorV) && (v2 <= Constants.MinorV)) {
                    log.debug("Config version used: " + metadata);
                } else {
                    throw new Exception("\"" + Constants.VERSION + ":" + metadata + "\" is not supported");
                }

                // Set delimiter
                if ((metadata = metadataConfigs.get(Constants.DELIMITER)) != null && metadata.length() == 1) {
                    params.delimiter = metadata.charAt(0);
                } else {
                    log.warn("\"" + Constants.DELIMITER + ":" + metadata
                            + "\" is not properly specified in config file. Default is ','");
                }

                if ((metadata = metadataConfigs.get(Constants.IGNORE_FIRST_LINE)) != null) {
                    params.ignoreFirstLine = metadata.equals("true");
                } else {
                    log.warn("\"" + Constants.IGNORE_FIRST_LINE + ":" + metadata
                            + "\" is not properly specified in config file. Default is false");
                }

                if ((metadata = metadataConfigs.get(Constants.COLUMNS)) != null) {
                    counters.write.colTotal = Integer.parseInt(metadata);
                } else {
                    throw new Exception("\"" + Constants.COLUMNS + ":" + metadata
                            + "\" is not properly specified in config file");
                }
            } else {
                throw new Exception("\"" + params.fileType + "\" is not supported in config file");
            }
        } else {
            throw new Exception("\"" + Constants.INPUT_TYPE + "\" is not specified in config file");
        }

        // Add config input to column definitions
        if (params.fileType.equals(Constants.CSV_FILE)) {
            List<String> binName = null;
            if (params.ignoreFirstLine) {
                String line;
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(allFileNames.get(0)), "UTF8"));
                if ((line = br.readLine()) != null) {
                    binName = Parser.getCSVRawColumns(line, params.delimiter);
                    br.close();
                    if (binName.size() != counters.write.colTotal) {
                        throw new Exception("Number of columns in config file and data file do not match."
                                + " Datafile: " + Utils.getFileName(allFileNames.get(0))
                                + " Configfile: " + Utils.getFileName(columnDefinitionFileName));
                    }
                }
            }

            // Update column definitions for metadata
            for (int i = 0; i < metadataColumnDefs.size(); i++) {
                if (metadataColumnDefs.get(i).staticValue) {
                    // static values need no resolution
                } else {
                    if (metadataColumnDefs.get(i).binValuePos < 0) {
                        if (metadataColumnDefs.get(i).columnName == null) {
                            if (metadataColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic metadata having improper info" + metadataColumnDefs.toString()); // TODO
                            } else {
                                // TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(metadataColumnDefs.get(i).binValueHeader) != -1) {
                                    metadataColumnDefs.get(i).binValuePos =
                                            binName.indexOf(metadataColumnDefs.get(i).binValueHeader);
                                } else {
                                    throw new Exception("binName missing in data file:"
                                            + metadataColumnDefs.get(i).binValueHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            metadataColumnDefs.get(i).binValueHeader =
                                    binName.get(metadataColumnDefs.get(i).binValuePos);
                    }
                }
                if ((!metadataColumnDefs.get(i).staticValue) && (metadataColumnDefs.get(i).binValuePos < 0)) {
                    throw new Exception("Information for bin mapping is missing in config file:"
                            + metadataColumnDefs.get(i));
                }
                if (metadataColumnDefs.get(i).srcType == null) {
                    throw new Exception("Source data type is not properly mentioned:" + metadataColumnDefs.get(i));
                }
                if (metadataColumnDefs.get(i).binNameHeader == Constants.SET
                        && !metadataColumnDefs.get(i).srcType.equals(SrcColumnType.STRING)) {
                    throw new Exception("Set name should be string type:" + metadataColumnDefs.get(i));
                }
                if (metadataColumnDefs.get(i).binNameHeader.equalsIgnoreCase(Constants.SET)
                        && params.set != null) {
                    throw new Exception("Set name is given both in config file and commandline. Provide only once.");
                }
            }

            // Update column definitions for bins
            for (int i = 0; i < binColumnDefs.size(); i++) {
                if (binColumnDefs.get(i).staticName) {
                    // static bin names need no resolution
                } else {
                    if (binColumnDefs.get(i).binNamePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic bin having improper info"); // TODO
                            } else {
                                // TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.indexOf(binColumnDefs.get(i).binNameHeader) != -1) {
                                    binColumnDefs.get(i).binNamePos =
                                            binName.indexOf(binColumnDefs.get(i).binNameHeader);
                                } else {
                                    throw new Exception("binName missing in data file:"
                                            + binColumnDefs.get(i).binNameHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binNameHeader = binName.get(binColumnDefs.get(i).binNamePos);
                    }
                }

                if (binColumnDefs.get(i).staticValue) {
                    // static bin values need no resolution
                } else {
                    if (binColumnDefs.get(i).binValuePos < 0) {
                        if (binColumnDefs.get(i).columnName == null) {
                            if (binColumnDefs.get(i).jsonPath == null) {
                                log.error("dynamic bin having improper info"); // TODO
                            } else {
                                // TODO check for json_path
                            }
                        } else {
                            if (params.ignoreFirstLine) {
                                if (binName.contains(binColumnDefs.get(i).binValueHeader)) {
                                    binColumnDefs.get(i).binValuePos =
                                            binName.indexOf(binColumnDefs.get(i).binValueHeader);
                                } else if (!binColumnDefs.get(i).binValueHeader.toLowerCase()
                                        .equals(Constants.SYSTEM_TIME)) {
                                    throw new Exception("Wrong column name mentioned in config file:"
                                            + binColumnDefs.get(i).binValueHeader);
                                }
                            }
                        }
                    } else {
                        if (params.ignoreFirstLine)
                            binColumnDefs.get(i).binValueHeader = binName.get(binColumnDefs.get(i).binValuePos);
                    }

                    // Check for missing entries in config file
                    if (binColumnDefs.get(i).binValuePos < 0 && binColumnDefs.get(i).binValueHeader == null) {
                        throw new Exception("Information missing (value header or bin mapping) in config file:"
                                + binColumnDefs.get(i));
                    }
                    // Check for proper data type in config file
                    if (binColumnDefs.get(i).srcType == null) {
                        throw new Exception("Source data type is not properly mentioned:" + binColumnDefs.get(i));
                    }
                    // Check for valid destination type
                    if ((binColumnDefs.get(i).srcType.equals(SrcColumnType.TIMESTAMP)
                            || binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB))
                            && binColumnDefs.get(i).dstType == null) {
                        throw new Exception("Destination type is not mentioned: " + binColumnDefs.get(i));
                    }
                    // Check for encoding
                    if (binColumnDefs.get(i).dstType != null && binColumnDefs.get(i).encoding == null) {
                        throw new Exception("Encoding is not given for src-dst type conversion:"
                                + binColumnDefs.get(i));
                    }
                    // Check for valid encoding
                    if (binColumnDefs.get(i).srcType.equals(SrcColumnType.BLOB)
                            && !binColumnDefs.get(i).encoding.equals(Constants.HEX_ENCODING)) {
                        throw new Exception("Wrong encoding for blob data:" + binColumnDefs.get(i));
                    }
                }

                // Check static bin name mapped to dynamic bin value
                if ((binColumnDefs.get(i).binNamePos == binColumnDefs.get(i).binValuePos)
                        && (binColumnDefs.get(i).binNamePos != -1)) {
                    throw new Exception("Static bin name mapped to dynamic bin value:" + binColumnDefs.get(i));
                }
                // Reject a missing or overlong bin name
                if (binColumnDefs.get(i).binNameHeader == null
                        || binColumnDefs.get(i).binNameHeader.length() > Constants.BIN_NAME_LENGTH) {
                    throw new Exception("Information missing binName or large binName in config file:"
                            + binColumnDefs.get(i));
                }
            }
        }

        log.info(params.toString());
        log.debug("MetadataConfig:" + metadataColumnDefs);
        log.debug("BinColumnDefs:" + binColumnDefs);

        // Start PrintStat thread
        statPrinter.start();

        // Reader pool size: no larger than the number of data files
        ExecutorService readerPool = Executors.newFixedThreadPool(
                nReaderThreads > allFileNames.size() ? allFileNames.size() : nReaderThreads);
        log.info("Reader pool size : " + nReaderThreads);

        // Submit all tasks to the reader thread pool
        for (String aFile : allFileNames) {
            log.debug("Submitting task for: " + aFile);
            readerPool.submit(new AerospikeLoad(aFile, client, params));
        }

        // Wait for the reader pool to complete
        readerPool.shutdown();
        log.info("Shutdown reader thread pool");
        while (!readerPool.isTerminated())
            ; // spin; readerPool.awaitTermination(20, TimeUnit.MINUTES) would block instead
        log.info("Reader thread pool terminated");

        // Wait for the writer pool to complete after all tasks from the reader pool are done
        writerPool.shutdown();
        log.info("Shutdown writer thread pool");
        while (!writerPool.isTerminated())
            ;
        log.info("Writer thread pool terminated");

        // Print final statistics of aerospike-loader
        log.info("Final statistics of importer: (Successful Writes = " + counters.write.writeCount.get() + ", "
                + "Errors=" + (counters.write.writeErrors.get() + counters.write.readErrors.get()
                        + counters.write.processingErrors.get())
                + "(" + counters.write.writeErrors.get() + "-Write," + counters.write.readErrors.get()
                + "-Read," + counters.write.processingErrors.get() + "-Processing))");
    } catch (Exception e) {
        log.error(e);
        if (log.isDebugEnabled()) {
            e.printStackTrace();
        }
    } finally {
        // Stop statistic printer thread
        statPrinter.interrupt();
        log.info("Aerospike loader completed");
    }
}
From source file:accumulo.AccumuloStuff.java
public static void main(String[] args) throws Exception {
    File tmp = new File(System.getProperty("user.dir") + "/target/mac-test");
    if (tmp.exists()) {
        FileUtils.deleteDirectory(tmp);
    }
    tmp.mkdirs();
    String passwd = "password";

    MiniAccumuloConfigImpl cfg = new MiniAccumuloConfigImpl(tmp, passwd);
    cfg.setNumTservers(1);
    // cfg.useMiniDFS(true);

    final MiniAccumuloClusterImpl cluster = cfg.build();
    setCoreSite(cluster);
    cluster.start();

    ExecutorService svc = Executors.newFixedThreadPool(2);

    try {
        Connector conn = cluster.getConnector("root", passwd);
        String table = "table";
        conn.tableOperations().create(table);

        final BatchWriter bw = conn.createBatchWriter(table, new BatchWriterConfig());
        final AtomicBoolean flushed = new AtomicBoolean(false);

        Runnable writer = new Runnable() {
            @Override
            public void run() {
                try {
                    Mutation m = new Mutation("row");
                    m.put("colf", "colq", "value");
                    bw.addMutation(m);
                    bw.flush();
                    flushed.set(true);
                } catch (Exception e) {
                    log.error("Got exception trying to flush mutation", e);
                }
                log.info("Exiting batchwriter thread");
            }
        };

        Runnable restarter = new Runnable() {
            @Override
            public void run() {
                try {
                    for (ProcessReference proc : cluster.getProcesses().get(ServerType.TABLET_SERVER)) {
                        cluster.killProcess(ServerType.TABLET_SERVER, proc);
                    }
                    cluster.exec(TabletServer.class);
                } catch (Exception e) {
                    log.error("Caught exception restarting tabletserver", e);
                }
                log.info("Exiting restart thread");
            }
        };

        svc.execute(writer);
        svc.execute(restarter);

        log.info("Waiting for shutdown");
        svc.shutdown();
        if (!svc.awaitTermination(120, TimeUnit.SECONDS)) {
            log.info("Timeout on shutdown exceeded");
            svc.shutdownNow();
        } else {
            log.info("Cleanly shutdown");
            log.info("Threadpool is terminated? " + svc.isTerminated());
        }

        if (flushed.get()) {
            log.info("****** BatchWriter was flushed *********");
        } else {
            log.info("****** BatchWriter was NOT flushed *********");
        }

        bw.close();

        log.info("Got record {}", Iterables.getOnlyElement(conn.createScanner(table, Authorizations.EMPTY)));
    } finally {
        cluster.stop();
    }
}