List of usage examples for java.util HashSet add
public boolean add(E e)
From source file:com.yahoo.ycsb.db.AsyncHBaseClient.java
public static void main(String[] args) { if (args.length != 4) { System.out.println("usage: ahc zkquorum zkpath threadcount operation_count"); System.exit(0);//from w w w.j a v a2 s .c om } final int keyspace = 10000; //120000000; final String zkQuorum = args[0]; final String zkPath = args[1]; final int threadcount = Integer.parseInt(args[2]); final int opcount = Integer.parseInt(args[3]) / threadcount; Vector<Thread> allthreads = new Vector<Thread>(); for (int i = 0; i < threadcount; i++) { Thread t = new Thread() { public void run() { try { Random random = new Random(); AsyncHBaseClient cli = new AsyncHBaseClient(); Properties props = new Properties(); props.setProperty("columnfamily", "f1"); props.setProperty("zkquorum", zkQuorum); props.setProperty("zkpath", zkPath); props.setProperty("debug", "true"); cli.setProperties(props); cli.init(); HashMap<String, ByteIterator> result = Maps.newHashMap(); long accum = 0; for (int i = 0; i < opcount; i++) { int keynum = random.nextInt(keyspace); String key = "user" + keynum; long st = System.currentTimeMillis(); int rescode; HashMap<String, ByteIterator> hm = Maps.newHashMap(); hm.put("field1", new ByteArrayByteIterator("value1".getBytes("UTF-8"))); hm.put("field2", new ByteArrayByteIterator("value2".getBytes("UTF-8"))); hm.put("field3", new ByteArrayByteIterator("value3".getBytes("UTF-8"))); hm.put("efield", new ByteArrayByteIterator(HBaseClient.EMPTY_ARRAY)); rescode = cli.insert("bench", key, hm); HashSet<String> s = Sets.newHashSet(); s.add("field1"); s.add("field2"); rescode = cli.read("bench", key, s, result); rescode = cli.delete("bench", key); rescode = cli.read("bench", key, s, result); HashSet<String> scanFields = Sets.newHashSet(); scanFields.add("field1"); scanFields.add("field3"); Vector<HashMap<String, ByteIterator>> scanResults = new Vector<HashMap<String, ByteIterator>>(); rescode = cli.scan("bench", "user2", 20, null, scanResults); long en = System.currentTimeMillis(); accum += (en - st); if (rescode != 
Ok) { System.out.println("Error " + rescode + " for " + key); } if (i % 10 == 0) { System.out.println( i + " operations, average latency: " + (((double) accum) / ((double) i))); } } System.out.println(new ToStringBuilder(cli._client.stats(), ToStringStyle.MULTI_LINE_STYLE) .toString()); } catch (Exception e) { e.printStackTrace(); } } }; allthreads.add(t); } long st = System.currentTimeMillis(); for (Thread t : allthreads) { t.start(); } for (Thread t : allthreads) { try { t.join(); } catch (InterruptedException e) { } } long en = System.currentTimeMillis(); System.out.println("Throughput: " + ((1000.0) * (((double) (opcount * threadcount)) / ((double) (en - st)))) + " ops/sec"); }
From source file:edu.umass.cs.reconfiguration.deprecated.ReconfigurableClient.java
/**
 * Simple test client for the reconfiguration package. Clients only know the
 * set of all reconfigurators, not active replicas for any name. All
 * information about active replicas for a name is obtained from
 * reconfigurators. Any request can be sent to any reconfigurator and it
 * will forward to the appropriate reconfigurator if necessary and relay
 * back the response.
 *
 * <p>Each iteration uses a freshly generated random name and walks through:
 * request actives (expecting failure), create name, request actives
 * (expecting success), a short stream of app requests, request actives
 * again, delete name, request actives (expecting failure) and, when
 * {@code testReconfigureRC} is set, adding and then removing a
 * reconfigurator node. Every step is timed via {@code DelayProfiler}.
 *
 * @param args not read by this method
 */
public static void main(String[] args) {
    ReconfigurableClient client = null;
    try {
        /*
         * Client can only send/receive clear text or do server-only
         * authentication
         */
        JSONMessenger<?> messenger = new JSONMessenger<String>((new MessageNIOTransport<String, JSONObject>(
                null, null, new PacketDemultiplexerDefault(), true, ReconfigurationConfig.getClientSSLMode())));
        client = new ReconfigurableClient(ReconfigurationConfig.getReconfiguratorAddresses(), messenger);
        int numRequests = 2;
        String requestValuePrefix = "request_value";
        // Sleep intervals passed to Thread.sleep (milliseconds) between name requests
        // and between node-config requests respectively.
        long nameReqInterArrivalTime = 200;
        long NCReqInterArrivalTime = 1000;
        String initValue = "initial_value";
        int numIterations = 10000;
        boolean testReconfigureRC = true;

        for (int j = 0; j < numIterations; j++) {
            // Random name and reconfigurator ID for this iteration.
            String namePrefix = "name" + (int) (Math.random() * Integer.MAX_VALUE);
            String reconfiguratorID = "RC" + (int) (Math.random() * 64000);
            long t0 = System.currentTimeMillis();

            // ---- request active replicas ----
            // Resend until waitForFailure reports true for this name.
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeRequestActiveReplicas(namePrefix));
            while (!client.waitForFailure(namePrefix));
            DelayProfiler.updateDelay("requestActives", t0);

            // active replicas for name initially don't exist
            assert (client.getActiveReplicas() == null || client.getActiveReplicas().isEmpty());

            // ---- create name ----
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeCreateNameRequest(namePrefix, initValue));
            while (!client.waitForSuccess(namePrefix));
            DelayProfiler.updateDelay("createName", t0);

            /*
             * Verify that active replicas for name now exist. The only
             * reason the query is repeated is because it is possible to
             * find the name non-existent briefly if the query is sent to a
             * different reconfigurator that hasn't yet caught up with the
             * creation (but will eventually do so).
             */
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeRequestActiveReplicas(namePrefix));
            while (!client.waitForSuccess(namePrefix));
            DelayProfiler.updateDelay("requestActives", t0);

            assert (client.getActiveReplicas() != null && !client.getActiveReplicas().isEmpty());

            // ---- send a stream of app requests sequentially ----
            for (int i = 0; i < numRequests; i++) {
                t0 = System.currentTimeMillis();
                do
                    client.sendRequest(client.makeRequest(namePrefix, requestValuePrefix + i));
                while (!client.rcvdAppReply(namePrefix));
                DelayProfiler.updateDelay("appPaxosRequest", t0);
                Thread.sleep(nameReqInterArrivalTime);
            }

            // ---- request current active replicas (possibly reconfigured) ----
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeRequestActiveReplicas(namePrefix));
            while (!client.waitForSuccess(namePrefix));
            DelayProfiler.updateDelay("requestActives", t0);

            // ---- delete name, retransmit if error ----
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeDeleteNameRequest(namePrefix));
            while (!client.waitForSuccess(namePrefix));
            DelayProfiler.updateDelay("deleteName", t0);
            Thread.sleep(nameReqInterArrivalTime);

            // ---- verify that active replicas for name now don't exist ----
            t0 = System.currentTimeMillis();
            do
                client.sendRequest(client.makeRequestActiveReplicas(namePrefix));
            while (!client.waitForFailure(namePrefix));
            DelayProfiler.updateDelay("requestActives", t0);
            assert (client.getActiveReplicas() == null || client.getActiveReplicas().isEmpty());

            // Skip the reconfigurator add/remove part when it is switched off.
            if (!testReconfigureRC)
                continue;

            // ---- add RC node; the port below does not matter in this test ----
            // Unlike the steps above, the request is sent once (the "do" is commented
            // out) and the empty loop only re-checks waitForReconfigureRCSuccess.
            t0 = System.currentTimeMillis();
            // do
            client.sendRequest(new ReconfigureRCNodeConfig<String>(null, reconfiguratorID,
                    new InetSocketAddress(InetAddress.getByName("localhost"), TEST_PORT)));
            while (!client
                    .waitForReconfigureRCSuccess(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString()))
                ;
            DelayProfiler.updateDelay("addReconfigurator", t0);

            Thread.sleep(NCReqInterArrivalTime);

            // ---- delete just added RC node ----
            HashSet<String> deleted = new HashSet<String>();
            deleted.add(reconfiguratorID);
            t0 = System.currentTimeMillis();
            // do
            client.sendRequest(new ReconfigureRCNodeConfig<String>(null, null, deleted));
            while (!client
                    .waitForReconfigureRCSuccess(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString())) {
            }
            DelayProfiler.updateDelay("removeReconfigurator", t0);

            Thread.sleep(NCReqInterArrivalTime);

            client.log.info("\n\n\n\n==================Successfully completed iteration " + j + ":\n"
                    + DelayProfiler.getStats() + "\n\n\n\n");
        }

        // NOTE(review): the messenger is never stopped here; the call below is disabled.
        // client.messenger.stop();
    } catch (IOException ioe) {
        ioe.printStackTrace();
    } catch (JSONException je) {
        je.printStackTrace();
    } catch (InterruptedException ie) {
        ie.printStackTrace();
    } catch (RequestParseException e) {
        e.printStackTrace();
    }
}
From source file:eu.fbk.dkm.sectionextractor.WikipediaSectionTitlesExtractor.java
public static void main(String args[]) throws IOException { CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger(); commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("wikipedia xml dump file").isRequired().withLongOpt("wikipedia-dump").create("d")); commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Filter file") .withLongOpt("filter").create("f")); commandLineWithLogger.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("output file") .isRequired().withLongOpt("output-file").create("o")); commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg() .withDescription("max depth (default " + MAX_DEPTH + ")").withLongOpt("max-depth").create("m")); commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg() .withDescription("max num of sections").withLongOpt("max-num").create("n")); commandLineWithLogger.addOption(new Option("l", "print titles")); commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg() .withDescription(// ww w . ja v a2 s. 
co m "number of threads (default " + AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER + ")") .withLongOpt("num-threads").create("t")); commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg() .withDescription("number of pages to process (default all)").withLongOpt("num-pages").create("p")); commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg() .withDescription("receive notification every n pages (default " + AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT + ")") .withLongOpt("notification-point").create("b")); CommandLine commandLine = null; try { commandLine = commandLineWithLogger.getCommandLine(args); PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps()); } catch (Exception e) { System.exit(1); } int numThreads = Integer.parseInt(commandLine.getOptionValue("num-threads", Integer.toString(AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER))); int numPages = Integer.parseInt(commandLine.getOptionValue("num-pages", Integer.toString(AbstractWikipediaExtractor.DEFAULT_NUM_PAGES))); int notificationPoint = Integer.parseInt(commandLine.getOptionValue("notification-point", Integer.toString(AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT))); int configuredDepth = Integer .parseInt(commandLine.getOptionValue("max-depth", Integer.toString(MAX_DEPTH))); int maxNum = Integer.parseInt(commandLine.getOptionValue("max-num", "0")); boolean printTitles = commandLine.hasOption("l"); HashSet<String> pagesToConsider = null; String filterFileName = commandLine.getOptionValue("filter"); if (filterFileName != null) { File filterFile = new File(filterFileName); if (filterFile.exists()) { pagesToConsider = new HashSet<>(); List<String> lines = Files.readLines(filterFile, Charsets.UTF_8); for (String line : lines) { line = line.trim(); if (line.length() == 0) { continue; } line = line.replaceAll("\\s+", "_"); pagesToConsider.add(line); } } } File outputFile = new File(commandLine.getOptionValue("output-file")); 
ExtractorParameters extractorParameters = new ExtractorParameters( commandLine.getOptionValue("wikipedia-dump"), outputFile.getAbsolutePath()); WikipediaExtractor wikipediaPageParser = new WikipediaSectionTitlesExtractor(numThreads, numPages, extractorParameters.getLocale(), outputFile, configuredDepth, maxNum, printTitles, pagesToConsider); wikipediaPageParser.setNotificationPoint(notificationPoint); wikipediaPageParser.start(extractorParameters); logger.info("extraction ended " + new Date()); }
From source file:edu.cmu.lti.oaqa.knn4qa.apps.CollectionDiffer.java
public static void main(String[] args) { Options options = new Options(); options.addOption("i1", null, true, "Input file 1"); options.addOption("i2", null, true, "Input file 2"); options.addOption("o", null, true, "Output file"); CommandLineParser parser = new org.apache.commons.cli.GnuParser(); try {/* w w w . j a va 2s .c om*/ CommandLine cmd = parser.parse(options, args); InputStream input1 = null, input2 = null; if (cmd.hasOption("i1")) { input1 = CompressUtils.createInputStream(cmd.getOptionValue("i1")); } else { Usage("Specify 'Input file 1'"); } if (cmd.hasOption("i2")) { input2 = CompressUtils.createInputStream(cmd.getOptionValue("i2")); } else { Usage("Specify 'Input file 2'"); } HashSet<String> hSubj = new HashSet<String>(); BufferedWriter out = null; if (cmd.hasOption("o")) { String outFile = cmd.getOptionValue("o"); out = new BufferedWriter(new OutputStreamWriter(CompressUtils.createOutputStream(outFile))); } else { Usage("Specify 'Output file'"); } XmlIterator inpIter2 = new XmlIterator(input2, YahooAnswersReader.DOCUMENT_TAG); int docNum = 1; for (String oneRec = inpIter2.readNext(); !oneRec.isEmpty(); oneRec = inpIter2.readNext(), ++docNum) { if (docNum % 10000 == 0) { System.out.println(String.format( "Loaded and memorized questions for %d documents from the second input file", docNum)); } ParsedQuestion q = YahooAnswersParser.parse(oneRec, false); hSubj.add(q.mQuestion); } XmlIterator inpIter1 = new XmlIterator(input1, YahooAnswersReader.DOCUMENT_TAG); System.out.println("============================================="); System.out.println("Memoization is done... 
now let's diff!!!"); System.out.println("============================================="); docNum = 1; int skipOverlapQty = 0, skipErrorQty = 0; for (String oneRec = inpIter1.readNext(); !oneRec.isEmpty(); ++docNum, oneRec = inpIter1.readNext()) { if (docNum % 10000 == 0) { System.out.println(String.format("Processed %d documents from the first input file", docNum)); } oneRec = oneRec.trim() + System.getProperty("line.separator"); ParsedQuestion q = null; try { q = YahooAnswersParser.parse(oneRec, false); } catch (Exception e) { // If <bestanswer>...</bestanswer> is missing we may end up here... // This is a bit funny, because this element is supposed to be mandatory, // but it's not. System.err.println("Skipping due to parsing error, exception: " + e); skipErrorQty++; continue; } if (hSubj.contains(q.mQuestion.trim())) { //System.out.println(String.format("Skipping uri='%s', question='%s'", q.mQuestUri, q.mQuestion)); skipOverlapQty++; continue; } out.write(oneRec); } System.out.println( String.format("Processed %d documents, skipped because of overlap/errors %d/%d documents", docNum - 1, skipOverlapQty, skipErrorQty)); out.close(); } catch (ParseException e) { Usage("Cannot parse arguments"); } catch (Exception e) { e.printStackTrace(); System.err.println("Terminating due to an exception: " + e); System.exit(1); } }
From source file:InlineSchemaValidator.java
/** Main program entry point. */ public static void main(String[] argv) { // is there anything to do? if (argv.length == 0) { printUsage();/*from w w w . j av a 2 s.co m*/ System.exit(1); } // variables Vector schemas = null; Vector instances = null; HashMap prefixMappings = null; HashMap uriMappings = null; String docURI = argv[argv.length - 1]; String schemaLanguage = DEFAULT_SCHEMA_LANGUAGE; int repetition = DEFAULT_REPETITION; boolean schemaFullChecking = DEFAULT_SCHEMA_FULL_CHECKING; boolean honourAllSchemaLocations = DEFAULT_HONOUR_ALL_SCHEMA_LOCATIONS; boolean validateAnnotations = DEFAULT_VALIDATE_ANNOTATIONS; boolean generateSyntheticAnnotations = DEFAULT_GENERATE_SYNTHETIC_ANNOTATIONS; boolean memoryUsage = DEFAULT_MEMORY_USAGE; // process arguments for (int i = 0; i < argv.length - 1; ++i) { String arg = argv[i]; if (arg.startsWith("-")) { String option = arg.substring(1); if (option.equals("l")) { // get schema language name if (++i == argv.length) { System.err.println("error: Missing argument to -l option."); } else { schemaLanguage = argv[i]; } continue; } if (option.equals("x")) { if (++i == argv.length) { System.err.println("error: Missing argument to -x option."); continue; } String number = argv[i]; try { int value = Integer.parseInt(number); if (value < 1) { System.err.println("error: Repetition must be at least 1."); continue; } repetition = value; } catch (NumberFormatException e) { System.err.println("error: invalid number (" + number + ")."); } continue; } if (arg.equals("-a")) { // process -a: xpath expressions for schemas if (schemas == null) { schemas = new Vector(); } while (i + 1 < argv.length - 1 && !(arg = argv[i + 1]).startsWith("-")) { schemas.add(arg); ++i; } continue; } if (arg.equals("-i")) { // process -i: xpath expressions for instance documents if (instances == null) { instances = new Vector(); } while (i + 1 < argv.length - 1 && !(arg = argv[i + 1]).startsWith("-")) { instances.add(arg); ++i; } continue; } if 
(arg.equals("-nm")) { String prefix; String uri; while (i + 2 < argv.length - 1 && !(prefix = argv[i + 1]).startsWith("-") && !(uri = argv[i + 2]).startsWith("-")) { if (prefixMappings == null) { prefixMappings = new HashMap(); uriMappings = new HashMap(); } prefixMappings.put(prefix, uri); HashSet prefixes = (HashSet) uriMappings.get(uri); if (prefixes == null) { prefixes = new HashSet(); uriMappings.put(uri, prefixes); } prefixes.add(prefix); i += 2; } continue; } if (option.equalsIgnoreCase("f")) { schemaFullChecking = option.equals("f"); continue; } if (option.equalsIgnoreCase("hs")) { honourAllSchemaLocations = option.equals("hs"); continue; } if (option.equalsIgnoreCase("va")) { validateAnnotations = option.equals("va"); continue; } if (option.equalsIgnoreCase("ga")) { generateSyntheticAnnotations = option.equals("ga"); continue; } if (option.equalsIgnoreCase("m")) { memoryUsage = option.equals("m"); continue; } if (option.equals("h")) { printUsage(); continue; } System.err.println("error: unknown option (" + option + ")."); continue; } } try { // Create new instance of inline schema validator. InlineSchemaValidator inlineSchemaValidator = new InlineSchemaValidator(prefixMappings, uriMappings); // Parse document containing schemas and validation roots DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(inlineSchemaValidator); Document doc = db.parse(docURI); // Create XPath factory for selecting schema and validation roots XPathFactory xpf = XPathFactory.newInstance(); XPath xpath = xpf.newXPath(); xpath.setNamespaceContext(inlineSchemaValidator); // Select schema roots from the DOM NodeList[] schemaNodes = new NodeList[schemas != null ? 
schemas.size() : 0]; for (int i = 0; i < schemaNodes.length; ++i) { XPathExpression xpathSchema = xpath.compile((String) schemas.elementAt(i)); schemaNodes[i] = (NodeList) xpathSchema.evaluate(doc, XPathConstants.NODESET); } // Select validation roots from the DOM NodeList[] instanceNodes = new NodeList[instances != null ? instances.size() : 0]; for (int i = 0; i < instanceNodes.length; ++i) { XPathExpression xpathInstance = xpath.compile((String) instances.elementAt(i)); instanceNodes[i] = (NodeList) xpathInstance.evaluate(doc, XPathConstants.NODESET); } // Create SchemaFactory and configure SchemaFactory factory = SchemaFactory.newInstance(schemaLanguage); factory.setErrorHandler(inlineSchemaValidator); try { factory.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking); } catch (SAXNotRecognizedException e) { System.err.println("warning: SchemaFactory does not recognize feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println("warning: SchemaFactory does not support feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } try { factory.setFeature(HONOUR_ALL_SCHEMA_LOCATIONS_ID, honourAllSchemaLocations); } catch (SAXNotRecognizedException e) { System.err.println("warning: SchemaFactory does not recognize feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: SchemaFactory does not support feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")"); } try { factory.setFeature(VALIDATE_ANNOTATIONS_ID, validateAnnotations); } catch (SAXNotRecognizedException e) { System.err.println( "warning: SchemaFactory does not recognize feature (" + VALIDATE_ANNOTATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: SchemaFactory does not support feature (" + VALIDATE_ANNOTATIONS_ID + ")"); } try { factory.setFeature(GENERATE_SYNTHETIC_ANNOTATIONS_ID, generateSyntheticAnnotations); } catch (SAXNotRecognizedException e) { 
System.err.println("warning: SchemaFactory does not recognize feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println("warning: SchemaFactory does not support feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")"); } // Build Schema from sources Schema schema; { DOMSource[] sources; int size = 0; for (int i = 0; i < schemaNodes.length; ++i) { size += schemaNodes[i].getLength(); } sources = new DOMSource[size]; if (size == 0) { schema = factory.newSchema(); } else { int count = 0; for (int i = 0; i < schemaNodes.length; ++i) { NodeList nodeList = schemaNodes[i]; int nodeListLength = nodeList.getLength(); for (int j = 0; j < nodeListLength; ++j) { sources[count++] = new DOMSource(nodeList.item(j)); } } schema = factory.newSchema(sources); } } // Setup validator and input source. Validator validator = schema.newValidator(); validator.setErrorHandler(inlineSchemaValidator); try { validator.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking); } catch (SAXNotRecognizedException e) { System.err.println( "warning: Validator does not recognize feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: Validator does not support feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } try { validator.setFeature(HONOUR_ALL_SCHEMA_LOCATIONS_ID, honourAllSchemaLocations); } catch (SAXNotRecognizedException e) { System.err.println( "warning: Validator does not recognize feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: Validator does not support feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")"); } try { validator.setFeature(VALIDATE_ANNOTATIONS_ID, validateAnnotations); } catch (SAXNotRecognizedException e) { System.err .println("warning: Validator does not recognize feature (" + VALIDATE_ANNOTATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println("warning: 
Validator does not support feature (" + VALIDATE_ANNOTATIONS_ID + ")"); } try { validator.setFeature(GENERATE_SYNTHETIC_ANNOTATIONS_ID, generateSyntheticAnnotations); } catch (SAXNotRecognizedException e) { System.err.println("warning: Validator does not recognize feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: Validator does not support feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")"); } // Validate instance documents for (int i = 0; i < instanceNodes.length; ++i) { NodeList nodeList = instanceNodes[i]; int nodeListLength = nodeList.getLength(); for (int j = 0; j < nodeListLength; ++j) { DOMSource source = new DOMSource(nodeList.item(j)); source.setSystemId(docURI); inlineSchemaValidator.validate(validator, source, docURI, repetition, memoryUsage); } } } catch (SAXParseException e) { // ignore } catch (Exception e) { System.err.println("error: Parse error occurred - " + e.getMessage()); if (e instanceof SAXException) { Exception nested = ((SAXException) e).getException(); if (nested != null) { e = nested; } } e.printStackTrace(System.err); } }
From source file:eu.fbk.dkm.sectionextractor.pantheon.WikipediaGoodTextExtractor.java
public static void main(String args[]) throws IOException { CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger(); commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("wikipedia xml dump file").isRequired().withLongOpt("wikipedia-dump").create("d")); commandLineWithLogger.addOption(OptionBuilder.withArgName("dir").hasArg() .withDescription("output directory in which to store output files").isRequired() .withLongOpt("output-dir").create("o")); commandLineWithLogger//from ww w. j av a 2s.c om .addOption(OptionBuilder.withDescription("use NAF format").withLongOpt("naf").create("n")); commandLineWithLogger.addOption(OptionBuilder.withDescription("tokenize and ssplit with Stanford") .withLongOpt("stanford").create("s")); commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Filter file") .withLongOpt("filter").create("f")); commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("ID and category file").withLongOpt("idcat").create("i")); commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Redirect file") .withLongOpt("redirect").create("r")); commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg() .withDescription( "number of threads (default " + AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER + ")") .withLongOpt("num-threads").create("t")); commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg() .withDescription("number of pages to process (default all)").withLongOpt("num-pages").create("p")); commandLineWithLogger.addOption(OptionBuilder.withArgName("int").hasArg() .withDescription("receive notification every n pages (default " + AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT + ")") .withLongOpt("notification-point").create("b")); commandLineWithLogger.addOption(new Option("n", "NAF format")); CommandLine commandLine = null; try { commandLine = 
commandLineWithLogger.getCommandLine(args); PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps()); } catch (Exception e) { System.exit(1); } int numThreads = Integer.parseInt(commandLine.getOptionValue("num-threads", Integer.toString(AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER))); int numPages = Integer.parseInt(commandLine.getOptionValue("num-pages", Integer.toString(AbstractWikipediaExtractor.DEFAULT_NUM_PAGES))); int notificationPoint = Integer.parseInt(commandLine.getOptionValue("notification-point", Integer.toString(AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT))); boolean nafFormat = commandLine.hasOption("n"); boolean useStanford = commandLine.hasOption("s"); HashMap<Integer, String> idCategory = new HashMap<>(); String idcatFileName = commandLine.getOptionValue("idcat"); if (idcatFileName != null) { logger.info("Loading categories"); File idcatFile = new File(idcatFileName); if (idcatFile.exists()) { List<String> lines = Files.readLines(idcatFile, Charsets.UTF_8); for (String line : lines) { line = line.trim(); if (line.length() == 0) { continue; } String[] parts = line.split("\\s+"); if (parts.length < 3) { continue; } idCategory.put(Integer.parseInt(parts[1]), parts[2]); } } } HashMap<String, String> redirects = new HashMap<>(); String redirectFileName = commandLine.getOptionValue("redirect"); if (redirectFileName != null) { logger.info("Loading redirects"); File redirectFile = new File(redirectFileName); if (redirectFile.exists()) { List<String> lines = Files.readLines(redirectFile, Charsets.UTF_8); for (String line : lines) { line = line.trim(); if (line.length() == 0) { continue; } String[] parts = line.split("\\t+"); if (parts.length < 2) { continue; } redirects.put(parts[0], parts[1]); } } } HashSet<String> pagesToConsider = null; String filterFileName = commandLine.getOptionValue("filter"); if (filterFileName != null) { logger.info("Loading file list"); File filterFile = new File(filterFileName); if 
(filterFile.exists()) { pagesToConsider = new HashSet<>(); List<String> lines = Files.readLines(filterFile, Charsets.UTF_8); for (String line : lines) { line = line.trim(); if (line.length() == 0) { continue; } line = line.replaceAll("\\s+", "_"); pagesToConsider.add(line); addRedirects(pagesToConsider, redirects, line, 0); } } } ExtractorParameters extractorParameters = new ExtractorParameters( commandLine.getOptionValue("wikipedia-dump"), commandLine.getOptionValue("output-dir")); File outputFolder = new File(commandLine.getOptionValue("output-dir")); if (!outputFolder.exists()) { boolean mkdirs = outputFolder.mkdirs(); if (!mkdirs) { throw new IOException("Unable to create folder " + outputFolder.getAbsolutePath()); } } WikipediaExtractor wikipediaPageParser = new WikipediaGoodTextExtractor(numThreads, numPages, extractorParameters.getLocale(), outputFolder, nafFormat, pagesToConsider, useStanford, idCategory); wikipediaPageParser.setNotificationPoint(notificationPoint); wikipediaPageParser.start(extractorParameters); logger.info("extraction ended " + new Date()); }
From source file:edu.ku.brc.web.ParsePaleo.java
/** * @param args/*from w ww.ja v a 2s . c o m*/ */ public static void main(String[] args) { if (true) { ParsePaleo pp = new ParsePaleo(); pp.processAll(); return; } try { HashSet<String> set = new HashSet<String>(); for (String line : FileUtils.readLines(new File("/Users/rods/Downloads/ages.txt"))) { //Pattern p = Pattern.compile("\"([^\"\\]*(\\.[^\"\\]*)*)\"|\'([^\'\\]*(\\.[^\'\\]*)*)\'"); //Pattern p = Pattern.compile("\"([^\"]*)\"|(\\S+)"); Pattern p = Pattern.compile("\"([^\"]*)\""); Matcher m = p.matcher(line); //List<String> animals = new ArrayList()<String>(); while (m.find()) { //System.out.println(m.group()); set.add(replace(m.group(), "\"", "")); //animals.add(m.group()); } } for (String str : set) { System.out.println(str); } } catch (Exception ex) { ex.printStackTrace(); } }
From source file:metaTile.Main.java
/** * @param args/*from w w w. j a va2s . com*/ * @throws IOException */ public static void main(String[] args) throws IOException { try { /* parse the command line arguments */ // create the command line parser CommandLineParser parser = new PosixParser(); // create the Options Options options = new Options(); options.addOption("i", "input", true, "File to read original tile list from."); options.addOption("o", "output", true, "File to write shorter meta-tile list to."); options.addOption("m", "metatiles", true, "Number of tiles in x and y direction to group into one meta-tile."); // parse the command line arguments CommandLine commandLine = parser.parse(options, args); if (!commandLine.hasOption("input") || !commandLine.hasOption("output") || !commandLine.hasOption("metatiles")) printUsage(options); String inputFileName = commandLine.getOptionValue("input"); String outputFileName = commandLine.getOptionValue("output"); int metaTileSize = Integer.parseInt(commandLine.getOptionValue("metatiles")); ArrayList<RenderingTile> tiles = new ArrayList<RenderingTile>(); BufferedReader tileListReader = new BufferedReader(new FileReader(new File(inputFileName))); BufferedWriter renderMetatileListWriter = new BufferedWriter(new FileWriter(new File(outputFileName))); String line = tileListReader.readLine(); while (line != null) { String[] columns = line.split("/"); if (columns.length == 3) tiles.add(new RenderingTile(Integer.parseInt(columns[0]), Integer.parseInt(columns[1]), Integer.parseInt(columns[2]))); line = tileListReader.readLine(); } tileListReader.close(); int hits = 0; // tiles which we are already rendering as the top left corner of 4x4 metatiles HashSet<RenderingTile> whitelist = new HashSet<RenderingTile>(); // for each tile in the list see if it has a meta-tile in the whitelist already for (int i = 0; i < tiles.size(); i++) { boolean hit = false; // by default we aren't already rendering this tile as part of another metatile for (int dx = 0; dx < metaTileSize; 
dx++) { for (int dy = 0; dy < metaTileSize; dy++) { RenderingTile candidate = new RenderingTile(tiles.get(i).z, tiles.get(i).x - dx, tiles.get(i).y - dy); if (whitelist.contains(candidate)) { hit = true; // now exit the two for loops iterating over tiles inside a meta-tile dx = metaTileSize; dy = metaTileSize; } } } // if this tile doesn't already have a meta-tile in the whitelist, add it if (hit == false) { hits++; renderMetatileListWriter.write(tiles.get(i).toString() + "/" + metaTileSize + "\n"); whitelist.add(tiles.get(i)); } } renderMetatileListWriter.close(); System.out.println( "Reduced " + tiles.size() + " tiles into " + hits + " metatiles of size " + metaTileSize); } catch (Exception e) { e.printStackTrace(); } }
From source file:module.entities.UsernameChecker.CheckOpengovUsernames.java
/** * @param args the command line arguments *//*from w w w . j a v a 2 s . c om*/ public static void main(String[] args) throws SQLException, IOException { // args = new String[1]; // args[0] = "searchConf.txt"; Date d = new Date(); long milTime = d.getTime(); long execStart = System.nanoTime(); Timestamp startTime = new Timestamp(milTime); long lStartTime; long lEndTime = 0; int status_id = 1; JSONObject obj = new JSONObject(); if (args.length != 1) { System.out.println("None or too many argument parameters where defined! " + "\nPlease provide ONLY the configuration file name as the only argument."); } else { try { configFile = args[0]; initLexicons(); Database.init(); lStartTime = System.currentTimeMillis(); System.out.println("Opengov username identification process started at: " + startTime); usernameCheckerId = Database.LogUsernameChecker(lStartTime); TreeMap<Integer, String> OpenGovUsernames = Database.GetOpenGovUsers(); HashSet<ReportEntry> report_names = new HashSet<>(); if (OpenGovUsernames.size() > 0) { for (int userID : OpenGovUsernames.keySet()) { String DBusername = Normalizer .normalize(OpenGovUsernames.get(userID).toUpperCase(locale), Normalizer.Form.NFD) .replaceAll("\\p{M}", ""); String username = ""; int type; String[] splitUsername = DBusername.split(" "); if (checkNameInLexicons(splitUsername)) { for (String splText : splitUsername) { username += splText + " "; } type = 1; } else if (checkOrgInLexicons(splitUsername)) { for (String splText : splitUsername) { username += splText + " "; } type = 2; } else { username = DBusername; type = -1; } ReportEntry cerEntry = new ReportEntry(userID, username.trim(), type); report_names.add(cerEntry); } status_id = 2; obj.put("message", "Opengov username checker finished with no errors"); obj.put("details", ""); Database.UpdateOpengovUsersReportName(report_names); lEndTime = System.currentTimeMillis(); } else { status_id = 2; obj.put("message", "Opengov username checker finished with no errors"); 
obj.put("details", "No usernames needed to be checked"); lEndTime = System.currentTimeMillis(); } } catch (Exception ex) { System.err.println(ex.getMessage()); status_id = 3; obj.put("message", "Opengov username checker encountered an error"); obj.put("details", ex.getMessage().toString()); lEndTime = System.currentTimeMillis(); } } long execEnd = System.nanoTime(); long executionTime = (execEnd - execStart); System.out.println("Total process time: " + (((executionTime / 1000000) / 1000) / 60) + " minutes."); Database.UpdateLogUsernameChecker(lEndTime, status_id, usernameCheckerId, obj); Database.closeConnection(); }
From source file:eu.fbk.dkm.sectionextractor.PageClassMerger.java
/**
 * Writes the labels of the WikiData IDs that also occur in the Airpedia file.
 *
 * <p>Both input files are read as UTF-8, one record per line with tab-separated columns.
 * Blank lines, lines starting with {@code #}, lines with fewer than two columns and lines
 * whose first column is not an integer are skipped. From the WikiData file the first column
 * is kept as the ID and the second as its value; from the Airpedia file only the ID is kept.
 * For every WikiData ID that is also an Airpedia ID the stored value is written to the
 * output file, one per line.
 *
 * @param args command line options: {@code -i/--wikidata-id}, {@code -a/--airpedia} and
 *     {@code -o/--output}, all required and each taking a file name
 * @throws IOException if an input file cannot be read or the output file cannot be written
 */
public static void main(String[] args) throws IOException {
    CommandLineWithLogger commandLineWithLogger = new CommandLineWithLogger();
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("WikiData ID file").isRequired().withLongOpt("wikidata-id").create("i"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("Airpedia Person file").isRequired().withLongOpt("airpedia").create("a"));
    commandLineWithLogger.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("Output file")
            .isRequired().withLongOpt("output").create("o"));

    CommandLine commandLine = null;
    try {
        commandLine = commandLineWithLogger.getCommandLine(args);
        PropertyConfigurator.configure(commandLineWithLogger.getLoggerProps());
    } catch (Exception e) {
        // NOTE(review): presumably CommandLineWithLogger reports the problem itself - confirm;
        // nothing is printed here before exiting.
        System.exit(1);
    }

    String wikiIDFileName = commandLine.getOptionValue("wikidata-id");
    String airpediaFileName = commandLine.getOptionValue("airpedia");
    String outputFileName = commandLine.getOptionValue("output");

    HashMap<Integer, String> wikiIDs = new HashMap<>();
    HashSet<Integer> airpediaClasses = new HashSet<>();

    logger.info("Loading file " + wikiIDFileName);
    for (String line : Files.readLines(new File(wikiIDFileName), Charsets.UTF_8)) {
        line = line.trim();
        if (line.isEmpty() || line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }
        try {
            wikiIDs.put(Integer.parseInt(parts[0]), parts[1]);
        } catch (NumberFormatException ignored) {
            // First column is not an integer ID: skip the line.
        }
    }

    logger.info("Loading file " + airpediaFileName);
    for (String line : Files.readLines(new File(airpediaFileName), Charsets.UTF_8)) {
        line = line.trim();
        if (line.isEmpty() || line.startsWith("#")) {
            continue;
        }
        String[] parts = line.split("\t");
        if (parts.length < 2) {
            continue;
        }
        try {
            airpediaClasses.add(Integer.parseInt(parts[0]));
        } catch (NumberFormatException ignored) {
            // First column is not an integer ID: skip the line.
        }
    }

    logger.info("Saving information");
    // Write UTF-8 to match the charset used for reading, and close the writer even when a
    // write fails.
    try (BufferedWriter writer = Files.newWriter(new File(outputFileName), Charsets.UTF_8)) {
        for (int id : wikiIDs.keySet()) {
            if (airpediaClasses.contains(id)) {
                writer.append(wikiIDs.get(id)).append("\n");
            }
        }
    }
}