List of usage examples for javax.xml.parsers.SAXParserFactory.setNamespaceAware
public void setNamespaceAware(boolean awareness)
From source file:MainClass.java
/**
 * Configures a namespace-aware, validating SAX parser factory, reports both
 * settings on standard output and then prints the parser the factory creates.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the factory cannot create a parser
 */
public static void main(String args[]) throws Exception {
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    factory.setValidating(true);

    // The settings are read back from the factory rather than assumed.
    String namespaceNegation = factory.isNamespaceAware() ? "" : "not ";
    System.out.println("Parser will " + namespaceNegation + "be namespace aware");
    String validationNegation = factory.isValidating() ? "" : "not ";
    System.out.println("Parser will " + validationNegation + "validate XML");

    SAXParser saxParser = factory.newSAXParser();
    System.out.println("Parser object is: " + saxParser);
}
From source file:MainClass.java
public static void main(String args[]) { SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setNamespaceAware(true); spf.setValidating(true);//w w w. j a va2s .com System.out.println("Parser will " + (spf.isNamespaceAware() ? "" : "not ") + "be namespace aware"); System.out.println("Parser will " + (spf.isValidating() ? "" : "not ") + "validate XML"); SAXParser parser = null; try { parser = spf.newSAXParser(); } catch (ParserConfigurationException e) { e.printStackTrace(System.err); } catch (SAXException e) { e.printStackTrace(System.err); } System.out.println("Parser object is: " + parser); }
From source file:SAXTest.java
/**
 * Fetches a document from a URL, parses it with a namespace-aware SAX parser
 * and prints the value of every attribute whose local name is {@code href} on
 * elements whose local name is {@code a}.
 *
 * @param args optional; {@code args[0]} is the URL to read. When no argument is
 *             given, {@code http://www.w3c.org} is used and announced on
 *             standard output.
 * @throws Exception if the URL cannot be opened or the document cannot be parsed
 */
public static void main(String[] args) throws Exception {
    String url;
    if (args.length == 0) {
        url = "http://www.w3c.org";
        System.out.println("Using " + url);
    } else {
        url = args[0];
    }

    DefaultHandler handler = new DefaultHandler() {
        @Override
        public void startElement(String namespaceURI, String lname, String qname, Attributes attrs) {
            if (lname.equals("a") && attrs != null) {
                for (int i = 0; i < attrs.getLength(); i++) {
                    String aname = attrs.getLocalName(i);
                    if (aname.equals("href")) {
                        System.out.println(attrs.getValue(i));
                    }
                }
            }
        }
    };

    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    SAXParser saxParser = factory.newSAXParser();

    InputStream in = new URL(url).openStream();
    try {
        saxParser.parse(in, handler);
    } finally {
        // Release the connection's stream even when parsing fails.
        in.close();
    }
}
From source file:Main.java
public static void main(String[] args) throws Exception { String xml = "<?xml version='1.0'?><test><test2></test2></test>"; String schemaString = // "<?xml version='1.0'?>"// + "<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema' elementFormDefault='unqualified' attributeFormDefault='unqualified'>"// + "<xsd:element name='test' type='Test'/>"// + "<xsd:element name='test2' type='Test2'/>"// + "<xsd:complexType name='Test'>"// + "<xsd:sequence>"// + "<xsd:element ref='test2' minOccurs='1' maxOccurs='unbounded'/>"// + "</xsd:sequence>"// + "</xsd:complexType>"// + "<xsd:simpleType name='Test2'>"// + "<xsd:restriction base='xsd:string'><xsd:minLength value='1'/></xsd:restriction>"// + "</xsd:simpleType>"// + "</xsd:schema>"; Source schemaSource = new StreamSource(new StringReader(schemaString)); Schema schema = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(schemaSource); SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); factory.setSchema(schema);/*from ww w. j ava2 s . c om*/ SAXParser parser = factory.newSAXParser(); MyContentHandler handler = new MyContentHandler(); parser.parse(new InputSource(new StringReader(xml)), handler); }
From source file:TrySAXHandler.java
/**
 * Builds a namespace-aware, validating SAX parser, reports its configuration on
 * standard output and parses the file {@code y.xml} with a {@code MySAXHandler}
 * (defined elsewhere).
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the parser cannot be created or the file cannot be parsed
 */
public static void main(String args[]) throws Exception {
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    factory.setValidating(true);

    String namespaceNegation = factory.isNamespaceAware() ? "" : "not ";
    System.out.println("Parser will " + namespaceNegation + "be namespace aware");
    String validationNegation = factory.isValidating() ? "" : "not ";
    System.out.println("Parser will " + validationNegation + "validate XML");

    SAXParser saxParser = factory.newSAXParser();
    System.out.println("Parser object is: " + saxParser);

    MySAXHandler eventHandler = new MySAXHandler();
    File document = new File("y.xml");
    saxParser.parse(document, eventHandler);
}
From source file:MainClass.java
/**
 * Builds a namespace-aware, validating SAX parser with the
 * {@code namespace-prefixes} feature switched on and uses it to parse
 * {@code xmlString} (defined elsewhere) with a {@code MySAXHandler}.
 *
 * <p>If the feature cannot be set or the parser cannot be created, the failure
 * is printed to standard error and the JVM exits with status 1.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if parsing the document fails
 */
public static void main(String args[]) throws Exception {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    SAXParser parser = null;
    spf.setNamespaceAware(true);
    spf.setValidating(true);
    try {
        spf.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
        parser = spf.newSAXParser();
        System.out.println("Parser object is: " + parser);
    } catch (Exception e) {
        e.printStackTrace(System.err);
        // Stop here: continuing would only dereference the null parser below.
        System.exit(1);
    }

    MySAXHandler handler = new MySAXHandler();
    parser.parse(new InputSource(new StringReader(xmlString)), handler);
}
From source file:MainClass.java
/**
 * Creates a namespace-aware, validating SAX parser with the
 * {@code namespace-prefixes} feature enabled, then parses {@code xmlString}
 * (defined elsewhere) using a {@code MySAXHandler}.
 *
 * <p>A failure while configuring or creating the parser is printed to standard
 * error and terminates the JVM with status 1.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if parsing the document fails
 */
public static void main(String args[]) throws Exception {
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    factory.setValidating(true);

    SAXParser saxParser = null;
    try {
        factory.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
        saxParser = factory.newSAXParser();
        System.out.println("Parser object is: " + saxParser);
    } catch (ParserConfigurationException configurationFailure) {
        configurationFailure.printStackTrace(System.err);
        System.exit(1);
    } catch (SAXException saxFailure) {
        saxFailure.printStackTrace(System.err);
        System.exit(1);
    }

    MySAXHandler eventHandler = new MySAXHandler();
    InputSource document = new InputSource(new StringReader(xmlString));
    saxParser.parse(document, eventHandler);
}
From source file:MainClass.java
public static void main(String args[]) throws Exception { SAXParserFactory spf = SAXParserFactory.newInstance(); SAXParser parser = null;// w w w .j a v a 2 s . c o m spf.setNamespaceAware(true); try { SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); spf.setSchema(sf.newSchema(new SAXSource(new InputSource(new StringReader(schemaString))))); parser = spf.newSAXParser(); } catch (SAXException e) { e.printStackTrace(System.err); System.exit(1); } catch (ParserConfigurationException e) { e.printStackTrace(System.err); System.exit(1); } MySAXHandler handler = new MySAXHandler(); System.out.println(schemaString); parser.parse(new InputSource(new StringReader(xmlString)), handler); }
From source file:efen.parsewiki.WikipediaDocumentSequence.java
public static void main(final String arg[]) throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException { SimpleJSAP jsap = new SimpleJSAP(WikipediaDocumentSequence.class.getName(), "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.", new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE), JSAP.NOT_REQUIRED, 'w', "width", "The width, in bits, of the signatures used to sign the function from URIs to their rank."), new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED, "The file containing the Wikipedia dump."), new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED, "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."), new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED, "The URIs of the documents in the collection (generated by ScanMetadata)."), new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of a precomputed virtual document resolver for the collection."), new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of the resulting virtual document resolver.") }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;/*from www . ja v a 2 s . 
co m*/ final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>(); final String baseURL = jsapResult.getString("baseURL"); final ProgressLogger progressLogger = new ProgressLogger(LOGGER); progressLogger.itemsName = "redirects"; progressLogger.start("Extracting redirects..."); final SAXParser parser = saxParserFactory.newSAXParser(); final DefaultHandler handler = new DefaultHandler() { private boolean inTitle; private MutableString title = new MutableString(); @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { inTitle = false; title.length(0); } else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first title element. else if ("redirect".equals(localName) && attributes.getValue("title") != null) { progressLogger.update(); redirects.put(title.copy(), attributes.getValue("title")); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("title".equals(localName)) inTitle = false; } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } }; InputStream in = new FileInputStream(jsapResult.getString("file")); if (jsapResult.userSpecified("bzip2")) in = new BZip2CompressorInputStream(in); parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); progressLogger.done(); final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>(); final VirtualDocumentResolver vdr = 
(VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr")); progressLogger.expectedUpdates = redirects.size(); progressLogger.start("Examining redirects..."); for (Map.Entry<MutableString, String> e : redirects.entrySet()) { final MutableString start = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true)); final MutableString end = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getValue(), true)); final long s = vdr.resolve(start); if (s == -1) { final long t = vdr.resolve(end); if (t != -1) resolved.put(start.copy(), t); else LOGGER.warn("Failed redirect: " + start + " -> " + end); } else LOGGER.warn("URL " + start + " is already known to the virtual document resolver"); progressLogger.lightUpdate(); } progressLogger.done(); //System.err.println(resolved); final Iterable<MutableString> allURIs = Iterables .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet()); final long numberOfDocuments = vdr.numberOfDocuments(); final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso") ? TransformationStrategies.iso() : TransformationStrategies.utf16(); BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments, new ShiftAddXorSignedStringMap(allURIs.iterator(), new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy) .build(), jsapResult.getInt("width")), resolved.values().toLongArray())), jsapResult.getString("redvdr")); }
From source file:it.unimi.di.wikipedia.parsing.NamespacedWikipediaDocumentSequence.java
public static void main(final String arg[]) throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException { SimpleJSAP jsap = new SimpleJSAP(NamespacedWikipediaDocumentSequence.class.getName(), "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.", new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE), JSAP.NOT_REQUIRED, 'w', "width", "The width, in bits, of the signatures used to sign the function from URIs to their rank."), new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED, "The file containing the Wikipedia dump."), new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED, "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."), new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED, "The URIs of the documents in the collection (generated by ScanMetadata)."), new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of a precomputed virtual document resolver for the collection."), new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of the resulting virtual document resolver.") }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;/* ww w . 
j a va 2 s.c o m*/ final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>(); final String baseURL = jsapResult.getString("baseURL"); final ProgressLogger progressLogger = new ProgressLogger(LOGGER); progressLogger.itemsName = "redirects"; progressLogger.start("Extracting redirects..."); final SAXParser parser = saxParserFactory.newSAXParser(); final DefaultHandler handler = new DefaultHandler() { private boolean inTitle; private MutableString title = new MutableString(); @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { inTitle = false; title.length(0); } else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first title element. else if ("redirect".equals(localName) && attributes.getValue("title") != null) { progressLogger.update(); redirects.put(title.copy(), attributes.getValue("title")); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("title".equals(localName)) inTitle = false; } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } }; InputStream in = new FileInputStream(jsapResult.getString("file")); if (jsapResult.userSpecified("bzip2")) in = new BZip2CompressorInputStream(in); parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); progressLogger.done(); final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>(); final VirtualDocumentResolver vdr = 
(VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr")); progressLogger.expectedUpdates = redirects.size(); progressLogger.start("Examining redirects..."); for (Map.Entry<MutableString, String> e : redirects.entrySet()) { final MutableString start = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true)); final MutableString end = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getValue(), true)); final long s = vdr.resolve(start); if (s == -1) { final long t = vdr.resolve(end); if (t != -1) resolved.put(start.copy(), t); else LOGGER.warn("Failed redirect: " + start + " -> " + end); } else LOGGER.warn("URL " + start + " is already known to the virtual document resolver"); progressLogger.lightUpdate(); } progressLogger.done(); //System.err.println(resolved); final Iterable<MutableString> allURIs = Iterables .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet()); final long numberOfDocuments = vdr.numberOfDocuments(); final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso") ? TransformationStrategies.iso() : TransformationStrategies.utf16(); BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments, new ShiftAddXorSignedStringMap(allURIs.iterator(), new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy) .build(), jsapResult.getInt("width")), resolved.values().toLongArray())), jsapResult.getString("redvdr")); }