List of usage examples for javax.xml.parsers SAXParser getXMLReader
/**
 * Returns the {@link org.xml.sax.XMLReader} that backs this {@code SAXParser}, so that
 * callers can register their own handlers on it and drive the parse directly.
 *
 * @return the underlying {@code XMLReader}
 * @throws SAXException if the reader cannot be obtained because of a SAX error
 */
public abstract org.xml.sax.XMLReader getXMLReader() throws SAXException;
From source file:org.apache.nifi.processors.standard.SplitXml.java
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) { final FlowFile original = session.get(); if (original == null) { return;/*from w w w .ja va 2 s.c o m*/ } final int depth = context.getProperty(SPLIT_DEPTH).asInteger(); final ComponentLog logger = getLogger(); final List<FlowFile> splits = new ArrayList<>(); final String fragmentIdentifier = UUID.randomUUID().toString(); final AtomicInteger numberOfRecords = new AtomicInteger(0); final XmlSplitterSaxParser parser = new XmlSplitterSaxParser(xmlTree -> { FlowFile split = session.create(original); split = session.write(split, out -> out.write(xmlTree.getBytes("UTF-8"))); split = session.putAttribute(split, FRAGMENT_ID.key(), fragmentIdentifier); split = session.putAttribute(split, FRAGMENT_INDEX.key(), Integer.toString(numberOfRecords.getAndIncrement())); split = session.putAttribute(split, SEGMENT_ORIGINAL_FILENAME.key(), split.getAttribute(CoreAttributes.FILENAME.key())); splits.add(split); }, depth); final AtomicBoolean failed = new AtomicBoolean(false); session.read(original, rawIn -> { try (final InputStream in = new BufferedInputStream(rawIn)) { SAXParser saxParser = null; try { saxParser = saxParserFactory.newSAXParser(); final XMLReader reader = saxParser.getXMLReader(); reader.setContentHandler(parser); reader.parse(new InputSource(in)); } catch (final ParserConfigurationException | SAXException e) { logger.error("Unable to parse {} due to {}", new Object[] { original, e }); failed.set(true); } } }); if (failed.get()) { session.transfer(original, REL_FAILURE); session.remove(splits); } else { splits.forEach((split) -> { split = session.putAttribute(split, FRAGMENT_COUNT.key(), Integer.toString(numberOfRecords.get())); session.transfer(split, REL_SPLIT); }); final FlowFile originalToTransfer = copyAttributesToOriginal(session, original, fragmentIdentifier, numberOfRecords.get()); session.transfer(originalToTransfer, REL_ORIGINAL); logger.info("Split {} into {} 
FlowFiles", new Object[] { originalToTransfer, splits.size() }); } }
From source file:org.apache.nutch.tools.DmozParser.java
/** * Iterate through all the items in this structured DMOZ file. * Add each URL to the web db.// w w w .j ava 2 s . c o m */ public void parseDmozFile(File dmozFile, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern) throws IOException, SAXException, ParserConfigurationException { SAXParserFactory parserFactory = SAXParserFactory.newInstance(); SAXParser parser = parserFactory.newSAXParser(); XMLReader reader = parser.getXMLReader(); // Create our own processor to receive SAX events RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew, topicPattern); reader.setContentHandler(rp); reader.setErrorHandler(rp); LOG.info("skew = " + rp.hashSkew); // // Open filtered text stream. The TextFilter makes sure that // only appropriate XML-approved Text characters are received. // Any non-conforming characters are silently skipped. // XMLCharFilter in = new XMLCharFilter(new BufferedReader( new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8"))); try { InputSource is = new InputSource(in); reader.parse(is); } catch (Exception e) { if (LOG.isFatalEnabled()) { LOG.fatal(e.toString()); e.printStackTrace(LogUtil.getFatalStream(LOG)); } System.exit(0); } finally { in.close(); } }
From source file:org.apache.ojb.broker.metadata.RepositoryPersistor.java
/** * Read metadata by populating an instance of the target class * using SAXParser./*w ww . j a v a2 s . c o m*/ */ private Object readMetadataFromXML(InputSource source, Class target) throws MalformedURLException, ParserConfigurationException, SAXException, IOException { // TODO: make this configurable boolean validate = false; // get a xml reader instance: SAXParserFactory factory = SAXParserFactory.newInstance(); log.info("RepositoryPersistor using SAXParserFactory : " + factory.getClass().getName()); if (validate) { factory.setValidating(true); } SAXParser p = factory.newSAXParser(); XMLReader reader = p.getXMLReader(); if (validate) { reader.setErrorHandler(new OJBErrorHandler()); } Object result; if (DescriptorRepository.class.equals(target)) { // create an empty repository: DescriptorRepository repository = new DescriptorRepository(); // create handler for building the repository structure ContentHandler handler = new RepositoryXmlHandler(repository); // tell parser to use our handler: reader.setContentHandler(handler); reader.parse(source); result = repository; } else if (ConnectionRepository.class.equals(target)) { // create an empty repository: ConnectionRepository repository = new ConnectionRepository(); // create handler for building the repository structure ContentHandler handler = new ConnectionDescriptorXmlHandler(repository); // tell parser to use our handler: reader.setContentHandler(handler); reader.parse(source); //LoggerFactory.getBootLogger().info("loading XML took " + (stop - start) + " msecs"); result = repository; } else throw new MetadataException( "Could not build a repository instance for '" + target + "', using source " + source); return result; }
From source file:org.apache.oozie.util.GraphGenerator.java
/** * Stream the PNG file to client/* w ww . j a va 2s. c om*/ * @param out * @throws Exception */ public void write(OutputStream out) throws Exception { SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setFeature("http://xml.org/sax/features/external-general-entities", false); spf.setFeature("http://xml.org/sax/features/external-parameter-entities", false); spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); spf.setNamespaceAware(true); SAXParser saxParser = spf.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); xmlReader.setContentHandler(new XMLParser(out)); xmlReader.parse(new InputSource(new StringReader(xml))); }
From source file:org.apache.openjpa.lib.xml.XMLFactory.java
/** * Return a SAXParser with the specified configuration. *///from w w w.ja v a 2s . co m public static SAXParser getSAXParser(boolean validating, boolean namespaceAware) { SAXParser sp; try { sp = _saxFactories[factoryIndex(validating, namespaceAware)].newSAXParser(); } catch (ParserConfigurationException pce) { throw new NestableRuntimeException(pce); } catch (SAXException se) { throw new NestableRuntimeException(se); } if (validating) { try { sp.getXMLReader().setErrorHandler(_validating); } catch (SAXException se) { throw new NestableRuntimeException(se); } } return sp; }
From source file:org.apache.openmeetings.db.dao.label.LabelDao.java
/**
 * Parses an XML stream and returns one {@code StringLabel} per {@code ENTRY_ELEMENT} element.
 *
 * <p>The label key is taken from the element's {@code KEY_ATTR} attribute and the label value
 * is the character content of the element. Text outside of an entry element (for example the
 * whitespace between entries when the document carries no DTD) is ignored.
 *
 * @param is the XML stream to read; it is not closed by this method
 * @return the labels in document order, never {@code null}
 * @throws Exception if the parser cannot be created or the document cannot be parsed
 */
private static List<StringLabel> getLabels(InputStream is) throws Exception {
    final List<StringLabel> labels = new ArrayList<StringLabel>();
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setNamespaceAware(true);
    SAXParser parser = spf.newSAXParser();
    XMLReader xr = parser.getXMLReader();
    xr.setContentHandler(new ContentHandler() {
        /** The entry currently being read, or null while outside of any entry element. */
        private StringLabel label = null;
        /** Text collected for the current entry; SAX may deliver it in several chunks. */
        private final StringBuilder value = new StringBuilder();

        @Override
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts)
                throws SAXException {
            if (ENTRY_ELEMENT.equals(localName)) {
                label = new StringLabel(atts.getValue(KEY_ATTR), "");
                value.setLength(0);
            }
        }

        @Override
        public void startDocument() throws SAXException {
        }

        @Override
        public void skippedEntity(String name) throws SAXException {
        }

        @Override
        public void setDocumentLocator(Locator locator) {
        }

        @Override
        public void processingInstruction(String target, String data) throws SAXException {
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        }

        @Override
        public void endPrefixMapping(String prefix) throws SAXException {
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (ENTRY_ELEMENT.equals(localName)) {
                label.setValue(value.toString());
                labels.add(label);
                // Leave the entry so that following text is not attributed to this label.
                label = null;
            }
        }

        @Override
        public void endDocument() throws SAXException {
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // Text may arrive before the first entry or between entries; only entry content counts.
            if (label != null) {
                value.append(ch, start, length);
            }
        }
    });
    xr.parse(new InputSource(is));
    return labels;
}
From source file:org.apache.poi.xssf.eventusermodel.XLSX2CSV.java
/** * Parses and shows the content of one sheet * using the specified styles and shared-strings tables. * * @param styles/* www . j a va 2 s . c om*/ * @param strings * @param sheetInputStream */ public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException { InputSource sheetSource = new InputSource(sheetInputStream); SAXParserFactory saxFactory = SAXParserFactory.newInstance(); SAXParser saxParser = saxFactory.newSAXParser(); XMLReader sheetParser = saxParser.getXMLReader(); ContentHandler handler = new MyXSSFSheetHandler(styles, strings, this.minColumns, this.output); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); }
From source file:org.atombeat.xquery.functions.util.RequestGetData.java
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { RequestModule myModule = (RequestModule) context.getModule(RequestModule.NAMESPACE_URI); // request object is read from global variable $request Variable var = myModule.resolveVariable(RequestModule.REQUEST_VAR); if (var == null || var.getValue() == null) throw new XPathException(this, "No request object found in the current XQuery context."); if (var.getValue().getItemType() != Type.JAVA_OBJECT) throw new XPathException(this, "Variable $request is not bound to an Java object."); JavaObjectValue value = (JavaObjectValue) var.getValue().itemAt(0); if (value.getObject() instanceof RequestWrapper) { RequestWrapper request = (RequestWrapper) value.getObject(); //if the content length is unknown, return if (request.getContentLength() == -1) { return Sequence.EMPTY_SEQUENCE; }//from w w w .j a va2 s. com //first, get the content of the request byte[] bufRequestData = null; try { InputStream is = request.getInputStream(); ByteArrayOutputStream bos = new ByteArrayOutputStream(request.getContentLength()); byte[] buf = new byte[256]; int l = 0; while ((l = is.read(buf)) > -1) { bos.write(buf, 0, l); } bufRequestData = bos.toByteArray(); } catch (IOException ioe) { throw new XPathException(this, "An IO exception ocurred: " + ioe.getMessage(), ioe); } //was there any POST content if (bufRequestData != null) { //determine if exists mime database considers this binary data String contentType = request.getContentType(); if (contentType != null) { //strip off any charset encoding info if (contentType.indexOf(";") > -1) contentType = contentType.substring(0, contentType.indexOf(";")); MimeType mimeType = MimeTable.getInstance().getContentType(contentType); //<atombeat> // this code will only encode the request data if the mimeType // is present in the mime table, and the mimeType is stated // as binary... 
// if(mimeType != null) // { // if(!mimeType.isXMLType()) // { // //binary data // return new Base64Binary(bufRequestData); // } // } // this code takes a more conservative position and assumes that // if the mime type is not present in the table, the request // data should be treated as binary, and should be encoded as // base 64... if (mimeType == null || !mimeType.isXMLType()) { return new Base64Binary(bufRequestData); } //</atombeat> } //try and parse as an XML documemnt, otherwise fallback to returning the data as a string context.pushDocumentContext(); try { //try and construct xml document from input stream, we use eXist's in-memory DOM implementation SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); //TODO : we should be able to cope with context.getBaseURI() InputSource src = new InputSource(new ByteArrayInputStream(bufRequestData)); SAXParser parser = factory.newSAXParser(); XMLReader reader = parser.getXMLReader(); MemTreeBuilder builder = context.getDocumentBuilder(); DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder, true); reader.setContentHandler(receiver); reader.parse(src); Document doc = receiver.getDocument(); return (NodeValue) doc.getDocumentElement(); } catch (ParserConfigurationException e) { //do nothing, we will default to trying to return a string below } catch (SAXException e) { //do nothing, we will default to trying to return a string below } catch (IOException e) { //do nothing, we will default to trying to return a string below } finally { context.popDocumentContext(); } //not a valid XML document, return a string representation of the document String encoding = request.getCharacterEncoding(); if (encoding == null) { encoding = "UTF-8"; } try { String s = new String(bufRequestData, encoding); return new StringValue(s); } catch (IOException e) { throw new XPathException(this, "An IO exception ocurred: " + e.getMessage(), e); } } else { //no post data return 
Sequence.EMPTY_SEQUENCE; } } else { throw new XPathException(this, "Variable $request is not bound to a Request object."); } }
From source file:org.betaconceptframework.astroboa.test.util.JAXBValidationUtils.java
public void validateUsingSAX(InputStream is) throws Exception { SAXParser saxParser = parserFactory.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); xmlReader.setEntityResolver(entityResolver); xmlReader.setErrorHandler(errorHandler); errorHandler.setIgnoreInvalidElementSequence(false); is = encodeURLsFoundInXML(is);// w ww. j a v a 2 s . c o m xmlReader.parse(new InputSource(is)); }
From source file:org.castor.xml.AbstractInternalContext.java
@Override public XMLReader getXMLReader(final String features) { XMLReader reader = null;// w ww . j a va2 s . c om Boolean validation = _properties.getBoolean(XMLProperties.PARSER_VALIDATION); Boolean namespaces = _properties.getBoolean(XMLProperties.NAMESPACES); String readerClassName = _properties.getString(XMLProperties.PARSER); if (readerClassName == null || readerClassName.length() == 0) { SAXParser saxParser = XMLParserUtils.getSAXParser(validation.booleanValue(), namespaces.booleanValue()); if (saxParser != null) { try { reader = saxParser.getXMLReader(); } catch (SAXException e) { LOG.error(Messages.format("conf.configurationError", e)); } } } if (reader == null) { if ((readerClassName == null) || (readerClassName.length() == 0) || (readerClassName.equalsIgnoreCase("xerces"))) { readerClassName = "org.apache.xerces.parsers.SAXParser"; } reader = XMLParserUtils.instantiateXMLReader(readerClassName); } XMLParserUtils.setFeaturesOnXmlReader(_properties.getString(XMLProperties.PARSER_FEATURES, features), _properties.getString(XMLProperties.PARSER_FEATURES_DISABLED, ""), validation.booleanValue(), namespaces.booleanValue(), reader); return reader; }