Example usage for javax.xml.parsers SAXParser getXMLReader

List of usage examples for javax.xml.parsers SAXParser getXMLReader

Introduction

On this page you can find example usages of javax.xml.parsers.SAXParser.getXMLReader().

Prototype


public abstract org.xml.sax.XMLReader getXMLReader() throws SAXException;

Source Link

Document

Returns the org.xml.sax.XMLReader that is encapsulated by the implementation of this class.

Usage

From source file:org.apache.nifi.processors.standard.SplitXml.java

/**
 * Splits the XML content of the next queued FlowFile into one FlowFile per
 * subtree reported by an {@code XmlSplitterSaxParser} configured with the
 * {@code SPLIT_DEPTH} property.
 *
 * <p>Every split is tagged with a shared fragment identifier, its index, the
 * filename attribute it carries, and (once parsing succeeded) the total
 * fragment count. If the parser cannot be configured or the content cannot be
 * parsed, the original is routed to {@code REL_FAILURE} and all splits created
 * so far are removed.
 *
 * @param context supplies the configured split depth
 * @param session used to read the original and to create, write and route splits
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        // Nothing is queued for this invocation.
        return;
    }

    final int depth = context.getProperty(SPLIT_DEPTH).asInteger();
    final ComponentLog logger = getLogger();

    final String fragmentIdentifier = UUID.randomUUID().toString();
    final AtomicInteger numberOfRecords = new AtomicInteger(0);
    final List<FlowFile> splits = new ArrayList<>();

    // Callback invoked by the splitter for every subtree found at the requested depth.
    final XmlSplitterSaxParser parser = new XmlSplitterSaxParser(xmlTree -> {
        final FlowFile created = session.create(original);
        final FlowFile written = session.write(created, out -> out.write(xmlTree.getBytes("UTF-8")));
        final FlowFile withId = session.putAttribute(written, FRAGMENT_ID.key(), fragmentIdentifier);
        final String fragmentIndex = Integer.toString(numberOfRecords.getAndIncrement());
        final FlowFile withIndex = session.putAttribute(withId, FRAGMENT_INDEX.key(), fragmentIndex);
        final String filename = withIndex.getAttribute(CoreAttributes.FILENAME.key());
        splits.add(session.putAttribute(withIndex, SEGMENT_ORIGINAL_FILENAME.key(), filename));
    }, depth);

    final AtomicBoolean failed = new AtomicBoolean(false);
    session.read(original, rawIn -> {
        try (final InputStream in = new BufferedInputStream(rawIn)) {
            try {
                final XMLReader reader = saxParserFactory.newSAXParser().getXMLReader();
                reader.setContentHandler(parser);
                reader.parse(new InputSource(in));
            } catch (final ParserConfigurationException | SAXException e) {
                // Record the failure; routing happens after the read callback returns.
                logger.error("Unable to parse {} due to {}", new Object[] { original, e });
                failed.set(true);
            }
        }
    });

    if (failed.get()) {
        session.transfer(original, REL_FAILURE);
        session.remove(splits);
        return;
    }

    // The total is only known after parsing, so it is stamped on each split now.
    final String fragmentCount = Integer.toString(numberOfRecords.get());
    for (final FlowFile split : splits) {
        final FlowFile counted = session.putAttribute(split, FRAGMENT_COUNT.key(), fragmentCount);
        session.transfer(counted, REL_SPLIT);
    }

    final FlowFile originalToTransfer = copyAttributesToOriginal(session, original, fragmentIdentifier,
            numberOfRecords.get());
    session.transfer(originalToTransfer, REL_ORIGINAL);
    logger.info("Split {} into {} FlowFiles", new Object[] { originalToTransfer, splits.size() });
}

From source file:org.apache.nutch.tools.DmozParser.java

/**
 * Parses a structured DMOZ file, feeding every SAX event to an
 * {@code RDFProcessor} built from the given selection parameters.
 *
 * <p>The file is decoded as UTF-8 and passed through an {@code XMLCharFilter}
 * before it reaches the parser. Any exception raised while parsing is logged
 * at fatal level and terminates the JVM.
 *
 * @param dmozFile     the DMOZ file to read
 * @param subsetDenom  forwarded to the {@code RDFProcessor}
 * @param includeAdult forwarded to the {@code RDFProcessor}
 * @param skew         forwarded to the {@code RDFProcessor}
 * @param topicPattern forwarded to the {@code RDFProcessor}
 * @throws IOException                  if the file cannot be opened or closed
 * @throws SAXException                 if the XML reader cannot be obtained
 * @throws ParserConfigurationException if no SAX parser can be created
 */
public void parseDmozFile(File dmozFile, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern)
        throws IOException, SAXException, ParserConfigurationException {

    final XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();

    // The processor acts as both content handler and error handler.
    final RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew, topicPattern);
    reader.setContentHandler(rp);
    reader.setErrorHandler(rp);
    LOG.info("skew = " + rp.hashSkew);

    // Build the filtered text stream step by step: raw bytes -> UTF-8 text -> character filter.
    final BufferedInputStream rawBytes = new BufferedInputStream(new FileInputStream(dmozFile));
    final BufferedReader utf8Text = new BufferedReader(new InputStreamReader(rawBytes, "UTF-8"));
    final XMLCharFilter in = new XMLCharFilter(utf8Text);
    try {
        reader.parse(new InputSource(in));
    } catch (Exception e) {
        if (LOG.isFatalEnabled()) {
            LOG.fatal(e.toString());
            e.printStackTrace(LogUtil.getFatalStream(LOG));
        }
        // NOTE(review): exits with status 0 even though parsing failed - confirm this is intended.
        System.exit(0);
    } finally {
        in.close();
    }
}

From source file:org.apache.ojb.broker.metadata.RepositoryPersistor.java

/**
 * Read metadata by populating an instance of the target class
 * using SAXParser./*w  ww  .  j  a  v a2  s  . c  o m*/
 */
private Object readMetadataFromXML(InputSource source, Class target)
        throws MalformedURLException, ParserConfigurationException, SAXException, IOException {
    // TODO: make this configurable
    boolean validate = false;

    // get a xml reader instance:
    SAXParserFactory factory = SAXParserFactory.newInstance();
    log.info("RepositoryPersistor using SAXParserFactory : " + factory.getClass().getName());
    if (validate) {
        factory.setValidating(true);
    }
    SAXParser p = factory.newSAXParser();
    XMLReader reader = p.getXMLReader();
    if (validate) {
        reader.setErrorHandler(new OJBErrorHandler());
    }

    Object result;
    if (DescriptorRepository.class.equals(target)) {
        // create an empty repository:
        DescriptorRepository repository = new DescriptorRepository();
        // create handler for building the repository structure
        ContentHandler handler = new RepositoryXmlHandler(repository);
        // tell parser to use our handler:
        reader.setContentHandler(handler);
        reader.parse(source);
        result = repository;
    } else if (ConnectionRepository.class.equals(target)) {
        // create an empty repository:
        ConnectionRepository repository = new ConnectionRepository();
        // create handler for building the repository structure
        ContentHandler handler = new ConnectionDescriptorXmlHandler(repository);
        // tell parser to use our handler:
        reader.setContentHandler(handler);
        reader.parse(source);
        //LoggerFactory.getBootLogger().info("loading XML took " + (stop - start) + " msecs");
        result = repository;
    } else
        throw new MetadataException(
                "Could not build a repository instance for '" + target + "', using source " + source);
    return result;
}

From source file:org.apache.oozie.util.GraphGenerator.java

/**
 * Stream the PNG file to client/* w ww  .  j  a va  2s. c  om*/
 * @param out
 * @throws Exception
 */
public void write(OutputStream out) throws Exception {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
    spf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    spf.setNamespaceAware(true);
    SAXParser saxParser = spf.newSAXParser();
    XMLReader xmlReader = saxParser.getXMLReader();
    xmlReader.setContentHandler(new XMLParser(out));
    xmlReader.parse(new InputSource(new StringReader(xml)));
}

From source file:org.apache.openjpa.lib.xml.XMLFactory.java

/**
 * Return a SAXParser with the specified configuration.
 *
 * <p>The parser comes from the cached factory selected by
 * {@code factoryIndex(validating, namespaceAware)}. When {@code validating} is
 * set, the shared {@code _validating} error handler is installed on the
 * parser's XML reader.
 *
 * @param validating     whether a validating parser is requested
 * @param namespaceAware whether a namespace-aware parser is requested
 * @return the newly created parser
 * @throws NestableRuntimeException wrapping any parser configuration or SAX failure
 */
public static SAXParser getSAXParser(boolean validating, boolean namespaceAware) {
    try {
        final SAXParser parser = _saxFactories[factoryIndex(validating, namespaceAware)].newSAXParser();
        if (validating) {
            parser.getXMLReader().setErrorHandler(_validating);
        }
        return parser;
    } catch (ParserConfigurationException pce) {
        throw new NestableRuntimeException(pce);
    } catch (SAXException se) {
        throw new NestableRuntimeException(se);
    }
}

From source file:org.apache.openmeetings.db.dao.label.LabelDao.java

/**
 * Reads string labels from an XML stream.
 *
 * <p>Every element whose local name equals {@code ENTRY_ELEMENT} yields one
 * {@link StringLabel}: its key is the {@code KEY_ATTR} attribute and its value
 * is the concatenated character data found inside the element. Character data
 * outside such an element (for example indentation between entries) is ignored.
 *
 * @param is the XML input stream to parse
 * @return the labels in document order
 * @throws Exception if the parser cannot be created or the input cannot be parsed
 */
private static List<StringLabel> getLabels(InputStream is) throws Exception {
    final List<StringLabel> labels = new ArrayList<StringLabel>();
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setNamespaceAware(true);
    SAXParser parser = spf.newSAXParser();
    XMLReader xr = parser.getXMLReader();
    xr.setContentHandler(new ContentHandler() {
        // The entry currently being read; null while the parser is outside any entry element.
        StringLabel label = null;

        @Override
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts)
                throws SAXException {
            if (ENTRY_ELEMENT.equals(localName)) {
                label = new StringLabel(atts.getValue(KEY_ATTR), "");
            }
        }

        @Override
        public void startDocument() throws SAXException {
        }

        @Override
        public void skippedEntity(String name) throws SAXException {
        }

        @Override
        public void setDocumentLocator(Locator locator) {
        }

        @Override
        public void processingInstruction(String target, String data) throws SAXException {
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        }

        @Override
        public void endPrefixMapping(String prefix) throws SAXException {
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (ENTRY_ELEMENT.equals(localName)) {
                labels.add(label);
                // Close the entry so that text following it is not appended to this label.
                label = null;
            }
        }

        @Override
        public void endDocument() throws SAXException {
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (label == null) {
                // Text outside an entry element (e.g. whitespace between entries) carries no label data.
                return;
            }
            // The parser may deliver an element's text in several chunks, so accumulate.
            StringBuilder sb = new StringBuilder(label.getValue());
            sb.append(ch, start, length);
            label.setValue(sb.toString());
        }
    });
    xr.parse(new InputSource(is));
    return labels;
}

From source file:org.apache.poi.xssf.eventusermodel.XLSX2CSV.java

/**
 * Parses and shows the content of one sheet
 * using the specified styles and shared-strings tables.
 *
 * @param styles/* www  .  j  a  va 2  s  . c  om*/
 * @param strings
 * @param sheetInputStream
 */
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
        throws IOException, ParserConfigurationException, SAXException {

    InputSource sheetSource = new InputSource(sheetInputStream);
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    SAXParser saxParser = saxFactory.newSAXParser();
    XMLReader sheetParser = saxParser.getXMLReader();
    ContentHandler handler = new MyXSSFSheetHandler(styles, strings, this.minColumns, this.output);
    sheetParser.setContentHandler(handler);
    sheetParser.parse(sheetSource);
}

From source file:org.atombeat.xquery.functions.util.RequestGetData.java

/**
 * Returns the body of the HTTP request bound to the {@code $request} variable.
 *
 * <p>The result depends on the request:
 * <ul>
 *   <li>unknown content length ({@code -1}): the empty sequence;</li>
 *   <li>a content type that is missing from the mime table or is not an XML
 *       type: the raw bytes as {@code Base64Binary};</li>
 *   <li>otherwise the body is parsed as XML and its document element is
 *       returned; if parsing fails, the body is returned as a string decoded
 *       with the request's character encoding (UTF-8 when none is given).</li>
 * </ul>
 *
 * <p>Because the body is supplied by the client, the XML parser is configured
 * to ignore external entities and external DTDs.
 *
 * @param args            function arguments (not read by this implementation)
 * @param contextSequence the context sequence (not read by this implementation)
 * @return the request data as described above
 * @throws XPathException if {@code $request} is unbound, is not a Java object,
 *                        does not wrap a {@code RequestWrapper}, or the body
 *                        cannot be read or decoded
 */
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {

    RequestModule myModule = (RequestModule) context.getModule(RequestModule.NAMESPACE_URI);

    // request object is read from global variable $request
    Variable var = myModule.resolveVariable(RequestModule.REQUEST_VAR);

    if (var == null || var.getValue() == null)
        throw new XPathException(this, "No request object found in the current XQuery context.");

    if (var.getValue().getItemType() != Type.JAVA_OBJECT)
        throw new XPathException(this, "Variable $request is not bound to an Java object.");

    JavaObjectValue value = (JavaObjectValue) var.getValue().itemAt(0);

    if (value.getObject() instanceof RequestWrapper) {
        RequestWrapper request = (RequestWrapper) value.getObject();

        //if the content length is unknown, return
        if (request.getContentLength() == -1) {
            return Sequence.EMPTY_SEQUENCE;
        }

        //first, get the content of the request
        byte[] bufRequestData = null;
        try {
            InputStream is = request.getInputStream();
            ByteArrayOutputStream bos = new ByteArrayOutputStream(request.getContentLength());
            byte[] buf = new byte[256];
            int l = 0;
            while ((l = is.read(buf)) > -1) {
                bos.write(buf, 0, l);
            }
            bufRequestData = bos.toByteArray();
        } catch (IOException ioe) {
            throw new XPathException(this, "An IO exception ocurred: " + ioe.getMessage(), ioe);
        }

        //was there any POST content
        if (bufRequestData != null) {
            //determine if exists mime database considers this binary data
            String contentType = request.getContentType();
            if (contentType != null) {
                //strip off any charset encoding info
                if (contentType.indexOf(";") > -1)
                    contentType = contentType.substring(0, contentType.indexOf(";"));

                MimeType mimeType = MimeTable.getInstance().getContentType(contentType);
                //<atombeat>
                // The stock implementation only base64-encoded the data when the
                // mime type was present in the mime table AND stated as non-XML.
                //
                // This code takes a more conservative position and assumes that
                // if the mime type is not present in the table, the request
                // data should be treated as binary, and should be encoded as
                // base 64...

                if (mimeType == null || !mimeType.isXMLType()) {
                    return new Base64Binary(bufRequestData);
                }
                //</atombeat>
            }

            //try and parse as an XML document, otherwise fallback to returning the data as a string
            context.pushDocumentContext();
            try {
                //try and construct xml document from input stream, we use eXist's in-memory DOM implementation
                SAXParserFactory factory = SAXParserFactory.newInstance();
                factory.setNamespaceAware(true);
                // The request body is untrusted: do not resolve external entities or
                // fetch external DTDs (XXE / server-side request forgery). If the parser
                // rejects one of these features, the resulting exception is handled by
                // the catch clauses below and the body is returned as a string instead
                // of being parsed with an unsafe configuration.
                factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
                factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
                //TODO : we should be able to cope with context.getBaseURI()
                InputSource src = new InputSource(new ByteArrayInputStream(bufRequestData));
                SAXParser parser = factory.newSAXParser();
                XMLReader reader = parser.getXMLReader();
                MemTreeBuilder builder = context.getDocumentBuilder();
                DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder, true);
                reader.setContentHandler(receiver);
                reader.parse(src);
                Document doc = receiver.getDocument();
                return (NodeValue) doc.getDocumentElement();
            } catch (ParserConfigurationException e) {
                //do nothing, we will default to trying to return a string below
            } catch (SAXException e) {
                //do nothing, we will default to trying to return a string below
            } catch (IOException e) {
                //do nothing, we will default to trying to return a string below
            } finally {
                // always undo the pushDocumentContext() above, including on the early return
                context.popDocumentContext();
            }

            //not a valid XML document, return a string representation of the document
            String encoding = request.getCharacterEncoding();
            if (encoding == null) {
                encoding = "UTF-8";
            }
            try {
                String s = new String(bufRequestData, encoding);
                return new StringValue(s);
            } catch (IOException e) {
                throw new XPathException(this, "An IO exception ocurred: " + e.getMessage(), e);
            }
        } else {
            //no post data
            return Sequence.EMPTY_SEQUENCE;
        }
    } else {
        throw new XPathException(this, "Variable $request is not bound to a Request object.");
    }
}

From source file:org.betaconceptframework.astroboa.test.util.JAXBValidationUtils.java

/**
 * Parses the given XML with a SAX reader that uses this object's entity
 * resolver and error handler, so problems surface through that error handler.
 *
 * <p>The error handler is told not to ignore invalid element sequences, and
 * the input is passed through {@code encodeURLsFoundInXML} before parsing.
 *
 * @param is the XML content to validate
 * @throws Exception if the parser cannot be created, the input cannot be
 *                   pre-processed, or parsing fails
 */
public void validateUsingSAX(InputStream is) throws Exception {
    final XMLReader reader = parserFactory.newSAXParser().getXMLReader();
    reader.setEntityResolver(entityResolver);
    reader.setErrorHandler(errorHandler);

    errorHandler.setIgnoreInvalidElementSequence(false);

    // Parse the pre-processed stream rather than the caller's original one.
    final InputStream encoded = encodeURLsFoundInXML(is);
    reader.parse(new InputSource(encoded));
}

From source file:org.castor.xml.AbstractInternalContext.java

/**
 * Creates an XML reader according to the configured XML properties.
 *
 * <p>When no parser class is configured, a reader is requested from a JAXP
 * SAX parser first. If that yields nothing, or a parser class is configured,
 * the reader is instantiated by class name; an empty name or the alias
 * "xerces" selects {@code org.apache.xerces.parsers.SAXParser}. Finally the
 * configured features are applied to the reader.
 *
 * @param features default list of features to enable, used when the
 *                 {@code PARSER_FEATURES} property is not set
 * @return the configured reader
 */
@Override
public XMLReader getXMLReader(final String features) {
    final Boolean validation = _properties.getBoolean(XMLProperties.PARSER_VALIDATION);
    final Boolean namespaces = _properties.getBoolean(XMLProperties.NAMESPACES);

    String readerClassName = _properties.getString(XMLProperties.PARSER);
    final boolean noParserConfigured = readerClassName == null || readerClassName.length() == 0;

    XMLReader reader = null;
    if (noParserConfigured) {
        // No explicit parser class: try the JAXP route first.
        final SAXParser saxParser = XMLParserUtils.getSAXParser(validation.booleanValue(),
                namespaces.booleanValue());
        if (saxParser != null) {
            try {
                reader = saxParser.getXMLReader();
            } catch (SAXException e) {
                LOG.error(Messages.format("conf.configurationError", e));
            }
        }
    }

    if (reader == null) {
        // Fall back to instantiating the reader class by name.
        if (noParserConfigured || readerClassName.equalsIgnoreCase("xerces")) {
            readerClassName = "org.apache.xerces.parsers.SAXParser";
        }
        reader = XMLParserUtils.instantiateXMLReader(readerClassName);
    }

    final String enabledFeatures = _properties.getString(XMLProperties.PARSER_FEATURES, features);
    final String disabledFeatures = _properties.getString(XMLProperties.PARSER_FEATURES_DISABLED, "");
    XMLParserUtils.setFeaturesOnXmlReader(enabledFeatures, disabledFeatures, validation.booleanValue(),
            namespaces.booleanValue(), reader);

    return reader;
}