Example usage for org.xml.sax XMLReader parse

List of usage examples for org.xml.sax XMLReader parse

Introduction

On this page you can find example usages of org.xml.sax.XMLReader.parse.

Prototype

public void parse(String systemId) throws IOException, SAXException;

Source Link

Document

Parse an XML document from a system identifier (URI). Note that the examples below call the parse(InputSource) overload rather than this String variant.

Usage

From source file:org.apache.hadoop.hbase.rest.TestTableScan.java

/**
 * Scans the table over REST as XML and feeds the response through a SAX reader into a
 * JAXB unmarshaller whose listener attaches a row callback to every ClientSideCellSetModel.
 * @throws Exception if the request, the JAXB setup or the XML parsing fails
 */
@Test
public void testScanUsingListenerUnmarshallerXML() throws Exception {
    // Scan path suffix: "/*?<column param>=<column>&<limit param>=10".
    String scanQuery = "/*" + "?" + Constants.SCAN_COLUMN + "=" + COLUMN_1 + "&" + Constants.SCAN_LIMIT + "=10";
    Response response = client.get("/" + TABLE + scanQuery, Constants.MIMETYPE_XML);
    assertEquals(200, response.getCode());
    assertEquals(Constants.MIMETYPE_XML, response.getHeader("content-type"));

    JAXBContext jaxb = JAXBContext.newInstance(ClientSideCellSetModel.class, RowModel.class,
            CellModel.class);
    Unmarshaller unmarshaller = jaxb.createUnmarshaller();

    // Row-level callback: every delivered row must have a key and at least one cell.
    final ClientSideCellSetModel.Listener rowListener = new ClientSideCellSetModel.Listener() {
        @Override
        public void handleRowModel(ClientSideCellSetModel helper, RowModel row) {
            assertTrue(row.getKey() != null);
            assertTrue(row.getCells().size() > 0);
        }
    };

    // Attach the row callback while a ClientSideCellSetModel is being unmarshalled
    // and detach it again once that object is complete.
    unmarshaller.setListener(new Unmarshaller.Listener() {
        @Override
        public void beforeUnmarshal(Object target, Object parent) {
            if (target instanceof ClientSideCellSetModel) {
                ((ClientSideCellSetModel) target).setCellSetModelListener(rowListener);
            }
        }

        @Override
        public void afterUnmarshal(Object target, Object parent) {
            if (target instanceof ClientSideCellSetModel) {
                ((ClientSideCellSetModel) target).setCellSetModelListener(null);
            }
        }
    });

    // Namespace-aware SAX reader that pushes its events into the JAXB unmarshaller.
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    saxFactory.setNamespaceAware(true);
    XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader();
    xmlReader.setContentHandler(unmarshaller.getUnmarshallerHandler());

    assertFalse(ClientSideCellSetModel.listenerInvoked);
    xmlReader.parse(new InputSource(response.getStream()));
    assertTrue(ClientSideCellSetModel.listenerInvoked);
}

From source file:org.apache.jmeter.protocol.http.proxy.DefaultSamplerCreatorClassifier.java

/**
 * Tries parsing to see if content is xml
 * /* ww  w  .  j a  v  a2 s . c  om*/
 * @param postData
 *            String
 * @return boolean
 */
private static final boolean isPotentialXml(String postData) {
    try {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser saxParser = spf.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();
        ErrorDetectionHandler detectionHandler = new ErrorDetectionHandler();
        xmlReader.setContentHandler(detectionHandler);
        xmlReader.setErrorHandler(detectionHandler);
        xmlReader.parse(new InputSource(new StringReader(postData)));
        return !detectionHandler.isErrorDetected();
    } catch (ParserConfigurationException e) {
        return false;
    } catch (SAXException e) {
        return false;
    } catch (IOException e) {
        return false;
    }
}

From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarPageProvider.java

/**
 * Parses a Google Scholar response page and adds the extracted results to {@code triples}.
 * <p>
 * The input is read as ISO-8859-1 by the TagSoup parser, the results collected by
 * {@code GSXMLHandler} are serialized to JSON with Gson, and {@code JSONtoRDF} maps them
 * into {@code triples} using {@code GoogleScholarProvider.MAPPINGSCHEMA}.
 *
 * @param resource    resource identifier handed to the JSON-to-RDF mapper
 * @param requestUrl  URL that produced {@code input}; used for logging and error messages
 * @param triples     model that receives the mapped statements
 * @param input       response body to parse
 * @param contentType not used by this implementation
 * @return always an empty list
 * @throws DataRetrievalException if parsing the input or mapping the results fails
 */
@Override
public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input,
        String contentType) throws DataRetrievalException {
    // NOTE(review): assumes an SLF4J-style logger, whose placeholder is "{}";
    // the previous "{0}" would not be substituted with the URL.
    log.debug("Request Successful to {}", requestUrl);
    final GSXMLHandler gsXMLHandler = new GSXMLHandler();
    gsXMLHandler.clearGSresultList();
    try {
        XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser");
        xr.setContentHandler(gsXMLHandler);
        InputSource gsxml = new InputSource(input);
        gsxml.setEncoding("iso-8859-1");
        xr.parse(gsxml);

        // Turn every collected result into a JSON object for the RDF mapper.
        final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList();
        Gson gson = new Gson();
        JsonArray json = new JsonArray();
        for (GSresult d : gsresultlist) {
            json.add(gson.toJsonTree(d).getAsJsonObject());
        }
        JSONtoRDF parser = new JSONtoRDF(resource, GoogleScholarProvider.MAPPINGSCHEMA, json, triples);
        try {
            parser.parse();
        } catch (Exception e) {
            throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
        }
    } catch (SAXException | IOException e) {
        throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
    }
    return Collections.emptyList();
}

From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarProvider.java

/**
 * Parses the first Google Scholar result page, maps its results into {@code triples},
 * then retrieves the remaining result pages and adds their data as well.
 * <p>
 * The number of pages is derived from the result count reported by {@code GSXMLHandler},
 * assuming ten results per page. Follow-up page URLs are built from {@code API} together
 * with the {@code stringSearch}, {@code authorSearch} and {@code advancedSearch} values
 * defined elsewhere in this class.
 *
 * @param resource    resource identifier handed to the JSON-to-RDF mapper
 * @param requestUrl  URL that produced {@code input}; used for logging and error messages
 * @param triples     model that receives the mapped statements and the follow-up page data
 * @param input       response body of the first page
 * @param contentType not used by this implementation
 * @return always an empty list
 * @throws DataRetrievalException if parsing, mapping or retrieving a follow-up page fails
 */
@Override
public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input,
        String contentType) throws DataRetrievalException {
    // NOTE(review): assumes an SLF4J-style logger, whose placeholder is "{}";
    // the previous "{0}" would not be substituted with the URL.
    log.debug("Request Successful to {}", requestUrl);
    final GSXMLHandler gsXMLHandler = new GSXMLHandler();
    gsXMLHandler.clearGSresultList();
    try {
        XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser");
        xr.setContentHandler(gsXMLHandler);
        InputSource gsxml = new InputSource(input);
        gsxml.setEncoding("iso-8859-1");
        xr.parse(gsxml);

        // Turn every collected result into a JSON object for the RDF mapper.
        final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList();
        Gson gson = new Gson();
        JsonArray json = new JsonArray();
        for (GSresult d : gsresultlist) {
            json.add(gson.toJsonTree(d).getAsJsonObject());
        }
        JSONtoRDF parser = new JSONtoRDF(resource, MAPPINGSCHEMA, json, triples);
        try {
            parser.parse();
        } catch (Exception e) {
            throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
        }

        // Page count kept exactly as before (result count / 10, plus one).
        // NOTE(review): for an exact multiple of ten this requests one extra page - confirm intent.
        int numPages = (int) ((double) (gsXMLHandler.getNumResults() / 10)) + 1;
        Model model = null;
        if (numPages > 1) {
            // One client serves all follow-up pages instead of a new one per page.
            LDClient ldClient = new LDClient(new ClientConfiguration());
            for (int pagesLoaded = 1; pagesLoaded < numPages; pagesLoaded++) {
                String pagenumquery = Integer.toString(pagesLoaded * 10);
                String moreDataUrl = String.format(API, pagenumquery, stringSearch, authorSearch,
                        advancedSearch);
                ClientResponse response = ldClient.retrieveResource(moreDataUrl);
                Model pageModel = response.getData();
                if (model == null) {
                    model = pageModel;
                } else {
                    model.addAll(pageModel);
                }
            }
        }
        // With a single result page nothing was fetched and model is still null;
        // addAll(null) would fail, so only merge when there is something to add.
        if (model != null) {
            triples.addAll(model);
        }
    } catch (SAXException | IOException e) {
        throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
    }
    return Collections.emptyList();
}

From source file:org.apache.maven.plugin.cxx.utils.svn.SvnService.java

/**
 * Runs {@code svn info <uri> --xml} and feeds the captured output through a SAX
 * parser into a new {@link SvnInfo}, which acts as the content handler.
 *
 * @param basedir          passed to the svn command execution
 * @param cred             passed to the svn command execution
 * @param uri              target of the {@code svn info} call
 * @param log              receives command logging and, when tolerated, the parse error
 * @param noParsingFailure when {@code true} a parse failure is only logged and the
 *                         (possibly partially filled) info object is returned
 * @return the SvnInfo populated by the parser
 * @throws MojoExecutionException if parsing fails and {@code noParsingFailure} is {@code false}
 */
public static SvnInfo getSvnInfo(File basedir, Credential cred, String uri, Log log, boolean noParsingFailure)
        throws MojoExecutionException {
    // Capture the command output in memory so it can be parsed afterwards.
    ByteArrayOutputStream svnOutput = new ByteArrayOutputStream();
    String[] infoCommand = { "info", uri, "--xml" };
    execSvnCommand(basedir, cred, infoCommand, svnOutput, log);

    SvnInfo info = new SvnInfo();
    try {
        XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        reader.setContentHandler(info);
        InputSource xml = new InputSource(new ByteArrayInputStream(svnOutput.toByteArray()));
        reader.parse(xml);
    } catch (Exception e) {
        if (!noParsingFailure) {
            throw new MojoExecutionException("svn info xml parsing failed.", e);
        }
        log.error("svn info xml parsing failed : " + e);
    }
    return info;
}

From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java

/**
 * Converts one Excel sheet into CSV content written to a newly created flowfile.
 * On success the flowfile is tagged with sheet name, row count, source file name,
 * a .csv file name and the CSV MIME type, then routed to SUCCESS. If the parser
 * cannot be created the flowfile gets an error attribute and is routed to FAILURE.
 *
 * @param session
 *  The NiFi ProcessSession instance for the current invocation.
 * @param originalParentFF
 *  The flowfile whose filename attribute is recorded as the source file name.
 * @param sheetInputStream
 *  The XML stream of the sheet; it is closed by this method.
 * @param readConfig
 *  Supplies sheet name, styles, shared strings and the value-formatting switch.
 * @param csvFormat
 *  Handed to the CSV sheet handler.
 * @throws IOException
 *  Propagated from writing the flowfile content or closing the sheet stream.
 */
private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF,
        final InputStream sheetInputStream, ExcelSheetReadConfig readConfig, CSVFormat csvFormat)
        throws IOException {

    FlowFile ff = session.create();
    try {
        final DataFormatter cellFormatter = new DataFormatter();
        final InputSource sheetXml = new InputSource(sheetInputStream);
        final SheetToCSV csvHandler = new SheetToCSV(readConfig, csvFormat);
        final XMLReader sheetReader = SAXHelper.newXMLReader();

        // Without a styles table the XSSF handler reports raw values instead of
        // formatted ones, so only pass it along when value formatting is enabled.
        final StylesTable styles;
        if (readConfig.getFormatValues()) {
            styles = readConfig.getStyles();
        } else {
            styles = null;
        }

        sheetReader.setContentHandler(new XSSFSheetXMLHandler(styles, null,
                readConfig.getSharedStringsTable(), csvHandler, cellFormatter, false));

        ff = session.write(ff, out -> {
            PrintStream csvOut = new PrintStream(out);
            csvHandler.setOutput(csvOut);

            try {
                sheetReader.parse(sheetXml);

                sheetInputStream.close();

                csvHandler.close();
                csvOut.close();
            } catch (SAXException se) {
                getLogger().error("Error occurred while processing Excel sheet {}",
                        new Object[] { readConfig.getSheetName() }, se);
            }
        });

        ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName());
        ff = session.putAttribute(ff, ROW_NUM, Long.valueOf(csvHandler.getRowCount()).toString());

        // Record where the sheet came from, falling back to a placeholder name.
        final String parentFilename = originalParentFF.getAttribute(CoreAttributes.FILENAME.key());
        if (StringUtils.isNotEmpty(parentFilename)) {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, parentFilename);
        } else {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME);
        }

        // The content is CSV now: give the file name a .csv extension and fix the MIME type.
        final String csvFilename = updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
                ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName());
        ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), csvFilename);
        ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);

        session.transfer(ff, SUCCESS);

    } catch (SAXException | ParserConfigurationException e) {
        getLogger().error("Failed to create instance of Parser.", e);
        ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage());
        session.transfer(ff, FAILURE);
    } finally {
        sheetInputStream.close();
    }
}

From source file:org.apache.nifi.processors.standard.SplitXml.java

/**
 * Splits the incoming XML flowfile into one flowfile per element at the configured depth.
 * Each split carries a shared fragment identifier, its index, the original filename and,
 * once parsing is done, the total fragment count. If parsing fails the original goes to
 * REL_FAILURE and every split created so far is removed.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final int depth = context.getProperty(SPLIT_DEPTH).asInteger();
    final ComponentLog logger = getLogger();

    final List<FlowFile> splits = new ArrayList<>();
    final String fragmentIdentifier = UUID.randomUUID().toString();
    final AtomicInteger numberOfRecords = new AtomicInteger(0);

    // Invoked by the SAX handler for every completed subtree at the split depth.
    final XmlSplitterSaxParser parser = new XmlSplitterSaxParser(fragmentXml -> {
        FlowFile fragment = session.create(original);
        fragment = session.write(fragment, out -> out.write(fragmentXml.getBytes("UTF-8")));
        fragment = session.putAttribute(fragment, FRAGMENT_ID.key(), fragmentIdentifier);
        final String index = Integer.toString(numberOfRecords.getAndIncrement());
        fragment = session.putAttribute(fragment, FRAGMENT_INDEX.key(), index);
        final String originalName = fragment.getAttribute(CoreAttributes.FILENAME.key());
        fragment = session.putAttribute(fragment, SEGMENT_ORIGINAL_FILENAME.key(), originalName);
        splits.add(fragment);
    }, depth);

    final AtomicBoolean failed = new AtomicBoolean(false);
    session.read(original, rawIn -> {
        try (final InputStream in = new BufferedInputStream(rawIn)) {
            try {
                final XMLReader reader = saxParserFactory.newSAXParser().getXMLReader();
                reader.setContentHandler(parser);
                reader.parse(new InputSource(in));
            } catch (final ParserConfigurationException | SAXException e) {
                logger.error("Unable to parse {} due to {}", new Object[] { original, e });
                failed.set(true);
            }
        }
    });

    if (failed.get()) {
        // Parsing broke off: route the input to failure and discard partial output.
        session.transfer(original, REL_FAILURE);
        session.remove(splits);
        return;
    }

    for (final FlowFile split : splits) {
        final FlowFile counted = session.putAttribute(split, FRAGMENT_COUNT.key(),
                Integer.toString(numberOfRecords.get()));
        session.transfer(counted, REL_SPLIT);
    }

    final FlowFile originalToTransfer = copyAttributesToOriginal(session, original, fragmentIdentifier,
            numberOfRecords.get());
    session.transfer(originalToTransfer, REL_ORIGINAL);
    logger.info("Split {} into {} FlowFiles", new Object[] { originalToTransfer, splits.size() });
}

From source file:org.apache.nutch.tools.DmozParser.java

/**
 * Iterate through all the items in this structured DMOZ file.
 * Add each URL to the web db./*from  w ww  . j  av a2 s .  c o  m*/
 */
public void parseDmozFile(File dmozFile, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern)

        throws IOException, SAXException, ParserConfigurationException {

    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    SAXParser parser = parserFactory.newSAXParser();
    XMLReader reader = parser.getXMLReader();

    // Create our own processor to receive SAX events
    RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew, topicPattern);
    reader.setContentHandler(rp);
    reader.setErrorHandler(rp);
    LOG.info("skew = " + rp.hashSkew);

    //
    // Open filtered text stream.  The TextFilter makes sure that
    // only appropriate XML-approved Text characters are received.
    // Any non-conforming characters are silently skipped.
    //
    XMLCharFilter in = new XMLCharFilter(new BufferedReader(
            new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8")));
    try {
        InputSource is = new InputSource(in);
        reader.parse(is);
    } catch (Exception e) {
        if (LOG.isFatalEnabled()) {
            LOG.fatal(e.toString());
            e.printStackTrace(LogUtil.getFatalStream(LOG));
        }
        System.exit(0);
    } finally {
        in.close();
    }
}

From source file:org.apache.ode.bpel.compiler.bom.BpelObjectFactory.java

/**
 * Parses a BPEL process from the given input source into a DOM document, with
 * validation enabled against the locally registered BPEL/WSDL/XML schemas, and
 * builds the BPEL object model from the document's root element.
 * <p>
 * Validation errors are fatal only when the system property
 * {@code org.apache.ode.compiler.failOnValidationErrors} is {@code true};
 * otherwise they are logged as a warning and processing continues.
 *
 * @param isrc input source.
 * @param systemURI passed on to {@code createBpelObject} together with the root element
 * @return the {@code Process} created from the parsed document's root element
 * @throws IOException propagated from the underlying parse
 * @throws SAXException if parsing fails, or if validation errors occurred in strict mode
 */
public Process parse(InputSource isrc, URI systemURI) throws IOException, SAXException {
    XMLReader reader = XMLParserUtils.getXMLReader();

    // Map each schema namespace to the copy bundled on the classpath.
    final Class<?> anchor = getClass();
    LocalEntityResolver localSchemas = new LocalEntityResolver();
    localSchemas.register(Bpel11QNames.NS_BPEL4WS_2003_03, anchor.getResource("/bpel4ws_1_1-fivesight.xsd"));
    localSchemas.register(Bpel20QNames.NS_WSBPEL2_0, anchor.getResource("/wsbpel_main-draft-Apr-29-2006.xsd"));
    localSchemas.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_ABSTRACT,
            anchor.getResource("/ws-bpel_abstract_common_base.xsd"));
    localSchemas.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_EXEC, anchor.getResource("/ws-bpel_executable.xsd"));
    localSchemas.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_PLINK, anchor.getResource("/ws-bpel_plnktype.xsd"));
    localSchemas.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_SERVREF,
            anchor.getResource("/ws-bpel_serviceref.xsd"));
    localSchemas.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_VARPROP, anchor.getResource("/ws-bpel_varprop.xsd"));
    localSchemas.register(XML, anchor.getResource("/xml.xsd"));
    localSchemas.register(WSDL, anchor.getResource("/wsdl.xsd"));
    localSchemas.register(Bpel20QNames.NS_WSBPEL_PARTNERLINK_2004_03,
            anchor.getResource("/wsbpel_plinkType-draft-Apr-29-2006.xsd"));
    reader.setEntityResolver(localSchemas);

    // SAX events are turned into a DOM tree.
    Document dom = DOMUtils.newDocument();
    reader.setContentHandler(new DOMBuilderContentHandler(dom));
    reader.setFeature("http://xml.org/sax/features/namespaces", true);
    reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);

    reader.setFeature("http://xml.org/sax/features/validation", true);
    XMLParserUtils.addExternalSchemaURL(reader, Bpel11QNames.NS_BPEL4WS_2003_03,
            Bpel11QNames.NS_BPEL4WS_2003_03);
    XMLParserUtils.addExternalSchemaURL(reader, Bpel20QNames.NS_WSBPEL2_0, Bpel20QNames.NS_WSBPEL2_0);
    XMLParserUtils.addExternalSchemaURL(reader, Bpel20QNames.NS_WSBPEL2_0_FINAL_EXEC,
            Bpel20QNames.NS_WSBPEL2_0_FINAL_EXEC);
    XMLParserUtils.addExternalSchemaURL(reader, Bpel20QNames.NS_WSBPEL2_0_FINAL_ABSTRACT,
            Bpel20QNames.NS_WSBPEL2_0_FINAL_ABSTRACT);

    String strictSetting = System.getProperty("org.apache.ode.compiler.failOnValidationErrors", "false");
    boolean strict = Boolean.parseBoolean(strictSetting);
    BOMSAXErrorHandler validationErrors = new BOMSAXErrorHandler(strict);
    reader.setErrorHandler(validationErrors);
    reader.parse(isrc);

    if (!validationErrors.wasOK()) {
        if (strict) {
            throw new SAXException("Validation errors during parsing");
        }
        __log.warn(
                "Validation errors during parsing, continuing due to -Dorg.apache.ode.compiler.failOnValidationErrors=false switch");
    }
    return (Process) createBpelObject(dom.getDocumentElement(), systemURI);
}

From source file:org.apache.ojb.broker.metadata.RepositoryPersistor.java

/**
 * Reads metadata from XML with a SAX parser and populates a new repository of
 * the requested kind.
 * <p>
 * Supported targets are {@code DescriptorRepository} and {@code ConnectionRepository};
 * any other target results in a {@code MetadataException}. Validation is currently
 * hard-wired off.
 */
private Object readMetadataFromXML(InputSource source, Class target)
        throws MalformedURLException, ParserConfigurationException, SAXException, IOException {
    // TODO: make this configurable
    boolean validate = false;

    // Obtain an XML reader, optionally a validating one with our error handler.
    SAXParserFactory factory = SAXParserFactory.newInstance();
    log.info("RepositoryPersistor using SAXParserFactory : " + factory.getClass().getName());
    if (validate) {
        factory.setValidating(true);
    }
    XMLReader reader = factory.newSAXParser().getXMLReader();
    if (validate) {
        reader.setErrorHandler(new OJBErrorHandler());
    }

    // Choose the empty repository to fill and the handler that builds its structure.
    final Object repository;
    final ContentHandler handler;
    if (DescriptorRepository.class.equals(target)) {
        DescriptorRepository descriptors = new DescriptorRepository();
        handler = new RepositoryXmlHandler(descriptors);
        repository = descriptors;
    } else if (ConnectionRepository.class.equals(target)) {
        ConnectionRepository connections = new ConnectionRepository();
        handler = new ConnectionDescriptorXmlHandler(connections);
        repository = connections;
    } else {
        throw new MetadataException(
                "Could not build a repository instance for '" + target + "', using source " + source);
    }

    // Let the parser drive the chosen handler over the source.
    reader.setContentHandler(handler);
    reader.parse(source);
    return repository;
}