Example usage for org.xml.sax XMLReader setContentHandler

List of usage examples for org.xml.sax XMLReader setContentHandler

Introduction

On this page you can find example usages of org.xml.sax XMLReader setContentHandler.

Prototype

public void setContentHandler(ContentHandler handler);

Source Link

Document

Allow an application to register a content event handler.

Usage

From source file:org.apache.cocoon.xml.dom.DomHelper.java

/**
 * Parses the given input into a W3C Document that remembers the location of
 * each element in the source file. Those locations can be looked up later
 * with the {@link #getLocation(Element)} method.
 *
 * @param inputSource the inputSource to read the document from
 * @return the document produced by the DOM builder once parsing has finished
 * @throws SAXException if parsing fails; also used to wrap a
 *         {@link ParserConfigurationException} raised while setting up the parser
 * @throws SAXNotSupportedException declared in the signature for callers
 * @throws IOException if the input cannot be read
 */
public static Document parse(InputSource inputSource)
        throws SAXException, SAXNotSupportedException, IOException {
    try {
        final XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader();
        final DOMBuilder domBuilder = new DOMBuilder();

        // Route SAX events through the location pipe so that location
        // information is added to the stream before it reaches the builder.
        xmlReader.setContentHandler(new LocationAttributes.Pipe(domBuilder));
        xmlReader.parse(inputSource);

        return domBuilder.getDocument();
    } catch (ParserConfigurationException configurationProblem) {
        // Callers only deal with SAX and I/O failures; keep the cause attached.
        throw new SAXException(configurationProblem);
    }
}

From source file:org.apache.fop.fotreetest.FOTreeTestCase.java

/**
 * Runs a test: parses the test file with FOP's default handler and fails
 * when the result collector reports anything.
 *
 * @throws Exception if a test or FOP itself fails
 */
@Test
public void runTest() throws Exception {
    try {
        final ResultCollector resultCollector = ResultCollector.getInstance();
        resultCollector.reset();

        final SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setNamespaceAware(true);
        parserFactory.setValidating(false);
        final XMLReader plainReader = parserFactory.newSAXParser().getXMLReader();

        // Restore the factory values that processing instructions may have changed
        fopFactory.setBreakIndentInheritanceOnReferenceAreaBoundary(
                FopFactoryConfigurator.DEFAULT_BREAK_INDENT_INHERITANCE);
        fopFactory.setSourceResolution(FopFactoryConfigurator.DEFAULT_SOURCE_RESOLUTION);

        final FOUserAgent userAgent = fopFactory.newFOUserAgent();
        userAgent.setBaseURL(testFile.getParentFile().toURI().toURL().toString());
        userAgent.setFOEventHandlerOverride(new DummyFOEventHandler(userAgent));
        userAgent.getEventBroadcaster()
                .addEventListener(new ConsoleEventListenerForTests(testFile.getName()));

        // The listener wraps the reader so processing instructions can set
        // values in the user agent
        final XMLReader reader = new PIListener(plainReader, userAgent);

        final Fop fop = fopFactory.newFop(userAgent);

        reader.setContentHandler(fop.getDefaultHandler());
        reader.setDTDHandler(fop.getDefaultHandler());
        reader.setErrorHandler(fop.getDefaultHandler());
        reader.setEntityResolver(fop.getDefaultHandler());
        try {
            reader.parse(testFile.toURI().toURL().toExternalForm());
        } catch (Exception parseFailure) {
            // Record the failure with the collector before propagating it
            resultCollector.notifyError(parseFailure.getLocalizedMessage());
            throw parseFailure;
        }

        final List<String> collected = resultCollector.getResults();
        if (!collected.isEmpty()) {
            for (String entry : collected) {
                System.out.println(entry);
            }
            // The first collected entry becomes the failure message
            throw new IllegalStateException(collected.get(0));
        }
    } catch (Exception failure) {
        org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Error on " + testFile.getName());
        throw failure;
    }
}

From source file:org.apache.hadoop.hbase.rest.TestTableScan.java

/**
 * An example to scan using listener in unmarshaller for XML.
 * @throws Exception the exception/*from   ww  w .j  a  v a2 s. co m*/
 */
@Test
public void testScanUsingListenerUnmarshallerXML() throws Exception {
    StringBuilder builder = new StringBuilder();
    builder.append("/*");
    builder.append("?");
    builder.append(Constants.SCAN_COLUMN + "=" + COLUMN_1);
    builder.append("&");
    builder.append(Constants.SCAN_LIMIT + "=10");
    Response response = client.get("/" + TABLE + builder.toString(), Constants.MIMETYPE_XML);
    assertEquals(200, response.getCode());
    assertEquals(Constants.MIMETYPE_XML, response.getHeader("content-type"));
    JAXBContext context = JAXBContext.newInstance(ClientSideCellSetModel.class, RowModel.class,
            CellModel.class);
    Unmarshaller unmarshaller = context.createUnmarshaller();

    final ClientSideCellSetModel.Listener listener = new ClientSideCellSetModel.Listener() {
        @Override
        public void handleRowModel(ClientSideCellSetModel helper, RowModel row) {
            assertTrue(row.getKey() != null);
            assertTrue(row.getCells().size() > 0);
        }
    };

    // install the callback on all ClientSideCellSetModel instances
    unmarshaller.setListener(new Unmarshaller.Listener() {
        public void beforeUnmarshal(Object target, Object parent) {
            if (target instanceof ClientSideCellSetModel) {
                ((ClientSideCellSetModel) target).setCellSetModelListener(listener);
            }
        }

        public void afterUnmarshal(Object target, Object parent) {
            if (target instanceof ClientSideCellSetModel) {
                ((ClientSideCellSetModel) target).setCellSetModelListener(null);
            }
        }
    });

    // create a new XML parser
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    XMLReader reader = factory.newSAXParser().getXMLReader();
    reader.setContentHandler(unmarshaller.getUnmarshallerHandler());
    assertFalse(ClientSideCellSetModel.listenerInvoked);
    reader.parse(new InputSource(response.getStream()));
    assertTrue(ClientSideCellSetModel.listenerInvoked);

}

From source file:org.apache.jmeter.protocol.http.proxy.DefaultSamplerCreatorClassifier.java

/**
 * Tries parsing to see if content is xml
 * /*from w ww  .j  a  va2  s . c  o  m*/
 * @param postData
 *            String
 * @return boolean
 */
private static final boolean isPotentialXml(String postData) {
    try {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser saxParser = spf.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();
        ErrorDetectionHandler detectionHandler = new ErrorDetectionHandler();
        xmlReader.setContentHandler(detectionHandler);
        xmlReader.setErrorHandler(detectionHandler);
        xmlReader.parse(new InputSource(new StringReader(postData)));
        return !detectionHandler.isErrorDetected();
    } catch (ParserConfigurationException e) {
        return false;
    } catch (SAXException e) {
        return false;
    } catch (IOException e) {
        return false;
    }
}

From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarPageProvider.java

/**
 * Parses a response with the TagSoup parser and passes the collected
 * results on to the JSON-to-RDF conversion.
 * <p>
 * The input is read as ISO-8859-1. Every result gathered by the
 * {@code GSXMLHandler} is serialized to a JSON object, and the resulting
 * array is handed to {@code JSONtoRDF} together with {@code resource},
 * {@code GoogleScholarProvider.MAPPINGSCHEMA} and {@code triples}.
 *
 * @param resource    the resource identifier passed to the conversion
 * @param requestUrl  the URL of the request; used in log and error messages
 * @param triples     the model passed to the conversion
 * @param input       the response body to parse
 * @param contentType the content type of the response (not used by this method)
 * @return always an empty list
 * @throws DataRetrievalException if parsing the input fails or the
 *         JSON-to-RDF conversion throws
 */
@Override
public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input,
        String contentType) throws DataRetrievalException {
    // "{}" is the placeholder substituted by debug(String, Object) style loggers;
    // the previous "{0}" was logged literally and the URL was dropped.
    // NOTE(review): assumes an SLF4J-style logger - confirm against the field declaration.
    log.debug("Request Successful to {}", requestUrl);
    final GSXMLHandler gsXMLHandler = new GSXMLHandler();
    gsXMLHandler.clearGSresultList();
    try {
        XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser");
        xr.setContentHandler(gsXMLHandler);
        InputSource gsxml = new InputSource(input);
        gsxml.setEncoding("iso-8859-1");
        xr.parse(gsxml);

        // Serialize every collected result to JSON for the RDF mapping step
        final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList();
        Gson gson = new Gson();
        JsonArray json = new JsonArray();
        for (GSresult d : gsresultlist) {
            json.add(gson.toJsonTree(d).getAsJsonObject());
        }
        JSONtoRDF parser = new JSONtoRDF(resource, GoogleScholarProvider.MAPPINGSCHEMA, json, triples);
        try {
            parser.parse();
        } catch (Exception e) {
            throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
        }

    } catch (SAXException | IOException e) {
        throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
    }

    return Collections.emptyList();
}

From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarProvider.java

/**
 * Parses the first response with the TagSoup parser, passes the collected
 * results on to the JSON-to-RDF conversion, and then retrieves the remaining
 * pages through an {@code LDClient}, adding their data to {@code triples}.
 * <p>
 * The input is read as ISO-8859-1. Follow-up URLs are built from the
 * {@code API} format with an offset of {@code pagesLoaded * 10}.
 *
 * @param resource    the resource identifier passed to the conversion
 * @param requestUrl  the URL of the request; used in log and error messages
 * @param triples     the model that receives the converted and retrieved data
 * @param input       the response body to parse
 * @param contentType the content type of the response (not used by this method)
 * @return always an empty list
 * @throws DataRetrievalException if parsing the input fails, the JSON-to-RDF
 *         conversion throws, or retrieving a follow-up page fails
 */
@Override
public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input,
        String contentType) throws DataRetrievalException {
    // "{}" is the placeholder substituted by debug(String, Object) style loggers;
    // the previous "{0}" was logged literally and the URL was dropped.
    // NOTE(review): assumes an SLF4J-style logger - confirm against the field declaration.
    log.debug("Request Successful to {}", requestUrl);
    final GSXMLHandler gsXMLHandler = new GSXMLHandler();
    gsXMLHandler.clearGSresultList();
    try {
        XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser");
        xr.setContentHandler(gsXMLHandler);
        InputSource gsxml = new InputSource(input);
        gsxml.setEncoding("iso-8859-1");
        xr.parse(gsxml);

        // Serialize every collected result to JSON for the RDF mapping step
        final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList();
        Gson gson = new Gson();
        JsonArray json = new JsonArray();
        for (GSresult d : gsresultlist) {
            json.add(gson.toJsonTree(d).getAsJsonObject());
        }
        JSONtoRDF parser = new JSONtoRDF(resource, MAPPINGSCHEMA, json, triples);
        try {
            parser.parse();
        } catch (Exception e) {
            throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
        }

        // NOTE(review): presumably 10 results per page; this formula requests one
        // extra page when the result count is an exact multiple of 10 - confirm
        // whether that is intended before changing it.
        int numPages = (int) ((double) (gsXMLHandler.getNumResults() / 10)) + 1;
        int pagesLoaded = 1;
        Model model = null;
        while (pagesLoaded < numPages) {

            String pagenumquery = Integer.toString(pagesLoaded * 10);
            String moreDataUrl = String.format(API, pagenumquery, stringSearch, authorSearch, advancedSearch);
            ClientConfiguration conf = new ClientConfiguration();
            LDClient ldClient = new LDClient(conf);
            ClientResponse response = ldClient.retrieveResource(moreDataUrl);
            Model pageModel = response.getData();
            // Accumulate separately so triples is only touched once all pages loaded
            if (model == null) {
                model = pageModel;
            } else {
                model.addAll(pageModel);
            }
            pagesLoaded++;
        }
        // When there is only one page the loop never runs and model stays null;
        // addAll(null) would throw a NullPointerException, so skip it.
        if (model != null) {
            triples.addAll(model);
        }

    } catch (SAXException | IOException e) {
        throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
    }

    return Collections.emptyList();
}

From source file:org.apache.maven.plugin.cxx.utils.svn.SvnService.java

/**
 * Runs {@code svn info <uri> --xml} and feeds the captured output to a
 * {@code SvnInfo} instance acting as SAX content handler.
 *
 * @param basedir          passed through to the svn command execution
 * @param cred             passed through to the svn command execution
 * @param uri              the target given to {@code svn info}
 * @param log              receives the error message when parsing fails leniently
 * @param noParsingFailure when {@code true}, a parsing failure is only logged
 *                         and the (possibly incomplete) info object is returned
 * @return the info object that handled the parsed output
 * @throws MojoExecutionException if parsing fails and
 *         {@code noParsingFailure} is {@code false}
 */
public static SvnInfo getSvnInfo(File basedir, Credential cred, String uri, Log log, boolean noParsingFailure)
        throws MojoExecutionException {
    final ByteArrayOutputStream commandOutput = new ByteArrayOutputStream();
    execSvnCommand(basedir, cred, new String[] { "info", uri, "--xml" }, commandOutput, log);

    final SvnInfo info = new SvnInfo();
    try {
        final XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        xmlReader.setContentHandler(info);
        xmlReader.parse(new InputSource(new ByteArrayInputStream(commandOutput.toByteArray())));
    } catch (Exception e) {
        if (!noParsingFailure) {
            throw new MojoExecutionException("svn info xml parsing failed.", e);
        }
        // Lenient mode: report the problem and fall through to return what we have
        log.error("svn info xml parsing failed : " + e);
    }
    return info;
}

From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java

/**
 * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile.
 *
 * @param session/*from w  w w  . j  av  a2 s. co  m*/
 *  The NiFi ProcessSession instance for the current invocation.
 */
private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF,
        final InputStream sheetInputStream, ExcelSheetReadConfig readConfig, CSVFormat csvFormat)
        throws IOException {

    FlowFile ff = session.create();
    try {
        final DataFormatter formatter = new DataFormatter();
        final InputSource sheetSource = new InputSource(sheetInputStream);

        final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);

        final XMLReader parser = SAXHelper.newXMLReader();

        //If Value Formatting is set to false then don't pass in the styles table.
        // This will cause the XSSF Handler to return the raw value instead of the formatted one.
        final StylesTable sst = readConfig.getFormatValues() ? readConfig.getStyles() : null;

        final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(sst, null,
                readConfig.getSharedStringsTable(), sheetHandler, formatter, false);

        parser.setContentHandler(handler);

        ff = session.write(ff, new OutputStreamCallback() {
            @Override
            public void process(OutputStream out) throws IOException {
                PrintStream outPrint = new PrintStream(out);
                sheetHandler.setOutput(outPrint);

                try {
                    parser.parse(sheetSource);

                    sheetInputStream.close();

                    sheetHandler.close();
                    outPrint.close();
                } catch (SAXException se) {
                    getLogger().error("Error occurred while processing Excel sheet {}",
                            new Object[] { readConfig.getSheetName() }, se);
                }
            }
        });

        ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName());
        ff = session.putAttribute(ff, ROW_NUM, new Long(sheetHandler.getRowCount()).toString());

        if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME,
                    originalParentFF.getAttribute(CoreAttributes.FILENAME.key()));
        } else {
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME);
        }

        //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE
        ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(),
                updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
                        ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName()));
        ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);

        session.transfer(ff, SUCCESS);

    } catch (SAXException | ParserConfigurationException saxE) {
        getLogger().error("Failed to create instance of Parser.", saxE);
        ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage());
        session.transfer(ff, FAILURE);
    } finally {
        sheetInputStream.close();
    }
}

From source file:org.apache.nifi.processors.standard.SplitXml.java

/**
 * Splits the incoming flowfile's XML content into one flowfile per XML
 * tree emitted by the splitter at the configured depth.
 * <p>
 * On a parse failure the original is routed to {@code REL_FAILURE} and all
 * fragments created so far are removed; otherwise every fragment is routed
 * to {@code REL_SPLIT} and the original to {@code REL_ORIGINAL}.
 *
 * @param context provides the configured split depth
 * @param session used to read the original and create, write and route flowfiles
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final int depth = context.getProperty(SPLIT_DEPTH).asInteger();
    final ComponentLog logger = getLogger();

    final List<FlowFile> fragments = new ArrayList<>();
    final String fragmentIdentifier = UUID.randomUUID().toString();
    final AtomicInteger fragmentCounter = new AtomicInteger(0);

    // Every XML tree reported by the splitter becomes a child flowfile of the original
    final XmlSplitterSaxParser splitter = new XmlSplitterSaxParser(xmlTree -> {
        FlowFile fragment = session.create(original);
        fragment = session.write(fragment, out -> out.write(xmlTree.getBytes("UTF-8")));
        fragment = session.putAttribute(fragment, FRAGMENT_ID.key(), fragmentIdentifier);
        fragment = session.putAttribute(fragment, FRAGMENT_INDEX.key(),
                Integer.toString(fragmentCounter.getAndIncrement()));
        fragment = session.putAttribute(fragment, SEGMENT_ORIGINAL_FILENAME.key(),
                fragment.getAttribute(CoreAttributes.FILENAME.key()));
        fragments.add(fragment);
    }, depth);

    final AtomicBoolean parseFailed = new AtomicBoolean(false);
    session.read(original, rawIn -> {
        try (final InputStream in = new BufferedInputStream(rawIn)) {
            try {
                final XMLReader reader = saxParserFactory.newSAXParser().getXMLReader();
                reader.setContentHandler(splitter);
                reader.parse(new InputSource(in));
            } catch (final ParserConfigurationException | SAXException e) {
                // Remember the failure; routing is decided after the read completes
                logger.error("Unable to parse {} due to {}", new Object[] { original, e });
                parseFailed.set(true);
            }
        }
    });

    if (parseFailed.get()) {
        session.transfer(original, REL_FAILURE);
        session.remove(fragments);
        return;
    }

    // The total count is only known now, so stamp it on each fragment before routing
    for (final FlowFile fragment : fragments) {
        final FlowFile counted = session.putAttribute(fragment, FRAGMENT_COUNT.key(),
                Integer.toString(fragmentCounter.get()));
        session.transfer(counted, REL_SPLIT);
    }

    final FlowFile originalToTransfer = copyAttributesToOriginal(session, original, fragmentIdentifier,
            fragmentCounter.get());
    session.transfer(originalToTransfer, REL_ORIGINAL);
    logger.info("Split {} into {} FlowFiles", new Object[] { originalToTransfer, fragments.size() });
}

From source file:org.apache.nutch.tools.DmozParser.java

/**
 * Iterate through all the items in this structured DMOZ file.
 * Add each URL to the web db.
 * <p>
 * The file is read as UTF-8 through a character filter and parsed with a SAX
 * reader whose content and error handler is an {@code RDFProcessor}. Any
 * exception raised while parsing is logged at fatal level (when enabled)
 * and the JVM is then terminated with {@code System.exit(0)}.
 *
 * @param dmozFile     the DMOZ file to read
 * @param subsetDenom  passed through to the {@code RDFProcessor}
 * @param includeAdult passed through to the {@code RDFProcessor}
 * @param skew         passed through to the {@code RDFProcessor}
 * @param topicPattern passed through to the {@code RDFProcessor}
 * @throws IOException if the file cannot be opened or closed
 * @throws SAXException if the SAX parser cannot be obtained
 * @throws ParserConfigurationException if the SAX parser cannot be configured
 */
public void parseDmozFile(File dmozFile, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern)

        throws IOException, SAXException, ParserConfigurationException {

    XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();

    // Our own processor receives both the SAX content events and the errors
    RDFProcessor processor = new RDFProcessor(xmlReader, subsetDenom, includeAdult, skew, topicPattern);
    xmlReader.setContentHandler(processor);
    xmlReader.setErrorHandler(processor);
    LOG.info("skew = " + processor.hashSkew);

    //
    // Open filtered text stream.  The filter makes sure that
    // only appropriate XML-approved Text characters are received.
    // Any non-conforming characters are silently skipped.
    //
    BufferedInputStream fileBytes = new BufferedInputStream(new FileInputStream(dmozFile));
    BufferedReader utf8Text = new BufferedReader(new InputStreamReader(fileBytes, "UTF-8"));
    XMLCharFilter filtered = new XMLCharFilter(utf8Text);
    try {
        xmlReader.parse(new InputSource(filtered));
    } catch (Exception e) {
        if (LOG.isFatalEnabled()) {
            LOG.fatal(e.toString());
            e.printStackTrace(LogUtil.getFatalStream(LOG));
        }
        System.exit(0);
    } finally {
        filtered.close();
    }
}