Usage examples for org.xml.sax.XMLReader#setContentHandler(ContentHandler). Each example below shows how a ContentHandler is attached to an XMLReader before parsing.
public void setContentHandler(ContentHandler handler);
From source file:org.apache.cocoon.xml.dom.DomHelper.java
/** * Creates a W3C Document that remembers the location of each element in * the source file. The location of element nodes can then be retrieved * using the {@link #getLocation(Element)} method. * * @param inputSource the inputSource to read the document from *///from w ww . j a v a 2 s .com public static Document parse(InputSource inputSource) throws SAXException, SAXNotSupportedException, IOException { try { final XMLReader parser = saxFactory.newSAXParser().getXMLReader(); final DOMBuilder builder = new DOMBuilder(); // Enhance the sax stream with location information final ContentHandler locationHandler = new LocationAttributes.Pipe(builder); parser.setContentHandler(locationHandler); parser.parse(inputSource); return builder.getDocument(); } catch (ParserConfigurationException pce) { throw new SAXException(pce); } }
From source file:org.apache.fop.fotreetest.FOTreeTestCase.java
/**
 * Runs a single FO-tree test: parses the test file with FOP's default handler
 * wired into every SAX callback slot and fails if the ResultCollector gathered
 * any error messages.
 * @throws Exception if a test or FOP itself fails
 */
@Test
public void runTest() throws Exception {
    try {
        ResultCollector collector = ResultCollector.getInstance();
        collector.reset();
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        spf.setValidating(false);
        SAXParser parser = spf.newSAXParser();
        XMLReader reader = parser.getXMLReader();
        // Resetting values modified by processing instructions in earlier tests,
        // since fopFactory is shared across test runs.
        fopFactory.setBreakIndentInheritanceOnReferenceAreaBoundary(
                FopFactoryConfigurator.DEFAULT_BREAK_INDENT_INHERITANCE);
        fopFactory.setSourceResolution(FopFactoryConfigurator.DEFAULT_SOURCE_RESOLUTION);
        FOUserAgent ua = fopFactory.newFOUserAgent();
        ua.setBaseURL(testFile.getParentFile().toURI().toURL().toString());
        ua.setFOEventHandlerOverride(new DummyFOEventHandler(ua));
        ua.getEventBroadcaster().addEventListener(new ConsoleEventListenerForTests(testFile.getName()));
        // Used to set values in the user agent through processing instructions
        // embedded in the test file itself.
        reader = new PIListener(reader, ua);
        Fop fop = fopFactory.newFop(ua);
        // FOP's default handler implements all four SAX interfaces, so route
        // content, DTD, error and entity events through it.
        reader.setContentHandler(fop.getDefaultHandler());
        reader.setDTDHandler(fop.getDefaultHandler());
        reader.setErrorHandler(fop.getDefaultHandler());
        reader.setEntityResolver(fop.getDefaultHandler());
        try {
            reader.parse(testFile.toURI().toURL().toExternalForm());
        } catch (Exception e) {
            // Record the failure for the collector before propagating it.
            collector.notifyError(e.getLocalizedMessage());
            throw e;
        }
        List<String> results = collector.getResults();
        if (results.size() > 0) {
            // Print every collected error, then fail with the first one.
            for (int i = 0; i < results.size(); i++) {
                System.out.println((String) results.get(i));
            }
            throw new IllegalStateException((String) results.get(0));
        }
    } catch (Exception e) {
        // Identify which test file failed before rethrowing.
        org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Error on " + testFile.getName());
        throw e;
    }
}
From source file:org.apache.hadoop.hbase.rest.TestTableScan.java
/**
 * An example to scan using a listener installed in the JAXB unmarshaller for XML:
 * rows are validated via a callback as they stream through the unmarshaller,
 * rather than being materialized all at once.
 * @throws Exception the exception
 */
@Test
public void testScanUsingListenerUnmarshallerXML() throws Exception {
    // Build the scan request path: /<table>/*?SCAN_COLUMN=<col>&SCAN_LIMIT=10
    StringBuilder builder = new StringBuilder();
    builder.append("/*");
    builder.append("?");
    builder.append(Constants.SCAN_COLUMN + "=" + COLUMN_1);
    builder.append("&");
    builder.append(Constants.SCAN_LIMIT + "=10");
    Response response = client.get("/" + TABLE + builder.toString(), Constants.MIMETYPE_XML);
    assertEquals(200, response.getCode());
    assertEquals(Constants.MIMETYPE_XML, response.getHeader("content-type"));
    JAXBContext context = JAXBContext.newInstance(ClientSideCellSetModel.class, RowModel.class,
            CellModel.class);
    Unmarshaller unmarshaller = context.createUnmarshaller();
    // Per-row callback: every unmarshalled row must have a key and at least one cell.
    final ClientSideCellSetModel.Listener listener = new ClientSideCellSetModel.Listener() {
        @Override
        public void handleRowModel(ClientSideCellSetModel helper, RowModel row) {
            assertTrue(row.getKey() != null);
            assertTrue(row.getCells().size() > 0);
        }
    };
    // install the callback on all ClientSideCellSetModel instances
    unmarshaller.setListener(new Unmarshaller.Listener() {
        public void beforeUnmarshal(Object target, Object parent) {
            if (target instanceof ClientSideCellSetModel) {
                ((ClientSideCellSetModel) target).setCellSetModelListener(listener);
            }
        }

        public void afterUnmarshal(Object target, Object parent) {
            // Detach the listener once the cell set has been fully unmarshalled.
            if (target instanceof ClientSideCellSetModel) {
                ((ClientSideCellSetModel) target).setCellSetModelListener(null);
            }
        }
    });
    // create a new XML parser and feed its SAX events into the unmarshaller
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    XMLReader reader = factory.newSAXParser().getXMLReader();
    reader.setContentHandler(unmarshaller.getUnmarshallerHandler());
    assertFalse(ClientSideCellSetModel.listenerInvoked);
    reader.parse(new InputSource(response.getStream()));
    // The row listener must have fired during parsing.
    assertTrue(ClientSideCellSetModel.listenerInvoked);
}
From source file:org.apache.jmeter.protocol.http.proxy.DefaultSamplerCreatorClassifier.java
/** * Tries parsing to see if content is xml * /*from w ww .j a va2 s . c o m*/ * @param postData * String * @return boolean */ private static final boolean isPotentialXml(String postData) { try { SAXParserFactory spf = SAXParserFactory.newInstance(); SAXParser saxParser = spf.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); ErrorDetectionHandler detectionHandler = new ErrorDetectionHandler(); xmlReader.setContentHandler(detectionHandler); xmlReader.setErrorHandler(detectionHandler); xmlReader.parse(new InputSource(new StringReader(postData))); return !detectionHandler.isErrorDetected(); } catch (ParserConfigurationException e) { return false; } catch (SAXException e) { return false; } catch (IOException e) { return false; } }
From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarPageProvider.java
/**
 * Parses a Google Scholar results page (HTML, tolerated via TagSoup) into
 * GSresult beans, converts them to JSON, and maps that JSON onto RDF triples
 * in the supplied model.
 */
@Override
public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input,
        String contentType) throws DataRetrievalException {
    log.debug("Request Successful to {0}", requestUrl);
    // SAX handler that accumulates scraped results; clear any state from a prior run.
    final GSXMLHandler gsXMLHandler = new GSXMLHandler();
    gsXMLHandler.clearGSresultList();
    try {
        // TagSoup parses the (possibly malformed) HTML as a SAX event stream.
        XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser");
        xr.setContentHandler(gsXMLHandler);
        InputSource gsxml = new InputSource(input);
        gsxml.setEncoding("iso-8859-1");
        xr.parse(gsxml);
        final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList();
        // Convert each scraped result to a JSON object, then map JSON -> RDF
        // using the provider's mapping schema.
        Gson gson = new Gson();
        JsonArray json = new JsonArray();
        for (GSresult d : gsresultlist) {
            json.add(gson.toJsonTree(d).getAsJsonObject());
        }
        JSONtoRDF parser = new JSONtoRDF(resource, GoogleScholarProvider.MAPPINGSCHEMA, json, triples);
        try {
            parser.parse();
        } catch (Exception e) {
            throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
        }
    } catch (SAXException | IOException e) {
        throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e);
    }
    // Triples are added directly to the supplied model; no follow-up
    // resources for the LD client to fetch. (A commented-out legacy
    // candidate-crawling implementation was removed here.)
    return Collections.emptyList();
}
From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarProvider.java
@Override public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input, String contentType) throws DataRetrievalException { log.debug("Request Successful to {0}", requestUrl); final GSXMLHandler gsXMLHandler = new GSXMLHandler(); gsXMLHandler.clearGSresultList();/*www . j a v a 2 s . c om*/ try { XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser"); xr.setContentHandler(gsXMLHandler); InputSource gsxml = new InputSource(input); gsxml.setEncoding("iso-8859-1"); xr.parse(gsxml); final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList(); Gson gson = new Gson(); JsonArray json = new JsonArray(); for (GSresult d : gsresultlist) { json.add(gson.toJsonTree(d).getAsJsonObject()); } JSONtoRDF parser = new JSONtoRDF(resource, MAPPINGSCHEMA, json, triples); try { parser.parse(); } catch (Exception e) { throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e); } int numPages = (int) ((double) (gsXMLHandler.getNumResults() / 10)) + 1; int pagesLoaded = 1; Model model = null; while (pagesLoaded < numPages) { String pagenumquery = Integer.toString(pagesLoaded * 10); String moreDataUrl = String.format(API, pagenumquery, stringSearch, authorSearch, advancedSearch); ClientConfiguration conf = new ClientConfiguration(); LDClient ldClient = new LDClient(conf); ClientResponse response = ldClient.retrieveResource(moreDataUrl); Model pageModel = response.getData(); if (model == null) { model = pageModel; } else { model.addAll(pageModel); } pagesLoaded++; } triples.addAll(model); } catch (SAXException | IOException e) { throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e); } // try { // List<String> candidates = new ArrayList<String>(); // ValueFactory factory = ValueFactoryImpl.getInstance(); // final Document doc = new SAXBuilder(XMLReaders.NONVALIDATING).build(input); // for(Element element: queryElements(doc, 
"/result/hits/hit/info/url")) { // String candidate = element.getText(); // triples.add(factory.createStatement(factory.createURI( resource ), FOAF.member, factory.createURI( candidate ) )); // candidates.add(candidate); // } // ClientConfiguration conf = new ClientConfiguration(); // LDClient ldClient = new LDClient(conf); // if(!candidates.isEmpty()) { // Model candidateModel = null; // for(String author: candidates) { // ClientResponse response = ldClient.retrieveResource(author); // Model authorModel = response.getData(); // if(candidateModel == null) { // candidateModel = authorModel; // } else { // candidateModel.addAll(authorModel); // } // } // triples.addAll(candidateModel); // } // }catch (IOException e) { // throw new DataRetrievalException("I/O error while parsing HTML response", e); // }catch (JDOMException e) { // throw new DataRetrievalException("could not parse XML response. It is not in proper XML format", e); // } return Collections.emptyList(); }
From source file:org.apache.maven.plugin.cxx.utils.svn.SvnService.java
public static SvnInfo getSvnInfo(File basedir, Credential cred, String uri, Log log, boolean noParsingFailure) throws MojoExecutionException { ByteArrayOutputStream out = new ByteArrayOutputStream(); execSvnCommand(basedir, cred, new String[] { "info", uri, "--xml" }, out, log); SvnInfo svnInfo = new SvnInfo(); try {//from ww w .j a va 2 s .c om SAXParserFactory sfactory = SAXParserFactory.newInstance(); SAXParser parser = sfactory.newSAXParser(); XMLReader xmlparser = parser.getXMLReader(); xmlparser.setContentHandler(svnInfo); xmlparser.parse(new InputSource(new ByteArrayInputStream(out.toByteArray()))); } catch (Exception e) { if (noParsingFailure) { log.error("svn info xml parsing failed : " + e); } else { throw new MojoExecutionException("svn info xml parsing failed.", e); } } return svnInfo; }
From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java
/** * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile. * * @param session/*from w w w . j av a2 s. co m*/ * The NiFi ProcessSession instance for the current invocation. */ private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final InputStream sheetInputStream, ExcelSheetReadConfig readConfig, CSVFormat csvFormat) throws IOException { FlowFile ff = session.create(); try { final DataFormatter formatter = new DataFormatter(); final InputSource sheetSource = new InputSource(sheetInputStream); final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat); final XMLReader parser = SAXHelper.newXMLReader(); //If Value Formatting is set to false then don't pass in the styles table. // This will cause the XSSF Handler to return the raw value instead of the formatted one. final StylesTable sst = readConfig.getFormatValues() ? readConfig.getStyles() : null; final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(sst, null, readConfig.getSharedStringsTable(), sheetHandler, formatter, false); parser.setContentHandler(handler); ff = session.write(ff, new OutputStreamCallback() { @Override public void process(OutputStream out) throws IOException { PrintStream outPrint = new PrintStream(out); sheetHandler.setOutput(outPrint); try { parser.parse(sheetSource); sheetInputStream.close(); sheetHandler.close(); outPrint.close(); } catch (SAXException se) { getLogger().error("Error occurred while processing Excel sheet {}", new Object[] { readConfig.getSheetName() }, se); } } }); ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName()); ff = session.putAttribute(ff, ROW_NUM, new Long(sheetHandler.getRowCount()).toString()); if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) { ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key())); } else { ff = session.putAttribute(ff, 
SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME); } //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()), ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName())); ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); session.transfer(ff, SUCCESS); } catch (SAXException | ParserConfigurationException saxE) { getLogger().error("Failed to create instance of Parser.", saxE); ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage()); session.transfer(ff, FAILURE); } finally { sheetInputStream.close(); } }
From source file:org.apache.nifi.processors.standard.SplitXml.java
/**
 * Splits the incoming XML FlowFile into one FlowFile per element found at the
 * configured depth, tagging each split with fragment id/index/count attributes.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final int depth = context.getProperty(SPLIT_DEPTH).asInteger();
    final ComponentLog logger = getLogger();
    final List<FlowFile> splits = new ArrayList<>();
    final String fragmentIdentifier = UUID.randomUUID().toString();
    final AtomicInteger numberOfRecords = new AtomicInteger(0);
    // Each extracted XML fragment becomes its own FlowFile carrying the shared
    // fragment id, its index, and the original filename.
    final XmlSplitterSaxParser parser = new XmlSplitterSaxParser(xmlTree -> {
        FlowFile split = session.create(original);
        split = session.write(split, out -> out.write(xmlTree.getBytes("UTF-8")));
        split = session.putAttribute(split, FRAGMENT_ID.key(), fragmentIdentifier);
        split = session.putAttribute(split, FRAGMENT_INDEX.key(),
                Integer.toString(numberOfRecords.getAndIncrement()));
        split = session.putAttribute(split, SEGMENT_ORIGINAL_FILENAME.key(),
                split.getAttribute(CoreAttributes.FILENAME.key()));
        splits.add(split);
    }, depth);
    final AtomicBoolean failed = new AtomicBoolean(false);
    session.read(original, rawIn -> {
        try (final InputStream in = new BufferedInputStream(rawIn)) {
            SAXParser saxParser = null;
            try {
                saxParser = saxParserFactory.newSAXParser();
                final XMLReader reader = saxParser.getXMLReader();
                reader.setContentHandler(parser);
                reader.parse(new InputSource(in));
            } catch (final ParserConfigurationException | SAXException e) {
                // Flag failure; routing happens outside the read callback.
                logger.error("Unable to parse {} due to {}", new Object[] { original, e });
                failed.set(true);
            }
        }
    });
    if (failed.get()) {
        // On parse failure: route the original to failure, discard partial splits.
        session.transfer(original, REL_FAILURE);
        session.remove(splits);
    } else {
        // Fragment count is only known after the full parse, so it is added here.
        splits.forEach((split) -> {
            split = session.putAttribute(split, FRAGMENT_COUNT.key(),
                    Integer.toString(numberOfRecords.get()));
            session.transfer(split, REL_SPLIT);
        });
        final FlowFile originalToTransfer = copyAttributesToOriginal(session, original, fragmentIdentifier,
                numberOfRecords.get());
        session.transfer(originalToTransfer, REL_ORIGINAL);
        logger.info("Split {} into {} FlowFiles", new Object[] { originalToTransfer, splits.size() });
    }
}
From source file:org.apache.nutch.tools.DmozParser.java
/**
 * Iterate through all the items in this structured DMOZ file.
 * Add each URL to the web db.
 *
 * @param dmozFile the DMOZ RDF dump to parse
 * @param subsetDenom subset denominator forwarded to RDFProcessor — presumably
 *            selects every n-th entry; TODO confirm against RDFProcessor
 * @param includeAdult whether adult-topic entries are included
 * @param skew hash skew forwarded to RDFProcessor
 * @param topicPattern only topics matching this pattern are processed — TODO confirm
 */
public void parseDmozFile(File dmozFile, int subsetDenom, boolean includeAdult, int skew,
        Pattern topicPattern) throws IOException, SAXException, ParserConfigurationException {
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    SAXParser parser = parserFactory.newSAXParser();
    XMLReader reader = parser.getXMLReader();
    // Create our own processor to receive SAX events; it also serves as the
    // error handler.
    RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew, topicPattern);
    reader.setContentHandler(rp);
    reader.setErrorHandler(rp);
    LOG.info("skew = " + rp.hashSkew);
    //
    // Open filtered text stream. The TextFilter makes sure that
    // only appropriate XML-approved Text characters are received.
    // Any non-conforming characters are silently skipped.
    //
    XMLCharFilter in = new XMLCharFilter(new BufferedReader(
            new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8")));
    try {
        InputSource is = new InputSource(in);
        reader.parse(is);
    } catch (Exception e) {
        // NOTE(review): any parse failure logs at FATAL and then terminates the
        // JVM with status 0 — which looks like success to the caller and makes
        // the declared SAXException unreachable here. Consider rethrowing.
        if (LOG.isFatalEnabled()) {
            LOG.fatal(e.toString());
            e.printStackTrace(LogUtil.getFatalStream(LOG));
        }
        System.exit(0);
    } finally {
        in.close();
    }
}