List of usage examples for org.xml.sax XMLReader parse
/**
 * Parses the document identified by the given system identifier.
 *
 * @param systemId the system identifier of the document to parse
 *                 (NOTE(review): presumably a URI, as in the standard SAX API; confirm)
 * @throws IOException  declared for I/O failures while reading the document
 * @throws SAXException declared for SAX-level failures raised during parsing
 */
public void parse(String systemId) throws IOException, SAXException;
From source file:org.apache.hadoop.hbase.rest.TestTableScan.java
/** * An example to scan using listener in unmarshaller for XML. * @throws Exception the exception//from w w w . j a va 2s . c o m */ @Test public void testScanUsingListenerUnmarshallerXML() throws Exception { StringBuilder builder = new StringBuilder(); builder.append("/*"); builder.append("?"); builder.append(Constants.SCAN_COLUMN + "=" + COLUMN_1); builder.append("&"); builder.append(Constants.SCAN_LIMIT + "=10"); Response response = client.get("/" + TABLE + builder.toString(), Constants.MIMETYPE_XML); assertEquals(200, response.getCode()); assertEquals(Constants.MIMETYPE_XML, response.getHeader("content-type")); JAXBContext context = JAXBContext.newInstance(ClientSideCellSetModel.class, RowModel.class, CellModel.class); Unmarshaller unmarshaller = context.createUnmarshaller(); final ClientSideCellSetModel.Listener listener = new ClientSideCellSetModel.Listener() { @Override public void handleRowModel(ClientSideCellSetModel helper, RowModel row) { assertTrue(row.getKey() != null); assertTrue(row.getCells().size() > 0); } }; // install the callback on all ClientSideCellSetModel instances unmarshaller.setListener(new Unmarshaller.Listener() { public void beforeUnmarshal(Object target, Object parent) { if (target instanceof ClientSideCellSetModel) { ((ClientSideCellSetModel) target).setCellSetModelListener(listener); } } public void afterUnmarshal(Object target, Object parent) { if (target instanceof ClientSideCellSetModel) { ((ClientSideCellSetModel) target).setCellSetModelListener(null); } } }); // create a new XML parser SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); XMLReader reader = factory.newSAXParser().getXMLReader(); reader.setContentHandler(unmarshaller.getUnmarshallerHandler()); assertFalse(ClientSideCellSetModel.listenerInvoked); reader.parse(new InputSource(response.getStream())); assertTrue(ClientSideCellSetModel.listenerInvoked); }
From source file:org.apache.jmeter.protocol.http.proxy.DefaultSamplerCreatorClassifier.java
/** * Tries parsing to see if content is xml * /* ww w . j a v a2 s . c om*/ * @param postData * String * @return boolean */ private static final boolean isPotentialXml(String postData) { try { SAXParserFactory spf = SAXParserFactory.newInstance(); SAXParser saxParser = spf.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); ErrorDetectionHandler detectionHandler = new ErrorDetectionHandler(); xmlReader.setContentHandler(detectionHandler); xmlReader.setErrorHandler(detectionHandler); xmlReader.parse(new InputSource(new StringReader(postData))); return !detectionHandler.isErrorDetected(); } catch (ParserConfigurationException e) { return false; } catch (SAXException e) { return false; } catch (IOException e) { return false; } }
From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarPageProvider.java
@Override public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input, String contentType) throws DataRetrievalException { log.debug("Request Successful to {0}", requestUrl); final GSXMLHandler gsXMLHandler = new GSXMLHandler(); gsXMLHandler.clearGSresultList();/* w w w. j a va 2 s . c om*/ try { XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser"); xr.setContentHandler(gsXMLHandler); InputSource gsxml = new InputSource(input); gsxml.setEncoding("iso-8859-1"); xr.parse(gsxml); final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList(); Gson gson = new Gson(); JsonArray json = new JsonArray(); for (GSresult d : gsresultlist) { json.add(gson.toJsonTree(d).getAsJsonObject()); } JSONtoRDF parser = new JSONtoRDF(resource, GoogleScholarProvider.MAPPINGSCHEMA, json, triples); try { parser.parse(); } catch (Exception e) { throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e); } } catch (SAXException | IOException e) { throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e); } // try { // List<String> candidates = new ArrayList<String>(); // ValueFactory factory = ValueFactoryImpl.getInstance(); // final Document doc = new SAXBuilder(XMLReaders.NONVALIDATING).build(input); // for(Element element: queryElements(doc, "/result/hits/hit/info/url")) { // String candidate = element.getText(); // triples.add(factory.createStatement(factory.createURI( resource ), FOAF.member, factory.createURI( candidate ) )); // candidates.add(candidate); // } // ClientConfiguration conf = new ClientConfiguration(); // LDClient ldClient = new LDClient(conf); // if(!candidates.isEmpty()) { // Model candidateModel = null; // for(String author: candidates) { // ClientResponse response = ldClient.retrieveResource(author); // Model authorModel = response.getData(); // if(candidateModel == null) { // candidateModel = authorModel; // } else { // 
candidateModel.addAll(authorModel); // } // } // triples.addAll(candidateModel); // } // }catch (IOException e) { // throw new DataRetrievalException("I/O error while parsing HTML response", e); // }catch (JDOMException e) { // throw new DataRetrievalException("could not parse XML response. It is not in proper XML format", e); // } return Collections.emptyList(); }
From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarProvider.java
@Override public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input, String contentType) throws DataRetrievalException { log.debug("Request Successful to {0}", requestUrl); final GSXMLHandler gsXMLHandler = new GSXMLHandler(); gsXMLHandler.clearGSresultList();//from w w w . j a va 2 s . c o m try { XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser"); xr.setContentHandler(gsXMLHandler); InputSource gsxml = new InputSource(input); gsxml.setEncoding("iso-8859-1"); xr.parse(gsxml); final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList(); Gson gson = new Gson(); JsonArray json = new JsonArray(); for (GSresult d : gsresultlist) { json.add(gson.toJsonTree(d).getAsJsonObject()); } JSONtoRDF parser = new JSONtoRDF(resource, MAPPINGSCHEMA, json, triples); try { parser.parse(); } catch (Exception e) { throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e); } int numPages = (int) ((double) (gsXMLHandler.getNumResults() / 10)) + 1; int pagesLoaded = 1; Model model = null; while (pagesLoaded < numPages) { String pagenumquery = Integer.toString(pagesLoaded * 10); String moreDataUrl = String.format(API, pagenumquery, stringSearch, authorSearch, advancedSearch); ClientConfiguration conf = new ClientConfiguration(); LDClient ldClient = new LDClient(conf); ClientResponse response = ldClient.retrieveResource(moreDataUrl); Model pageModel = response.getData(); if (model == null) { model = pageModel; } else { model.addAll(pageModel); } pagesLoaded++; } triples.addAll(model); } catch (SAXException | IOException e) { throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e); } // try { // List<String> candidates = new ArrayList<String>(); // ValueFactory factory = ValueFactoryImpl.getInstance(); // final Document doc = new SAXBuilder(XMLReaders.NONVALIDATING).build(input); // for(Element element: queryElements(doc, 
"/result/hits/hit/info/url")) { // String candidate = element.getText(); // triples.add(factory.createStatement(factory.createURI( resource ), FOAF.member, factory.createURI( candidate ) )); // candidates.add(candidate); // } // ClientConfiguration conf = new ClientConfiguration(); // LDClient ldClient = new LDClient(conf); // if(!candidates.isEmpty()) { // Model candidateModel = null; // for(String author: candidates) { // ClientResponse response = ldClient.retrieveResource(author); // Model authorModel = response.getData(); // if(candidateModel == null) { // candidateModel = authorModel; // } else { // candidateModel.addAll(authorModel); // } // } // triples.addAll(candidateModel); // } // }catch (IOException e) { // throw new DataRetrievalException("I/O error while parsing HTML response", e); // }catch (JDOMException e) { // throw new DataRetrievalException("could not parse XML response. It is not in proper XML format", e); // } return Collections.emptyList(); }
From source file:org.apache.maven.plugin.cxx.utils.svn.SvnService.java
public static SvnInfo getSvnInfo(File basedir, Credential cred, String uri, Log log, boolean noParsingFailure) throws MojoExecutionException { ByteArrayOutputStream out = new ByteArrayOutputStream(); execSvnCommand(basedir, cred, new String[] { "info", uri, "--xml" }, out, log); SvnInfo svnInfo = new SvnInfo(); try {//from ww w . java 2 s . c o m SAXParserFactory sfactory = SAXParserFactory.newInstance(); SAXParser parser = sfactory.newSAXParser(); XMLReader xmlparser = parser.getXMLReader(); xmlparser.setContentHandler(svnInfo); xmlparser.parse(new InputSource(new ByteArrayInputStream(out.toByteArray()))); } catch (Exception e) { if (noParsingFailure) { log.error("svn info xml parsing failed : " + e); } else { throw new MojoExecutionException("svn info xml parsing failed.", e); } } return svnInfo; }
From source file:org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.java
/** * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile. * * @param session//w w w.j a va 2 s . c o m * The NiFi ProcessSession instance for the current invocation. */ private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final InputStream sheetInputStream, ExcelSheetReadConfig readConfig, CSVFormat csvFormat) throws IOException { FlowFile ff = session.create(); try { final DataFormatter formatter = new DataFormatter(); final InputSource sheetSource = new InputSource(sheetInputStream); final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat); final XMLReader parser = SAXHelper.newXMLReader(); //If Value Formatting is set to false then don't pass in the styles table. // This will cause the XSSF Handler to return the raw value instead of the formatted one. final StylesTable sst = readConfig.getFormatValues() ? readConfig.getStyles() : null; final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(sst, null, readConfig.getSharedStringsTable(), sheetHandler, formatter, false); parser.setContentHandler(handler); ff = session.write(ff, new OutputStreamCallback() { @Override public void process(OutputStream out) throws IOException { PrintStream outPrint = new PrintStream(out); sheetHandler.setOutput(outPrint); try { parser.parse(sheetSource); sheetInputStream.close(); sheetHandler.close(); outPrint.close(); } catch (SAXException se) { getLogger().error("Error occurred while processing Excel sheet {}", new Object[] { readConfig.getSheetName() }, se); } } }); ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName()); ff = session.putAttribute(ff, ROW_NUM, new Long(sheetHandler.getRowCount()).toString()); if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) { ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key())); } else { ff = session.putAttribute(ff, 
SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME); } //Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()), ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName())); ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); session.transfer(ff, SUCCESS); } catch (SAXException | ParserConfigurationException saxE) { getLogger().error("Failed to create instance of Parser.", saxE); ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage()); session.transfer(ff, FAILURE); } finally { sheetInputStream.close(); } }
From source file:org.apache.nifi.processors.standard.SplitXml.java
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) { final FlowFile original = session.get(); if (original == null) { return;// www .ja v a 2 s.com } final int depth = context.getProperty(SPLIT_DEPTH).asInteger(); final ComponentLog logger = getLogger(); final List<FlowFile> splits = new ArrayList<>(); final String fragmentIdentifier = UUID.randomUUID().toString(); final AtomicInteger numberOfRecords = new AtomicInteger(0); final XmlSplitterSaxParser parser = new XmlSplitterSaxParser(xmlTree -> { FlowFile split = session.create(original); split = session.write(split, out -> out.write(xmlTree.getBytes("UTF-8"))); split = session.putAttribute(split, FRAGMENT_ID.key(), fragmentIdentifier); split = session.putAttribute(split, FRAGMENT_INDEX.key(), Integer.toString(numberOfRecords.getAndIncrement())); split = session.putAttribute(split, SEGMENT_ORIGINAL_FILENAME.key(), split.getAttribute(CoreAttributes.FILENAME.key())); splits.add(split); }, depth); final AtomicBoolean failed = new AtomicBoolean(false); session.read(original, rawIn -> { try (final InputStream in = new BufferedInputStream(rawIn)) { SAXParser saxParser = null; try { saxParser = saxParserFactory.newSAXParser(); final XMLReader reader = saxParser.getXMLReader(); reader.setContentHandler(parser); reader.parse(new InputSource(in)); } catch (final ParserConfigurationException | SAXException e) { logger.error("Unable to parse {} due to {}", new Object[] { original, e }); failed.set(true); } } }); if (failed.get()) { session.transfer(original, REL_FAILURE); session.remove(splits); } else { splits.forEach((split) -> { split = session.putAttribute(split, FRAGMENT_COUNT.key(), Integer.toString(numberOfRecords.get())); session.transfer(split, REL_SPLIT); }); final FlowFile originalToTransfer = copyAttributesToOriginal(session, original, fragmentIdentifier, numberOfRecords.get()); session.transfer(originalToTransfer, REL_ORIGINAL); logger.info("Split {} into {} FlowFiles", new 
Object[] { originalToTransfer, splits.size() }); } }
From source file:org.apache.nutch.tools.DmozParser.java
/** * Iterate through all the items in this structured DMOZ file. * Add each URL to the web db./*from w ww . j av a2 s . c o m*/ */ public void parseDmozFile(File dmozFile, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern) throws IOException, SAXException, ParserConfigurationException { SAXParserFactory parserFactory = SAXParserFactory.newInstance(); SAXParser parser = parserFactory.newSAXParser(); XMLReader reader = parser.getXMLReader(); // Create our own processor to receive SAX events RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew, topicPattern); reader.setContentHandler(rp); reader.setErrorHandler(rp); LOG.info("skew = " + rp.hashSkew); // // Open filtered text stream. The TextFilter makes sure that // only appropriate XML-approved Text characters are received. // Any non-conforming characters are silently skipped. // XMLCharFilter in = new XMLCharFilter(new BufferedReader( new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8"))); try { InputSource is = new InputSource(in); reader.parse(is); } catch (Exception e) { if (LOG.isFatalEnabled()) { LOG.fatal(e.toString()); e.printStackTrace(LogUtil.getFatalStream(LOG)); } System.exit(0); } finally { in.close(); } }
From source file:org.apache.ode.bpel.compiler.bom.BpelObjectFactory.java
/**
 * Parse a BPEL process found at the input source.
 * <p>
 * The document is read with validation enabled against schemas served from the classpath by a
 * {@code LocalEntityResolver}, built into a DOM, and then converted with
 * {@code createBpelObject}. Validation errors abort the parse only when the system property
 * {@code org.apache.ode.compiler.failOnValidationErrors} is {@code true}; otherwise they are
 * logged as a warning.
 *
 * @param isrc      input source.
 * @param systemURI passed through to {@code createBpelObject}
 * @return the BPEL object created from the document element, cast to {@code Process}
 * @throws IOException  if reading the input fails
 * @throws SAXException if parsing fails, or on validation errors in strict mode
 */
public Process parse(InputSource isrc, URI systemURI) throws IOException, SAXException {
    XMLReader xmlReader = XMLParserUtils.getXMLReader();

    // Map each schema namespace onto the copy bundled on the classpath.
    LocalEntityResolver schemaResolver = new LocalEntityResolver();
    schemaResolver.register(Bpel11QNames.NS_BPEL4WS_2003_03,
            getClass().getResource("/bpel4ws_1_1-fivesight.xsd"));
    schemaResolver.register(Bpel20QNames.NS_WSBPEL2_0,
            getClass().getResource("/wsbpel_main-draft-Apr-29-2006.xsd"));
    schemaResolver.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_ABSTRACT,
            getClass().getResource("/ws-bpel_abstract_common_base.xsd"));
    schemaResolver.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_EXEC,
            getClass().getResource("/ws-bpel_executable.xsd"));
    schemaResolver.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_PLINK,
            getClass().getResource("/ws-bpel_plnktype.xsd"));
    schemaResolver.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_SERVREF,
            getClass().getResource("/ws-bpel_serviceref.xsd"));
    schemaResolver.register(Bpel20QNames.NS_WSBPEL2_0_FINAL_VARPROP,
            getClass().getResource("/ws-bpel_varprop.xsd"));
    schemaResolver.register(XML, getClass().getResource("/xml.xsd"));
    schemaResolver.register(WSDL, getClass().getResource("/wsdl.xsd"));
    schemaResolver.register(Bpel20QNames.NS_WSBPEL_PARTNERLINK_2004_03,
            getClass().getResource("/wsbpel_plinkType-draft-Apr-29-2006.xsd"));
    xmlReader.setEntityResolver(schemaResolver);

    // SAX events are collected into a DOM document.
    Document dom = DOMUtils.newDocument();
    xmlReader.setContentHandler(new DOMBuilderContentHandler(dom));

    xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);
    xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
    xmlReader.setFeature("http://xml.org/sax/features/validation", true);

    XMLParserUtils.addExternalSchemaURL(xmlReader, Bpel11QNames.NS_BPEL4WS_2003_03,
            Bpel11QNames.NS_BPEL4WS_2003_03);
    XMLParserUtils.addExternalSchemaURL(xmlReader, Bpel20QNames.NS_WSBPEL2_0, Bpel20QNames.NS_WSBPEL2_0);
    XMLParserUtils.addExternalSchemaURL(xmlReader, Bpel20QNames.NS_WSBPEL2_0_FINAL_EXEC,
            Bpel20QNames.NS_WSBPEL2_0_FINAL_EXEC);
    XMLParserUtils.addExternalSchemaURL(xmlReader, Bpel20QNames.NS_WSBPEL2_0_FINAL_ABSTRACT,
            Bpel20QNames.NS_WSBPEL2_0_FINAL_ABSTRACT);

    // True only when the property is present and equals "true" (case-insensitive).
    boolean strict = Boolean.getBoolean("org.apache.ode.compiler.failOnValidationErrors");
    BOMSAXErrorHandler validationErrors = new BOMSAXErrorHandler(strict);
    xmlReader.setErrorHandler(validationErrors);
    xmlReader.parse(isrc);

    if (!validationErrors.wasOK()) {
        if (strict) {
            throw new SAXException("Validation errors during parsing");
        }
        __log.warn(
                "Validation errors during parsing, continuing due to -Dorg.apache.ode.compiler.failOnValidationErrors=false switch");
    }
    return (Process) createBpelObject(dom.getDocumentElement(), systemURI);
}
From source file:org.apache.ojb.broker.metadata.RepositoryPersistor.java
/** * Read metadata by populating an instance of the target class * using SAXParser.// w w w .j a v a2s . com */ private Object readMetadataFromXML(InputSource source, Class target) throws MalformedURLException, ParserConfigurationException, SAXException, IOException { // TODO: make this configurable boolean validate = false; // get a xml reader instance: SAXParserFactory factory = SAXParserFactory.newInstance(); log.info("RepositoryPersistor using SAXParserFactory : " + factory.getClass().getName()); if (validate) { factory.setValidating(true); } SAXParser p = factory.newSAXParser(); XMLReader reader = p.getXMLReader(); if (validate) { reader.setErrorHandler(new OJBErrorHandler()); } Object result; if (DescriptorRepository.class.equals(target)) { // create an empty repository: DescriptorRepository repository = new DescriptorRepository(); // create handler for building the repository structure ContentHandler handler = new RepositoryXmlHandler(repository); // tell parser to use our handler: reader.setContentHandler(handler); reader.parse(source); result = repository; } else if (ConnectionRepository.class.equals(target)) { // create an empty repository: ConnectionRepository repository = new ConnectionRepository(); // create handler for building the repository structure ContentHandler handler = new ConnectionDescriptorXmlHandler(repository); // tell parser to use our handler: reader.setContentHandler(handler); reader.parse(source); //LoggerFactory.getBootLogger().info("loading XML took " + (stop - start) + " msecs"); result = repository; } else throw new MetadataException( "Could not build a repository instance for '" + target + "', using source " + source); return result; }