List of usage examples for javax.xml.stream XMLInputFactory IS_REPLACING_ENTITY_REFERENCES
String IS_REPLACING_ENTITY_REFERENCES
To view the source code for javax.xml.stream XMLInputFactory IS_REPLACING_ENTITY_REFERENCES, click the Source Link.
From source file: tpt.dbweb.cat.io.TaggedTextXMLReader.java
private Iterator<TaggedText> getIterator(InputStream is, String errorMessageInfo) { XMLStreamReader tmpxsr = null; try {//from w ww. j a v a 2 s. com XMLInputFactory xif = XMLInputFactory.newInstance(); xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); xif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); xif.setProperty(XMLInputFactory.IS_VALIDATING, false); tmpxsr = xif.createXMLStreamReader(is); } catch (XMLStreamException | FactoryConfigurationError e) { e.printStackTrace(); return null; } final XMLStreamReader xsr = tmpxsr; return new PeekIterator<TaggedText>() { @Override protected TaggedText internalNext() { ArrayList<TextSpan> openMarks = new ArrayList<>(); StringBuilder pureTextSB = new StringBuilder(); ArrayList<TextSpan> marks = new ArrayList<>(); marks.add(new TextSpan(null, 0, 0)); TaggedText tt = null; try { loop: while (xsr.hasNext()) { xsr.next(); int event = xsr.getEventType(); switch (event) { case XMLStreamConstants.START_ELEMENT: if ("articles".equals(xsr.getLocalName())) { } else if ("article".equals(xsr.getLocalName())) { tt = new TaggedText(); for (int i = 0; i < xsr.getAttributeCount(); i++) { if ("id".equals(xsr.getAttributeLocalName(i))) { tt.id = xsr.getAttributeValue(i); } tt.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i)); } } else if ("mark".equals(xsr.getLocalName())) { TextSpan tr = new TextSpan(null, pureTextSB.length(), pureTextSB.length()); for (int i = 0; i < xsr.getAttributeCount(); i++) { tr.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i)); } openMarks.add(tr); } else if ("br".equals(xsr.getLocalName())) { // TODO: how to propagate tags from the input to the output? 
} else { log.warn("ignore tag " + xsr.getLocalName()); } break; case XMLStreamConstants.END_ELEMENT: if ("mark".equals(xsr.getLocalName())) { // search corresponding <mark ...> TextSpan tr = openMarks.remove(openMarks.size() - 1); if (tr == null) { log.warn("markend at " + xsr.getLocation().getCharacterOffset() + " has no corresponding mark tag"); break; } tr.end = pureTextSB.length(); marks.add(tr); } else if ("article".equals(xsr.getLocalName())) { tt.text = StringUtils.stripEnd(pureTextSB.toString().trim(), " \t\n"); pureTextSB = new StringBuilder(); tt.mentions = new ArrayList<>(); for (TextSpan mark : marks) { String entity = mark.info().get("entity"); if (entity == null) { entity = mark.info().get("annotation"); } if (entity != null) { EntityMention e = new EntityMention(tt.text, mark.start, mark.end, entity); String minMention = mark.info().get("min"); String mention = e.getMention(); if (minMention != null && !"".equals(minMention)) { Pattern p = Pattern.compile(Pattern.quote(minMention)); Matcher m = p.matcher(mention); if (m.find()) { TextSpan min = new TextSpan(e.text, e.start + m.start(), e.start + m.end()); e.min = min; if (m.find()) { log.warn("found " + minMention + " two times in \"" + mention + "\""); } } else { String prefix = Utility.findLongestPrefix(mention, minMention); log.warn("didn't find min mention '" + minMention + "' in text '" + mention + "', longest prefix found: '" + prefix + "' in article " + tt.id); } } mark.info().remove("min"); mark.info().remove("entity"); if (mark.info().size() > 0) { e.info().putAll(mark.info()); } tt.mentions.add(e); } } openMarks.clear(); marks.clear(); break loop; } break; case XMLStreamConstants.CHARACTERS: String toadd = xsr.getText(); if (pureTextSB.length() == 0) { toadd = StringUtils.stripStart(toadd, " \t\n"); } if (toadd.contains("thanks")) { log.info("test"); } pureTextSB.append(toadd); break; } } } catch (XMLStreamException e) { log.error("{}", errorMessageInfo); throw new RuntimeException(e); } if 
(tt != null && tt.mentions != null) { tt.mentions.sort(null); } return tt; } }; }
From source file: uk.ac.ebi.metabolomes.webservices.eutils.ESummaryXMLResponseParser.java
/** * Parses the whole ESummaryResult XML object, delivering a List of ESummaryResults. * //from w ww . j a va2 s . c o m * @param in the input stream through which the response the response can be read. * @return multimap with the mappings from the XML. * @throws javax.xml.stream.XMLStreamException */ public List<T> parseESummaryResult(InputStream in) throws XMLStreamException { XMLInputFactory2 xmlif = (XMLInputFactory2) XMLInputFactory2.newInstance(); xmlif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE); xmlif.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE); xmlif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.TRUE); xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE); xmlif.configureForSpeed(); XMLStreamReader2 xmlr = (XMLStreamReader2) xmlif.createXMLStreamReader(in); int event; List<T> results = new ArrayList<T>(); T currentResult = getNewESummaryResult(); while (xmlr.hasNext()) { event = xmlr.next(); switch1: switch (event) { case XMLEvent.START_DOCUMENT: break; case XMLEvent.START_ELEMENT: //LOGGER.info("Start Element: "+xmlr.getLocalName()); //LOGGER.info("Attributes: "+getAttributes(xmlr)); if (xmlr.getLocalName().equalsIgnoreCase("Item")) { boolean done = false; for (Enum keyword : currentResult.getScalarKeywords()) { if (hasAttributeNameWithValue(xmlr, keyword.toString())) { //LOGGER.info("Entering addScalarForKeyword: "+keyword.toString()+" for "+xmlr.getLocalName()); currentResult.addScalarForKeyword(keyword, getFollowingCharacters(xmlr)); break switch1; } } for (Enum keyword : currentResult.getListKeywords()) { if (hasAttributeNameWithValue(xmlr, keyword.toString())) { //LOGGER.info("Entering addListForKeyword: "+keyword.toString()+" for "+xmlr.getLocalName()); currentResult.addListForKeyword(keyword, parseList(xmlr)); break switch1; } } } if (xmlr.getLocalName().equalsIgnoreCase("Id")) { for (Enum keyword : currentResult.getScalarKeywords()) { if 
(keyword.toString().equalsIgnoreCase("Id")) { currentResult.addScalarForKeyword(keyword, getFollowingCharacters(xmlr)); break switch1; } } } /* if (xmlr.getLocalName().equalsIgnoreCase("Item") && hasAttributeNameWithValue(xmlr, "SID")) { currentResult.setId(getFollowingCharacters(xmlr)); } else if (xmlr.getLocalName().equalsIgnoreCase("Item") && hasAttributeNameWithValue(xmlr, "SourceNameList")) { currentResult.setSourceNames(parseList(xmlr)); } else if (xmlr.getLocalName().equalsIgnoreCase("Item") && hasAttributeNameWithValue(xmlr, "SourceID")) { currentResult.addSourceID(getFollowingCharacters(xmlr)); } else if (xmlr.getLocalName().equalsIgnoreCase("Item") && hasAttributeNameWithValue(xmlr, "DBUrl")) { currentResult.setDBUrl(getFollowingCharacters(xmlr)); } else if (xmlr.getLocalName().equalsIgnoreCase("Item") && hasAttributeNameWithValue(xmlr, "SynonymList")) { currentResult.setSynonyms(parseList(xmlr)); }*/ break; case XMLEvent.END_ELEMENT: //LOGGER.info("End Element: "+xmlr.getLocalName()); if (xmlr.getLocalName().equalsIgnoreCase("DocSum")) { currentResult.wrap(); results.add(currentResult); currentResult = getNewESummaryResult(); } break; } } xmlr.closeCompletely(); return results; }