Example usage for javax.xml.stream XMLInputFactory IS_REPLACING_ENTITY_REFERENCES

List of usage examples for javax.xml.stream XMLInputFactory IS_REPLACING_ENTITY_REFERENCES

Introduction

On this page you can find example usages of javax.xml.stream XMLInputFactory IS_REPLACING_ENTITY_REFERENCES.

Prototype

String IS_REPLACING_ENTITY_REFERENCES

To view the source code for javax.xml.stream XMLInputFactory IS_REPLACING_ENTITY_REFERENCES, click the Source Link.

Document

When set to true, this property requires the parser to replace internal entity references with their replacement text and report them as characters.

Usage

From source file: tpt.dbweb.cat.io.TaggedTextXMLReader.java

/**
 * Builds a lazy iterator over the {@code <article>} elements of an XML stream.
 *
 * <p>Each call to {@code internalNext()} consumes parser events up to and including the next
 * {@code </article>} end tag and returns that article as a {@code TaggedText}. The article's
 * attributes are copied into its info map (the {@code id} attribute is also stored in
 * {@code tt.id}), its character data becomes {@code tt.text}, and every {@code <mark>} element
 * carrying an {@code entity} (or, failing that, {@code annotation}) attribute becomes an
 * {@code EntityMention}. When the stream ends without another article, {@code null} is returned.
 *
 * @param is               stream containing the XML to parse
 * @param errorMessageInfo text written to the error log when reading the stream fails
 * @return the iterator, or {@code null} if the stream reader could not be created
 */
private Iterator<TaggedText> getIterator(InputStream is, String errorMessageInfo) {

    final XMLStreamReader xsr;
    try {
        XMLInputFactory xif = XMLInputFactory.newInstance();
        // Parser hardening: no external entities, no entity replacement, no DTD validation.
        xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
        xif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
        xif.setProperty(XMLInputFactory.IS_VALIDATING, false);
        xsr = xif.createXMLStreamReader(is);
    } catch (XMLStreamException | FactoryConfigurationError e) {
        // Callers receive null in this case, so make sure the cause ends up in the log.
        log.error("cannot create XML stream reader ({})", errorMessageInfo, e);
        return null;
    }

    return new PeekIterator<TaggedText>() {

        @Override
        protected TaggedText internalNext() {
            // <mark> elements that have been opened but not yet closed (used as a stack)
            ArrayList<TextSpan> openMarks = new ArrayList<>();
            // text of the current article with all markup removed
            StringBuilder pureTextSB = new StringBuilder();
            // closed <mark> elements, in closing order
            ArrayList<TextSpan> marks = new ArrayList<>();
            // NOTE(review): purpose of this leading zero-length span is not visible here;
            // presumably it has no "entity"/"annotation" info and so yields no mention.
            marks.add(new TextSpan(null, 0, 0));
            TaggedText tt = null;

            try {
                loop: while (xsr.hasNext()) {
                    int event = xsr.next();
                    switch (event) {
                    case XMLStreamConstants.START_ELEMENT:
                        if ("articles".equals(xsr.getLocalName())) {
                            // root container element: nothing to record
                        } else if ("article".equals(xsr.getLocalName())) {
                            tt = new TaggedText();
                            for (int i = 0; i < xsr.getAttributeCount(); i++) {
                                if ("id".equals(xsr.getAttributeLocalName(i))) {
                                    tt.id = xsr.getAttributeValue(i);
                                }
                                tt.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i));
                            }

                        } else if ("mark".equals(xsr.getLocalName())) {
                            // The span starts at the current text position; its end is set
                            // when the matching </mark> is seen.
                            TextSpan tr = new TextSpan(null, pureTextSB.length(), pureTextSB.length());
                            for (int i = 0; i < xsr.getAttributeCount(); i++) {
                                tr.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i));
                            }

                            openMarks.add(tr);
                        } else if ("br".equals(xsr.getLocalName())) {
                            // TODO: how to propagate tags from the input to the output?
                        } else {
                            log.warn("ignore tag " + xsr.getLocalName());
                        }
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        if ("mark".equals(xsr.getLocalName())) {

                            // The matching <mark> may have been consumed (and cleared) by an
                            // earlier call; check before popping instead of letting
                            // remove() throw IndexOutOfBoundsException.
                            if (openMarks.isEmpty()) {
                                log.warn("markend at " + xsr.getLocation().getCharacterOffset()
                                        + " has no corresponding mark tag");
                                break;
                            }
                            TextSpan tr = openMarks.remove(openMarks.size() - 1);

                            tr.end = pureTextSB.length();
                            marks.add(tr);

                        } else if ("article".equals(xsr.getLocalName())) {
                            if (tt == null) {
                                // The opening tag was consumed by an earlier call (e.g. nested
                                // articles); there is no article to finish here.
                                log.warn("article end at " + xsr.getLocation().getCharacterOffset()
                                        + " has no corresponding article tag");
                                break;
                            }
                            tt.text = StringUtils.stripEnd(pureTextSB.toString().trim(), " \t\n");
                            pureTextSB = new StringBuilder();

                            tt.mentions = new ArrayList<>();
                            for (TextSpan mark : marks) {

                                // "entity" takes precedence, "annotation" is the fallback;
                                // marks with neither produce no mention.
                                String entity = mark.info().get("entity");
                                if (entity == null) {
                                    entity = mark.info().get("annotation");
                                }
                                if (entity == null) {
                                    continue;
                                }

                                EntityMention e = new EntityMention(tt.text, mark.start, mark.end, entity);
                                String minMention = mark.info().get("min");
                                String mention = e.getMention();
                                if (minMention != null && !"".equals(minMention)) {
                                    // Locate the literal "min" text inside the mention.
                                    Pattern p = Pattern.compile(Pattern.quote(minMention));
                                    Matcher m = p.matcher(mention);
                                    if (m.find()) {
                                        // Offsets are relative to the mention; shift them to
                                        // article coordinates.
                                        e.min = new TextSpan(e.text, e.start + m.start(), e.start + m.end());
                                        if (m.find()) {
                                            log.warn("found " + minMention + " two times in \"" + mention
                                                    + "\"");
                                        }
                                    } else {
                                        String prefix = Utility.findLongestPrefix(mention, minMention);
                                        log.warn("didn't find min mention '" + minMention + "' in text '"
                                                + mention + "', longest prefix found: '" + prefix
                                                + "' in article " + tt.id);
                                    }
                                }

                                // "min" and "entity" are represented on the mention itself;
                                // all remaining attributes are carried over as extra info.
                                mark.info().remove("min");
                                mark.info().remove("entity");
                                if (!mark.info().isEmpty()) {
                                    e.info().putAll(mark.info());
                                }
                                tt.mentions.add(e);
                            }
                            openMarks.clear();
                            marks.clear();
                            // One article per call: stop reading here.
                            break loop;
                        }
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        String toadd = xsr.getText();
                        if (pureTextSB.length() == 0) {
                            // Drop leading whitespace so span offsets line up with the
                            // trimmed article text.
                            toadd = StringUtils.stripStart(toadd, " \t\n");
                        }
                        pureTextSB.append(toadd);
                        break;
                    }

                }
            } catch (XMLStreamException e) {
                log.error("{}", errorMessageInfo);
                throw new RuntimeException(e);
            }
            if (tt != null && tt.mentions != null) {
                // null comparator: sort by the mentions' natural ordering
                tt.mentions.sort(null);
            }
            return tt;
        }
    };
}

From source file: uk.ac.ebi.metabolomes.webservices.eutils.ESummaryXMLResponseParser.java

/**
 * Parses a whole ESummaryResult XML document and delivers one result per {@code DocSum} element.
 *
 * <p>Within a {@code DocSum}, every {@code Item} element is matched against the current
 * result's scalar keywords first and its list keywords second; the first keyword accepted by
 * {@code hasAttributeNameWithValue} decides how the element is read. An {@code Id} element is
 * stored under the scalar keyword named "Id", if the result defines one.
 *
 * <p>The stream reader is closed via {@code closeCompletely()} whether or not parsing succeeds.
 *
 * @param in the input stream through which the response can be read.
 * @return list with one result object per {@code DocSum} element, in document order.
 * @throws javax.xml.stream.XMLStreamException if the XML cannot be read or the reader cannot
 *         be closed.
 */
public List<T> parseESummaryResult(InputStream in) throws XMLStreamException {

    XMLInputFactory2 xmlif = (XMLInputFactory2) XMLInputFactory2.newInstance();
    xmlif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE);
    xmlif.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    // The response comes from a remote service: never resolve external entities (XXE).
    // DTD support is disabled above, so nothing in the document can rely on them.
    xmlif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
    xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE);
    xmlif.configureForSpeed();

    XMLStreamReader2 xmlr = (XMLStreamReader2) xmlif.createXMLStreamReader(in);

    List<T> results = new ArrayList<T>();
    T currentResult = getNewESummaryResult();

    try {
        while (xmlr.hasNext()) {
            int event = xmlr.next();

            // The label lets the keyword loops leave the whole switch once an element
            // has been consumed by one of the helpers.
            switch1: switch (event) {
            case XMLEvent.START_ELEMENT:
                if (xmlr.getLocalName().equalsIgnoreCase("Item")) {
                    for (Enum keyword : currentResult.getScalarKeywords()) {
                        if (hasAttributeNameWithValue(xmlr, keyword.toString())) {
                            currentResult.addScalarForKeyword(keyword, getFollowingCharacters(xmlr));
                            break switch1;
                        }
                    }
                    for (Enum keyword : currentResult.getListKeywords()) {
                        if (hasAttributeNameWithValue(xmlr, keyword.toString())) {
                            currentResult.addListForKeyword(keyword, parseList(xmlr));
                            break switch1;
                        }
                    }
                }
                if (xmlr.getLocalName().equalsIgnoreCase("Id")) {
                    for (Enum keyword : currentResult.getScalarKeywords()) {
                        if (keyword.toString().equalsIgnoreCase("Id")) {
                            currentResult.addScalarForKeyword(keyword, getFollowingCharacters(xmlr));
                            break switch1;
                        }
                    }
                }
                break;
            case XMLEvent.END_ELEMENT:
                // A closing DocSum completes the current result; start a fresh one.
                if (xmlr.getLocalName().equalsIgnoreCase("DocSum")) {
                    currentResult.wrap();
                    results.add(currentResult);
                    currentResult = getNewESummaryResult();
                }
                break;
            }
        }
    } finally {
        // Release the reader even when parsing fails part-way through.
        xmlr.closeCompletely();
    }
    return results;
}