Example usage for javax.xml.stream XMLInputFactory IS_VALIDATING

List of usage examples for javax.xml.stream XMLInputFactory IS_VALIDATING

Introduction

On this page you can find example usages of javax.xml.stream XMLInputFactory IS_VALIDATING.

Prototype

String IS_VALIDATING

To view the source code for javax.xml.stream XMLInputFactory IS_VALIDATING, click the Source Link.

Document

The property used to turn on/off implementation-specific validation.

Usage

From source file:org.sonar.server.duplication.ws.DuplicationsParser.java

/**
 * Creates the {@link SMInputFactory} used by this parser.
 *
 * <p>The underlying {@link XMLInputFactory} is configured to coalesce adjacent
 * character data, to ignore namespaces, and to neither process DTDs nor validate.
 *
 * @return a new {@code SMInputFactory} wrapping the configured StAX factory
 */
private static SMInputFactory initStax() {
    final XMLInputFactory staxFactory = XMLInputFactory.newInstance();

    // Text handling: deliver adjacent character data as a single event, no namespaces.
    staxFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
    staxFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);

    // Do not attempt to load a DTD when the document carries a DOCTYPE,
    // and do not validate the content.
    staxFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    staxFactory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE);

    final SMInputFactory wrapped = new SMInputFactory(staxFactory);
    return wrapped;
}

From source file:tpt.dbweb.cat.io.TaggedTextXMLReader.java

/**
 * Creates an iterator that lazily reads one {@code TaggedText} per {@code <article>}
 * element from the given XML stream.
 *
 * <p>The reader is created with external entities, entity-reference replacement and
 * validation switched off. Inside an article, character data is accumulated as the
 * plain text, and every {@code <mark>} element is recorded as a span over that text
 * together with its attributes. When the article closes, each span carrying an
 * {@code entity} (or, failing that, an {@code annotation}) attribute is turned into
 * an {@code EntityMention}.
 *
 * @param is               the XML input to read from
 * @param errorMessageInfo context information that is logged when reading fails
 * @return the iterator, or {@code null} if the XML stream reader could not be created
 */
private Iterator<TaggedText> getIterator(InputStream is, String errorMessageInfo) {

    final XMLStreamReader xsr;
    try {
        XMLInputFactory xif = XMLInputFactory.newInstance();
        xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
        xif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
        xif.setProperty(XMLInputFactory.IS_VALIDATING, false);
        xsr = xif.createXMLStreamReader(is);
    } catch (XMLStreamException | FactoryConfigurationError e) {
        // Report through the logger (with the caller's context) instead of stderr.
        log.error("could not create XML stream reader: " + errorMessageInfo, e);
        return null;
    }

    return new PeekIterator<TaggedText>() {

        /**
         * Reads events up to and including the next {@code </article>} and returns the
         * article built from them; returns {@code null} when the stream ends without
         * an article having been started.
         */
        @Override
        protected TaggedText internalNext() {
            // <mark> elements that have been opened but not yet closed (used as a stack)
            ArrayList<TextSpan> openMarks = new ArrayList<>();
            StringBuilder pureTextSB = new StringBuilder();
            // closed marks; seeded with an attribute-less span at offset 0
            ArrayList<TextSpan> marks = new ArrayList<>();
            marks.add(new TextSpan(null, 0, 0));
            TaggedText tt = null;

            try {
                loop: while (xsr.hasNext()) {
                    int event = xsr.next();
                    switch (event) {
                    case XMLStreamConstants.START_ELEMENT: {
                        String name = xsr.getLocalName();
                        if ("articles".equals(name)) {
                            // container element, nothing to do
                        } else if ("article".equals(name)) {
                            tt = new TaggedText();
                            for (int i = 0; i < xsr.getAttributeCount(); i++) {
                                if ("id".equals(xsr.getAttributeLocalName(i))) {
                                    tt.id = xsr.getAttributeValue(i);
                                }
                                tt.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i));
                            }
                        } else if ("mark".equals(name)) {
                            // the span starts at the current text length; its end is set on </mark>
                            TextSpan tr = new TextSpan(null, pureTextSB.length(), pureTextSB.length());
                            for (int i = 0; i < xsr.getAttributeCount(); i++) {
                                tr.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i));
                            }
                            openMarks.add(tr);
                        } else if ("br".equals(name)) {
                            // TODO: how to propagate tags from the input to the output?
                        } else {
                            log.warn("ignore tag " + name);
                        }
                        break;
                    }
                    case XMLStreamConstants.END_ELEMENT: {
                        String name = xsr.getLocalName();
                        if ("mark".equals(name)) {
                            // A <mark> opened before the current article started is not on
                            // this call's stack; check before popping so that case is
                            // reported instead of throwing IndexOutOfBoundsException.
                            if (openMarks.isEmpty()) {
                                log.warn("markend at " + xsr.getLocation().getCharacterOffset()
                                        + " has no corresponding mark tag");
                            } else {
                                TextSpan tr = openMarks.remove(openMarks.size() - 1);
                                tr.end = pureTextSB.length();
                                marks.add(tr);
                            }
                        } else if ("article".equals(name)) {
                            tt.text = StringUtils.stripEnd(pureTextSB.toString().trim(), " \t\n");
                            pureTextSB = new StringBuilder();
                            collectMentions(tt, marks);
                            openMarks.clear();
                            marks.clear();
                            break loop;
                        }
                        break;
                    }
                    case XMLStreamConstants.CHARACTERS: {
                        String toadd = xsr.getText();
                        if (pureTextSB.length() == 0) {
                            // drop leading whitespace of the article text
                            toadd = StringUtils.stripStart(toadd, " \t\n");
                        }
                        pureTextSB.append(toadd);
                        break;
                    }
                    default:
                        // all other event types are ignored
                        break;
                    }
                }
            } catch (XMLStreamException e) {
                log.error("{}", errorMessageInfo);
                throw new RuntimeException(e);
            }
            if (tt != null && tt.mentions != null) {
                tt.mentions.sort(null);
            }
            return tt;
        }

        /** Fills {@code tt.mentions} with one mention per mark that names an entity. */
        private void collectMentions(TaggedText tt, ArrayList<TextSpan> marks) {
            tt.mentions = new ArrayList<>();
            for (TextSpan mark : marks) {
                String entity = mark.info().get("entity");
                if (entity == null) {
                    entity = mark.info().get("annotation");
                }
                if (entity == null) {
                    continue;
                }

                EntityMention e = new EntityMention(tt.text, mark.start, mark.end, entity);
                String minMention = mark.info().get("min");
                if (minMention != null && !"".equals(minMention)) {
                    locateMinMention(e, minMention, tt.id);
                }

                // "min" and "entity" are consumed here; remaining attributes go to the mention
                mark.info().remove("min");
                mark.info().remove("entity");
                if (mark.info().size() > 0) {
                    e.info().putAll(mark.info());
                }
                tt.mentions.add(e);
            }
        }

        /** Sets {@code e.min} to the first literal occurrence of {@code minMention} in the mention text. */
        private void locateMinMention(EntityMention e, String minMention, String articleId) {
            String mention = e.getMention();
            Matcher m = Pattern.compile(Pattern.quote(minMention)).matcher(mention);
            if (m.find()) {
                // offsets are relative to the mention, so shift them by the mention start
                e.min = new TextSpan(e.text, e.start + m.start(), e.start + m.end());
                if (m.find()) {
                    log.warn("found " + minMention + " two times in \"" + mention + "\"");
                }
            } else {
                String prefix = Utility.findLongestPrefix(mention, minMention);
                log.warn("didn't find min mention '" + minMention + "' in text '" + mention
                        + "', longest prefix found: '" + prefix + "' in article " + articleId);
            }
        }
    };
}