Example usage for javax.xml.stream XMLInputFactory setProperty

List of usage examples for javax.xml.stream XMLInputFactory setProperty

Introduction

On this page you can find example usage for the setProperty method of javax.xml.stream.XMLInputFactory.

Prototype

public abstract void setProperty(java.lang.String name, Object value) throws java.lang.IllegalArgumentException;

Source Link

Document

Allows the user to set a specific feature or property on the underlying implementation.

Usage

From source file: tpt.dbweb.cat.io.TaggedTextXMLReader.java

/**
 * Builds an iterator that pulls one {@code <article>} element at a time from the given XML
 * stream and converts it into a {@link TaggedText}.
 *
 * <p>The text content of an article is concatenated into a single string; every
 * {@code <mark>} element inside it becomes a span over that string, and marks carrying an
 * {@code entity} (or, failing that, {@code annotation}) attribute become entity mentions.
 *
 * @param is               stream containing the XML document to read
 * @param errorMessageInfo context (e.g. a file name) written to the log when parsing fails
 * @return an iterator over the articles in the stream, or {@code null} if the XML stream
 *         reader could not be created
 * @throws RuntimeException from the returned iterator, wrapping the
 *         {@link XMLStreamException} raised while reading the stream
 */
private Iterator<TaggedText> getIterator(InputStream is, String errorMessageInfo) {

    final XMLStreamReader xsr;
    try {
        XMLInputFactory xif = XMLInputFactory.newInstance();
        // Do not resolve external entities, do not expand entity references and do not
        // validate: the input is treated as plain tagged text.
        xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
        xif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
        xif.setProperty(XMLInputFactory.IS_VALIDATING, false);
        xsr = xif.createXMLStreamReader(is);
    } catch (XMLStreamException | FactoryConfigurationError e) {
        // Kept for backward compatibility: callers receive null when the reader cannot be built.
        log.error("cannot create XML stream reader: " + errorMessageInfo, e);
        return null;
    }

    return new PeekIterator<TaggedText>() {

        /**
         * Reads events up to and including the next {@code </article>} end tag.
         *
         * @return the parsed article, or {@code null} when the stream ends before another
         *         article is completed
         */
        @Override
        protected TaggedText internalNext() {
            // <mark> elements that have been opened but not yet closed (used as a stack).
            ArrayList<TextSpan> openMarks = new ArrayList<>();
            // Text of the current article with all markup removed; mark offsets index into it.
            StringBuilder pureTextSB = new StringBuilder();
            // Closed marks of the current article.
            ArrayList<TextSpan> marks = new ArrayList<>();
            marks.add(new TextSpan(null, 0, 0));
            TaggedText tt = null;

            try {
                loop: while (xsr.hasNext()) {
                    int event = xsr.next();
                    switch (event) {
                    case XMLStreamConstants.START_ELEMENT:
                        if ("articles".equals(xsr.getLocalName())) {
                            // container element, nothing to do
                        } else if ("article".equals(xsr.getLocalName())) {
                            tt = new TaggedText();
                            for (int i = 0; i < xsr.getAttributeCount(); i++) {
                                if ("id".equals(xsr.getAttributeLocalName(i))) {
                                    tt.id = xsr.getAttributeValue(i);
                                }
                                tt.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i));
                            }

                        } else if ("mark".equals(xsr.getLocalName())) {
                            // The span starts at the current text position; its end is set
                            // when the matching end tag is seen.
                            TextSpan tr = new TextSpan(null, pureTextSB.length(), pureTextSB.length());
                            for (int i = 0; i < xsr.getAttributeCount(); i++) {
                                tr.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i));
                            }

                            openMarks.add(tr);
                        } else if ("br".equals(xsr.getLocalName())) {
                            // TODO: how to propagate tags from the input to the output?
                        } else {
                            log.warn("ignore tag " + xsr.getLocalName());
                        }
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        if ("mark".equals(xsr.getLocalName())) {

                            // Guard against an empty stack: remove() on an empty list would
                            // throw instead of returning null.
                            if (openMarks.isEmpty()) {
                                log.warn("markend at " + xsr.getLocation().getCharacterOffset()
                                        + " has no corresponding mark tag");
                                break;
                            }

                            // The most recently opened <mark ...> is the corresponding one.
                            TextSpan tr = openMarks.remove(openMarks.size() - 1);
                            tr.end = pureTextSB.length();
                            marks.add(tr);

                        } else if ("article".equals(xsr.getLocalName())) {
                            if (tt == null) {
                                // Can happen with nested <article> elements: the inner end tag
                                // already finished the article on a previous call.
                                log.warn("article end at " + xsr.getLocation().getCharacterOffset()
                                        + " has no corresponding article tag");
                                break;
                            }
                            tt.text = StringUtils.stripEnd(pureTextSB.toString().trim(), " \t\n");
                            pureTextSB = new StringBuilder();

                            tt.mentions = new ArrayList<>();
                            for (TextSpan mark : marks) {

                                String entity = mark.info().get("entity");
                                if (entity == null) {
                                    entity = mark.info().get("annotation");
                                }
                                if (entity != null) {
                                    EntityMention e = new EntityMention(tt.text, mark.start, mark.end, entity);
                                    String minMention = mark.info().get("min");
                                    String mention = e.getMention();
                                    if (minMention != null && !minMention.isEmpty()) {
                                        // Literal substring search for the minimal mention.
                                        int first = mention.indexOf(minMention);
                                        if (first >= 0) {
                                            int firstEnd = first + minMention.length();
                                            e.min = new TextSpan(e.text, e.start + first, e.start + firstEnd);
                                            // A second, non-overlapping occurrence makes the
                                            // minimal mention ambiguous.
                                            if (mention.indexOf(minMention, firstEnd) >= 0) {
                                                log.warn("found " + minMention + " two times in \"" + mention
                                                        + "\"");
                                            }
                                        } else {
                                            String prefix = Utility.findLongestPrefix(mention, minMention);
                                            log.warn("didn't find min mention '" + minMention + "' in text '"
                                                    + mention + "', longest prefix found: '" + prefix
                                                    + "' in article " + tt.id);
                                        }
                                    }

                                    // "min" and "entity" are already represented on the mention;
                                    // copy only the remaining attributes.
                                    mark.info().remove("min");
                                    mark.info().remove("entity");
                                    if (mark.info().size() > 0) {
                                        e.info().putAll(mark.info());
                                    }
                                    tt.mentions.add(e);
                                }
                            }
                            openMarks.clear();
                            marks.clear();
                            // One article per call: stop reading here.
                            break loop;
                        }
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        String toadd = xsr.getText();
                        if (pureTextSB.length() == 0) {
                            // Drop leading whitespace at the very beginning of the text.
                            toadd = StringUtils.stripStart(toadd, " \t\n");
                        }
                        pureTextSB.append(toadd);
                        break;
                    }

                }
            } catch (XMLStreamException e) {
                log.error("{}", errorMessageInfo);
                throw new RuntimeException(e);
            }
            if (tt != null && tt.mentions != null) {
                // null comparator: sort by the natural ordering of the mentions
                tt.mentions.sort(null);
            }
            return tt;
        }
    };
}