Example usage for javax.xml.stream XMLEventReader peek

List of usage examples for javax.xml.stream XMLEventReader peek

Introduction

On this page you can find usage examples for the peek() method of javax.xml.stream.XMLEventReader.

Prototype

public XMLEvent peek() throws XMLStreamException;

Source Link

Document

Check the next XMLEvent without reading it from the stream.

Usage

From source file:com.streamsets.pipeline.lib.xml.StreamingXmlParser.java

/**
 * Discards upcoming events from {@code reader} for as long as {@code isIgnorable} accepts the
 * event returned by {@code peek()}. The first event that is not ignorable is left unread.
 *
 * @param reader the event reader to advance
 * @throws XMLStreamException if peeking at or reading an event fails
 */
void skipIgnorable(XMLEventReader reader) throws XMLStreamException {
    while (true) {
        if (!reader.hasNext()) {
            return;
        }
        if (!isIgnorable(reader.peek())) {
            return;
        }
        // The peeked event is ignorable: consume it and look at the next one.
        reader.nextEvent();
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Move the iterator so that a call to nextEvent will return the beginning of a post tag.
 *
 * <p>The reader is advanced in a loop (not recursively) so that a long run of events before
 * the first post cannot overflow the call stack. If the stream is exhausted before a post
 * start tag is seen, the method simply returns with the reader at its end.
 *
 * @param rdr the event reader to advance
 * @throws XMLStreamException if the underlying reader fails while peeking or reading
 */
private void iterateToPosts(final XMLEventReader rdr) throws XMLStreamException {
    while (rdr.hasNext()) {
        // Peek at the next element without consuming it.
        final XMLEvent fp = rdr.peek();
        if (fp == null) {
            // Defensive: peek() reports end of stream with null.
            return;
        }

        if (!fp.isStartElement()) {
            // Not a start tag: drop it.
            rdr.nextEvent();
            continue;
        }

        final StartElement se = fp.asStartElement();
        if (se.getName().getLocalPart().equals(POST_LOCAL_NAME)) {
            // The post start tag is next; leave it unread for the caller.
            return;
        }

        // Churn through non-post start tags.
        this.handleNonPostStartElement(rdr);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.io.tiger.TigerXmlReader.java

/**
 * Loads the next resource into {@code aJCas}.
 *
 * <p>The resource's XML is walked event by event: every start element accepted by
 * {@code isStartElement(event, "s")} is unmarshalled as a {@code TigerSentence} and passed to
 * {@code readSentence}; every other event is skipped. Once the builder is closed and
 * {@code pennTreeEnabled} is set, a {@code PennTree} annotation is added for each {@code ROOT}.
 *
 * @param aJCas the CAS to populate
 * @throws IOException if reading fails; XML stream and JAXB failures are wrapped in it
 * @throws CollectionException declared by the overridden method
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
    Resource resource = nextFile();
    initCas(aJCas, resource);

    posMappingProvider.configure(aJCas.getCas());

    InputStream input = null;
    try {
        input = CompressionUtils.getInputStream(resource.getLocation(), resource.getInputStream());

        XMLEventReader events = XMLInputFactory.newInstance().createXMLEventReader(input);
        Unmarshaller sentenceUnmarshaller = JAXBContext
                .newInstance(Meta.class, AnnotationDecl.class, TigerSentence.class).createUnmarshaller();
        JCasBuilder builder = new JCasBuilder(aJCas);

        // peek() yields null once the stream is exhausted, which ends the loop.
        for (XMLEvent upcoming = events.peek(); upcoming != null; upcoming = events.peek()) {
            if (!isStartElement(upcoming, "s")) {
                events.next();
                continue;
            }
            // The unmarshaller consumes the whole sentence element from the reader.
            readSentence(builder, sentenceUnmarshaller.unmarshal(events, TigerSentence.class).getValue());
        }

        builder.close();

        // Penn trees can only be created after the builder is closed; before that the text is
        // not yet set in the CAS and all token strings would come out as "null".
        if (pennTreeEnabled) {
            for (ROOT treeRoot : select(aJCas, ROOT.class)) {
                PennTree pennTree = new PennTree(aJCas, treeRoot.getBegin(), treeRoot.getEnd());
                PennTreeNode converted = PennTreeUtils.convertPennTree(treeRoot);
                pennTree.setPennTree(PennTreeUtils.toPennTree(converted));
                pennTree.addToIndexes();
            }
        }
    } catch (XMLStreamException streamFailure) {
        throw new IOException(streamFailure);
    } catch (JAXBException bindingFailure) {
        throw new IOException(bindingFailure);
    } finally {
        closeQuietly(input);
    }
}

From source file:org.javelin.sws.ext.bind.internal.model.ElementPattern.java

/**
 * Consumes one element from {@code eventReader}: reads its START_ELEMENT event, hands the
 * element's attributes and content to the nested pattern, then discards events up to and
 * including the next END_ELEMENT event.
 *
 * @param eventReader reader whose next event is expected to be a START_ELEMENT
 * @param context unmarshalling context passed through to the nested pattern
 * @return the value produced by the nested pattern
 * @throws XMLStreamException if reading from {@code eventReader} fails
 */
@Override
public T consume(XMLEventReader eventReader, UnmarshallingContext context) throws XMLStreamException {
    // Read (and thereby skip) the element's START_ELEMENT event.
    StartElement opening = eventReader.nextEvent().asStartElement();

    // Attributes hang off the StartElement; they are NOT delivered as separate events by
    // eventReader.nextEvent(), so collect them here for the nested pattern.
    List<Attribute> collected = new LinkedList<Attribute>();
    for (Iterator<?> it = opening.getAttributes(); it.hasNext();) {
        collected.add((Attribute) it.next());
    }

    T value = this.nestedPattern.consumeValue(new AttributesAwareXMLEventReader(eventReader, collected),
            context);

    // Discard events until an END_ELEMENT has been read or the stream runs out.
    boolean closed = false;
    while (!closed && eventReader.peek() != null) {
        closed = eventReader.nextEvent().getEventType() == XMLStreamConstants.END_ELEMENT;
    }

    return value;
}

From source file:org.javelin.sws.ext.bind.internal.model.ComplexTypePattern.java

/**
 * Instantiates the Java type of this pattern and fills it from the events of
 * {@code eventReader}.
 *
 * <p>The order of processing is dictated by the incoming events, not by the content model.
 * Only the first level of start elements and a single end element are handled here; deeper
 * levels are handled by the nested patterns. The loop stops, without consuming the event, at
 * the first END_ELEMENT or END_DOCUMENT, or when the reader has no more events.
 *
 * <p>Every iteration either consumes at least the peeked event or terminates the loop, so an
 * unexpected event (START_DOCUMENT, or an event type not listed below) can no longer be
 * peeked at forever.
 *
 * @param eventReader the reader to take events from
 * @param context unmarshalling context passed through to the nested patterns
 * @return the populated object
 * @throws XMLStreamException if reading from {@code eventReader} fails
 */
@Override
public T consumeValue(XMLEventReader eventReader, UnmarshallingContext context) throws XMLStreamException {
    // first create an object to be filled (using PropertyAccessors - direct or bean) according to the content model
    T object = BeanUtils.instantiate(this.getJavaType());

    // TODO: create a property to enable strict unmarshalling - dictated by content model
    // only this (ContentModel) pattern iterates over XML Events
    while (true) {
        XMLEvent event = eventReader.peek();
        if (event == null) {
            // peek() returns null at the end of the stream: nothing left to consume.
            break;
        }

        PropertyMetadataValue<T, ?> pmv = null;
        boolean end = false;

        switch (event.getEventType()) {
        case XMLStreamConstants.ATTRIBUTE:
            pmv = this.consumeNestedAttribute(eventReader, context);
            break;
        case XMLStreamConstants.CDATA:
        case XMLStreamConstants.CHARACTERS:
            // TODO: XMLEvent.ENTITY_REFERENCE?
            if (this.simpleContent != null) {
                pmv = this.consumeSimpleContent(eventReader, context);
            } else {
                // no simple content expected: text is skipped like any other ignorable event
                eventReader.nextEvent();
            }
            break;
        case XMLStreamConstants.START_ELEMENT:
            pmv = this.consumeNestedElement(eventReader, context);
            break;
        case XMLStreamConstants.END_ELEMENT:
            // TODO: in mixed content there will be more than one end element it this content model's level
        case XMLStreamConstants.END_DOCUMENT:
            end = true;
            break;
        case XMLStreamConstants.COMMENT:
        case XMLStreamConstants.DTD:
        case XMLStreamConstants.SPACE:
        case XMLStreamConstants.ENTITY_DECLARATION:
        case XMLStreamConstants.NOTATION_DECLARATION:
        case XMLStreamConstants.PROCESSING_INSTRUCTION:
        case XMLStreamConstants.ENTITY_REFERENCE:
            // TODO: XMLEvent.ENTITY_REFERENCE?
        case XMLStreamConstants.START_DOCUMENT:
            // strange at this level, but it must be consumed or peek() returns it forever
        default:
            eventReader.nextEvent();
            break;
        }

        if (end) {
            break;
        }

        if (pmv != null) {
            pmv.getMetadata().setValue(object, pmv.getValue());
        }
    }

    return object;
}

From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java

/**
 * Scans a Wikipedia XML dump and collects its redirects.
 *
 * <p>The text of each {@code title} element is remembered as the current page title. The
 * text of a following {@code text} element counts as a redirect when, after trimming, it
 * starts (case-insensitively) with {@code #redirect} or {@code #weiterleitung}, or with
 * {@code redirect} / {@code weiterleitung} and contains no line break. Examples:
 *
 * <pre>
 * &lt;text xml:space="preserve"&gt;#REDIRECT [[Autopoiesis]]&lt;/text&gt;
 * &lt;text xml:space="preserve"&gt;#REDIRECT:[[Hans Leo Haler]]&lt;/text&gt;
 * &lt;text xml:space="preserve"&gt;#redirect [[Weier Hai]]&lt;/text&gt;
 * </pre>
 *
 * The target is the trimmed text between the first {@code [[} and the first {@code ]]}.
 *
 * @param sWikipediaDump stream delivering the dump XML
 * @return a map from redirect target title to the titles of the pages redirecting to it
 * @throws FileNotFoundException declared in the signature; not thrown by the code visible here
 * @throws XMLStreamException if the dump cannot be parsed
 */
public MultiValueHashMap<String, String> getPageTitle2Redirects(InputStream sWikipediaDump)
        throws FileNotFoundException, XMLStreamException {
    Logger log = Logger.getLogger(WikipediaDumpParser.class.getName());
    log.info("will collect redirects from wikipedia dump...");

    MultiValueHashMap<String, String> redirectsByTarget = new MultiValueBalancedTreeMap<String, String>();

    String currentTitle = "";
    XMLEventReader events = XMLInputFactory.newInstance().createXMLEventReader(sWikipediaDump, "Utf-8");
    int titleCount = 0;

    while (events.hasNext()) {
        XMLEvent current = events.nextEvent();
        if (!current.isStartElement()) {
            continue;
        }

        // A title element: remember its text in case a redirect follows.
        if (current.asStartElement().getName().getLocalPart().equals("title")) {
            currentTitle = readNextCharEventsText(events);

            titleCount++;
            if (titleCount % 200000 == 0) {
                log.info("read doc #" + StringUtils.beautifyNumber(titleCount));
            }
            continue;
        }

        if (!current.asStartElement().getName().getLocalPart().equals("text")) {
            continue;
        }

        // A text element: it is followed either by character data or directly by its end
        // tag (empty texts apparently exist as well).
        if (!events.peek().isCharacters()) {
            continue;
        }

        String body = readNextCharEventsText(events);
        if (body == null) {
            continue;
        }
        body = body.trim();

        // regionMatches(true, ...) is false when the text is shorter than the prefix.
        boolean isRedirect = body.regionMatches(true, 0, "#redirect", 0, 9)
                || (body.regionMatches(true, 0, "redirect", 0, 8) && !body.contains("\n"))
                || body.regionMatches(true, 0, "#weiterleitung", 0, 14)
                || (body.regionMatches(true, 0, "weiterleitung", 0, 13) && !body.contains("\n"));
        if (!isRedirect) {
            continue;
        }

        // We have a redirect: record it. A missing "]]" (-1) also fails the second test.
        // Both indices come from indexOf on this string with close > open, so they are
        // always within bounds for the substring call below.
        int open = body.indexOf("[[");
        int close = body.indexOf("]]");
        if (open < 0 || close <= open) {
            continue;
        }

        redirectsByTarget.add(body.substring(open + 2, close).trim(), currentTitle);
    }

    log.info("Redirects found: " + StringUtils.beautifyNumber(redirectsByTarget.valueSize()));

    return redirectsByTarget;
}

From source file:org.alex73.osm.converters.bel.Convert.java

/**
 * Reads {@code tmp/belarus-latest.osm.bz2} and writes two copies of it,
 * {@code tmp/belarus-bel.osm} and {@code tmp/belarus-intl.osm}.
 *
 * <p>{@code way}, {@code node} and {@code relation} elements are unmarshalled with JAXB, have
 * their tags adjusted by {@code fixBel} (plus {@code setTag} for ways with a known street
 * name) before being written to the first file and by {@code fixInt} before being written to
 * the second, each followed by a newline. All other events are copied unchanged.
 *
 * @param args not used
 * @throws Exception if reading, parsing, unmarshalling, marshalling or writing fails
 */
public static void main(String[] args) throws Exception {
    loadStreetNamesForHouses();

    // try-with-resources: closing the XML reader/writers does not close the file streams
    // underneath them, so the streams are owned and closed here, even on failure.
    try (InputStream in = new BZip2CompressorInputStream(
            new BufferedInputStream(new FileInputStream("tmp/belarus-latest.osm.bz2"), BUFFER_SIZE));
            BufferedOutputStream outCyr = new BufferedOutputStream(
                    new FileOutputStream("tmp/belarus-bel.osm"), BUFFER_SIZE);
            BufferedOutputStream outInt = new BufferedOutputStream(
                    new FileOutputStream("tmp/belarus-intl.osm"), BUFFER_SIZE)) {

        // create xml event reader for input stream
        XMLEventFactory eventFactory = XMLEventFactory.newInstance();
        XMLEvent newLine = eventFactory.createCharacters("\n");
        XMLInputFactory xif = XMLInputFactory.newInstance();
        XMLOutputFactory xof = XMLOutputFactory.newInstance();
        XMLEventReader reader = xif.createXMLEventReader(in);
        XMLEventWriter wrCyr = xof.createXMLEventWriter(outCyr);
        XMLEventWriter wrInt = xof.createXMLEventWriter(outInt);

        // initialize jaxb
        JAXBContext jaxbCtx = JAXBContext.newInstance(Node.class, Way.class, Relation.class);
        Unmarshaller um = jaxbCtx.createUnmarshaller();
        Marshaller m = jaxbCtx.createMarshaller();
        m.setProperty(Marshaller.JAXB_FRAGMENT, true);
        m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);

        XMLEvent e;
        while ((e = reader.peek()) != null) {
            boolean processed = false;
            if (e.isStartElement()) {
                switch (e.asStartElement().getName().getLocalPart()) {
                case "way":
                    Way way = um.unmarshal(reader, Way.class).getValue();
                    fixBel(way.getTag(), "name:be", "name");
                    String nameBeHouse = houseStreetBe.get(way.getId());
                    if (nameBeHouse != null) {
                        setTag(way.getTag(), "addr:street", nameBeHouse);
                    }
                    m.marshal(way, wrCyr);
                    fixInt(way.getTag());
                    m.marshal(way, wrInt);
                    wrCyr.add(newLine);
                    wrInt.add(newLine);
                    processed = true;
                    break;
                case "node":
                    Node node = um.unmarshal(reader, Node.class).getValue();
                    fixBel(node.getTag(), "name:be", "name");
                    m.marshal(node, wrCyr);
                    fixInt(node.getTag());
                    m.marshal(node, wrInt);
                    wrCyr.add(newLine);
                    wrInt.add(newLine);
                    processed = true;
                    break;
                case "relation":
                    Relation relation = um.unmarshal(reader, Relation.class).getValue();
                    fixBel(relation.getTag(), "name:be", "name");
                    m.marshal(relation, wrCyr);
                    fixInt(relation.getTag());
                    m.marshal(relation, wrInt);
                    wrCyr.add(newLine);
                    wrInt.add(newLine);
                    processed = true;
                    break;
                default:
                    // any other element is copied through below
                    break;
                }
            }

            if (processed) {
                // unmarshal() has already moved the reader past the element's end tag. Only
                // drop the whitespace that separated it from the next element (a newline was
                // written in its place); never consume a real event, or an element that
                // follows without whitespace would be lost.
                XMLEvent following = reader.peek();
                if (following != null && following.isCharacters()
                        && following.asCharacters().isWhiteSpace()) {
                    reader.nextEvent();
                }
            } else {
                // copy the event unchanged to both outputs, then consume it
                wrCyr.add(e);
                wrInt.add(e);
                reader.nextEvent();
            }
        }

        wrCyr.flush();
        wrCyr.close();
        wrInt.flush();
        wrInt.close();
        reader.close();
    }

    System.out.println("UniqueTranslatedTags: " + uniqueTranslatedTags);
}

From source file:org.apache.olingo.client.core.serialization.AtomDeserializer.java

/**
 * Guesses the kind of property from the first event in {@code reader} that is not
 * whitespace-only character data. Whitespace events are consumed; the deciding event is only
 * peeked at and stays unread.
 *
 * @param reader the reader positioned inside the property element
 * @param typeInfo type information for the property, may be {@code null}
 * @return the guessed property type
 * @throws XMLStreamException if reading from {@code reader} fails
 */
private PropertyType guessPropertyType(final XMLEventReader reader, final EdmTypeInfo typeInfo)
        throws XMLStreamException {

    XMLEvent firstSignificant = null;
    while (reader.hasNext() && firstSignificant == null) {
        final XMLEvent upcoming = reader.peek();
        final boolean blank = upcoming.isCharacters() && upcoming.asCharacters().isWhiteSpace();
        if (blank) {
            reader.nextEvent();
        } else {
            firstSignificant = upcoming;
        }
    }

    // Nothing but whitespace was left: decide from the type information alone.
    if (firstSignificant == null) {
        return typeInfo == null || typeInfo.isPrimitiveType() ? PropertyType.PRIMITIVE : PropertyType.ENUM;
    }

    if (firstSignificant.isStartElement()) {
        // A child element: its qualified name decides.
        if (Constants.NS_GML.equals(firstSignificant.asStartElement().getName().getNamespaceURI())) {
            return PropertyType.PRIMITIVE;
        }
        if (elementQName.equals(firstSignificant.asStartElement().getName())) {
            return PropertyType.COLLECTION;
        }
        return PropertyType.COMPLEX;
    }

    if (firstSignificant.isCharacters()) {
        return typeInfo == null || typeInfo.isPrimitiveType() ? PropertyType.PRIMITIVE : PropertyType.ENUM;
    }

    return PropertyType.EMPTY;
}

From source file:org.apache.olingo.client.core.serialization.AtomDeserializer.java

/**
 * Returns the next start element of {@code reader} without consuming it.
 *
 * <p>Events that precede it and are neither start nor end elements (whitespace, comments,
 * other character data, ...) are consumed. When an end element is reached first, the
 * enclosing element has closed without a further child: the end element is left unread and
 * {@code null} is returned. Every iteration therefore either returns or consumes an event,
 * so the loop always terminates.
 *
 * @param reader the reader to inspect
 * @return the upcoming start element, or {@code null} if an end element or the end of the
 *         stream comes first
 * @throws XMLStreamException if reading from {@code reader} fails
 */
private StartElement getStartElement(final XMLEventReader reader) throws XMLStreamException {
    while (reader.hasNext()) {
        final XMLEvent innerEvent = reader.peek();
        if (innerEvent.isStartElement()) {
            return innerEvent.asStartElement();
        }
        if (innerEvent.isEndElement()) {
            // Covers the inline end tag as well as any other closing tag; it stays unread
            // so the caller still sees it.
            return null;
        }
        // Anything else must be consumed, otherwise peek() keeps returning the same event.
        reader.nextEvent();
    }
    return null;
}

From source file:org.apache.olingo.commons.core.data.AtomPropertyDeserializer.java

/**
 * Guesses the kind of property from the first event in {@code reader} that is not
 * whitespace-only character data. Whitespace events are consumed; the deciding event is only
 * peeked at and stays unread.
 *
 * @param reader the reader positioned inside the property element
 * @return the guessed property type
 * @throws XMLStreamException if reading from {@code reader} fails
 */
private ODataPropertyType guessPropertyType(final XMLEventReader reader) throws XMLStreamException {
    XMLEvent firstSignificant = null;
    while (reader.hasNext() && firstSignificant == null) {
        final XMLEvent upcoming = reader.peek();
        final boolean blank = upcoming.isCharacters() && upcoming.asCharacters().isWhiteSpace();
        if (blank) {
            reader.nextEvent();
        } else {
            firstSignificant = upcoming;
        }
    }

    // Nothing but whitespace was left.
    if (firstSignificant == null) {
        return ODataPropertyType.PRIMITIVE;
    }

    if (firstSignificant.isStartElement()) {
        // A child element: its qualified name decides.
        if (Constants.NS_GML.equals(firstSignificant.asStartElement().getName().getNamespaceURI())) {
            return ODataPropertyType.PRIMITIVE;
        }
        if (elementQName.equals(firstSignificant.asStartElement().getName())) {
            return ODataPropertyType.COLLECTION;
        }
        return ODataPropertyType.COMPLEX;
    }

    if (firstSignificant.isCharacters()) {
        return ODataPropertyType.PRIMITIVE;
    }

    return ODataPropertyType.EMPTY;
}