Example usage for org.dom4j Element nodeIterator

List of usage examples for org.dom4j Element nodeIterator

Introduction

On this page you can find example usages of the org.dom4j Element nodeIterator method.

Prototype

Iterator<Node> nodeIterator();

Source Link

Document

Returns an iterator through the content nodes of this branch.

Usage

From source file:com.suneee.core.util.XMLProperties.java

License:Open Source License

/**
 * Sets a property to an array of values. Multiple values matching the same property
 * is mapped to an XML file as multiple elements containing each value.
 * For example, using the name "foo.bar.prop", and the value string array containing
 * {"some value", "other value", "last value"} would produce the following XML:
 * <pre>/*  w  w  w  .  ja v a2  s  .c om*/
 * &lt;foo&gt;
 *     &lt;bar&gt;
 *         &lt;prop&gt;some value&lt;/prop&gt;
 *         &lt;prop&gt;other value&lt;/prop&gt;
 *         &lt;prop&gt;last value&lt;/prop&gt;
 *     &lt;/bar&gt;
 * &lt;/foo&gt;
 * </pre>
 *
 * @param name the name of the property.
 * @param values the values for the property (can be empty but not null).
 */
public void setProperties(String name, List<String> values) {
    String[] propName = parsePropertyName(name);
    // Search for this property by traversing down the XML heirarchy,
    // stopping one short.
    Element element = document.getRootElement();
    for (int i = 0; i < propName.length - 1; i++) {
        // If we don't find this part of the property in the XML heirarchy
        // we add it as a new node
        if (element.element(propName[i]) == null) {
            element.addElement(propName[i]);
        }
        element = element.element(propName[i]);
    }
    String childName = propName[propName.length - 1];
    // We found matching property, clear all children.
    List<Element> toRemove = new ArrayList<Element>();
    Iterator iter = element.elementIterator(childName);
    while (iter.hasNext()) {
        toRemove.add((Element) iter.next());
    }
    for (iter = toRemove.iterator(); iter.hasNext();) {
        element.remove((Element) iter.next());
    }
    // Add the new children.
    for (String value : values) {
        Element childElement = element.addElement(childName);
        if (value.startsWith("<![CDATA[")) {
            Iterator it = childElement.nodeIterator();
            while (it.hasNext()) {
                Node node = (Node) it.next();
                if (node instanceof CDATA) {
                    childElement.remove(node);
                    break;
                }
            }
            childElement.addCDATA(value.substring(9, value.length() - 3));
        } else {
            childElement.setText(StringEscapeUtils.escapeXml(value));
        }
    }
    saveProperties();

    // Generate event.
    /*Map<String, Object> params = new HashMap<String, Object>();
    params.put("value", values);
    PropertyEventDispatcher.dispatchEvent(name,
        PropertyEventDispatcher.EventType.xml_property_set, params);*/
}

From source file:com.suneee.core.util.XMLProperties.java

License:Open Source License

/**
 * Sets the value of the specified property. If the property doesn't
 * currently exist, it will be automatically created.
 *
 * <p>A {@code null} name makes the call a no-op; a {@code null} value is stored
 * as the empty string. The value is put into the property cache, written into
 * the XML document and the document is saved. A value starting with
 * {@code <![CDATA[} is stored as a CDATA section (the wrapper is stripped).
 *
 * @param name  the name of the property to set.
 * @param value the new value for the property.
 * @throws IllegalArgumentException if XML-escaping {@code name} would change it.
 */
public synchronized void setProperty(String name, String value) {
    // The null guard has to run before the name is inspected: the escape check
    // below dereferences the name, so a null name could never reach a later guard.
    if (name == null) {
        return;
    }
    if (!StringEscapeUtils.escapeXml(name).equals(name)) {
        throw new IllegalArgumentException("Property name cannot contain XML entities.");
    }
    if (value == null) {
        value = "";
    }

    // Set cache correctly with prop name and value.
    propertyCache.put(name, value);

    String[] propName = parsePropertyName(name);
    // Walk down the XML hierarchy, creating every missing element on the way.
    Element element = document.getRootElement();
    for (String aPropName : propName) {
        Element next = element.element(aPropName);
        if (next == null) {
            next = element.addElement(aPropName);
        }
        element = next;
    }
    // Set the value of the property in this node.
    if (value.startsWith("<![CDATA[")) {
        // Only the first existing CDATA node is replaced; other content nodes of
        // the element are left untouched. Breaking right after the removal keeps
        // the iterator from being used on the modified content.
        for (Iterator<?> it = element.nodeIterator(); it.hasNext();) {
            Node node = (Node) it.next();
            if (node instanceof CDATA) {
                element.remove(node);
                break;
            }
        }
        // Strip the closing "]]>" only when it is really there; a value without
        // the terminator would otherwise be truncated or raise an
        // index-out-of-bounds error.
        int end = value.endsWith("]]>") ? value.length() - 3 : value.length();
        element.addCDATA(value.substring(9, end));
    } else {
        element.setText(value);
    }
    // Write the XML properties to disk
    saveProperties();
    // No property event is dispatched from this method.
}

From source file:com.tao.realweb.util.XMLProperties.java

License:Open Source License

/**
 * Sets a property to a list of values. Multiple values for the same property
 * are mapped to the XML document as multiple sibling elements, one per value.
 * For example, using the name "foo.bar.prop" and a list containing
 * {"some value", "other value", "last value"} would produce the following XML:
 * <pre>
 * &lt;foo&gt;
 *     &lt;bar&gt;
 *         &lt;prop&gt;some value&lt;/prop&gt;
 *         &lt;prop&gt;other value&lt;/prop&gt;
 *         &lt;prop&gt;last value&lt;/prop&gt;
 *     &lt;/bar&gt;
 * &lt;/foo&gt;
 * </pre>
 *
 * <p>All existing elements with the property's leaf name are removed before the
 * new ones are added. A value starting with {@code <![CDATA[} is stored as a CDATA
 * section (the wrapper is stripped); any other value is passed through
 * {@code StringEscapeUtils.escapeXml} and stored as element text. The document is
 * saved afterwards and an {@code xml_property_set} event carrying the values is
 * dispatched.
 *
 * @param name the name of the property.
 * @param values the values for the property (can be empty but not null).
 */
public void setProperties(String name, List<String> values) {
    String[] propName = parsePropertyName(name);
    // Walk down the XML hierarchy, creating missing intermediate elements,
    // and stop at the parent of the property elements.
    Element element = document.getRootElement();
    for (int i = 0; i < propName.length - 1; i++) {
        Element next = element.element(propName[i]);
        if (next == null) {
            next = element.addElement(propName[i]);
        }
        element = next;
    }
    String childName = propName[propName.length - 1];
    // Collect the existing children first so that removing them does not
    // interfere with the iteration over the parent's content.
    List<Element> toRemove = new ArrayList<Element>();
    for (Iterator<?> iter = element.elementIterator(childName); iter.hasNext();) {
        toRemove.add((Element) iter.next());
    }
    for (Element stale : toRemove) {
        element.remove(stale);
    }
    // Add the new children. Each child is freshly created and therefore empty,
    // so there is no old CDATA node that would need to be removed first.
    for (String value : values) {
        Element childElement = element.addElement(childName);
        if (value.startsWith("<![CDATA[")) {
            // Strip the closing "]]>" only when it is really there; a value without
            // the terminator would otherwise be truncated or raise an
            // index-out-of-bounds error.
            int end = value.endsWith("]]>") ? value.length() - 3 : value.length();
            childElement.addCDATA(value.substring(9, end));
        } else {
            childElement.setText(StringEscapeUtils.escapeXml(value));
        }
    }
    saveProperties();

    // Generate event.
    Map<String, Object> params = new HashMap<String, Object>();
    params.put("value", values);
    PropertyEventDispatcher.dispatchEvent(name, PropertyEventDispatcher.EventType.xml_property_set, params);
}

From source file:com.tao.realweb.util.XMLProperties.java

License:Open Source License

/**
 * Sets the value of the specified property. If the property doesn't
 * currently exist, it will be automatically created.
 *
 * <p>A {@code null} name makes the call a no-op; a {@code null} value is stored
 * as the empty string. The value is put into the property cache, written into
 * the XML document, the document is saved and an {@code xml_property_set} event
 * carrying the value is dispatched. A value starting with {@code <![CDATA[} is
 * stored as a CDATA section (the wrapper is stripped).
 *
 * @param name  the name of the property to set.
 * @param value the new value for the property.
 * @throws IllegalArgumentException if XML-escaping {@code name} would change it.
 */
public synchronized void setProperty(String name, String value) {
    // The null guard has to run before the name is inspected: the escape check
    // below dereferences the name, so a null name could never reach a later guard.
    if (name == null) {
        return;
    }
    if (!StringEscapeUtils.escapeXml(name).equals(name)) {
        throw new IllegalArgumentException("Property name cannot contain XML entities.");
    }
    if (value == null) {
        value = "";
    }

    // Set cache correctly with prop name and value.
    propertyCache.put(name, value);

    String[] propName = parsePropertyName(name);
    // Walk down the XML hierarchy, creating every missing element on the way.
    Element element = document.getRootElement();
    for (String aPropName : propName) {
        Element next = element.element(aPropName);
        if (next == null) {
            next = element.addElement(aPropName);
        }
        element = next;
    }
    // Set the value of the property in this node.
    if (value.startsWith("<![CDATA[")) {
        // Only the first existing CDATA node is replaced; other content nodes of
        // the element are left untouched. Breaking right after the removal keeps
        // the iterator from being used on the modified content.
        for (Iterator<?> it = element.nodeIterator(); it.hasNext();) {
            Node node = (Node) it.next();
            if (node instanceof CDATA) {
                element.remove(node);
                break;
            }
        }
        // Strip the closing "]]>" only when it is really there; a value without
        // the terminator would otherwise be truncated or raise an
        // index-out-of-bounds error.
        int end = value.endsWith("]]>") ? value.length() - 3 : value.length();
        element.addCDATA(value.substring(9, end));
    } else {
        element.setText(value);
    }
    // Write the XML properties to disk
    saveProperties();

    // Generate event.
    Map<String, Object> params = new HashMap<String, Object>();
    params.put("value", value);
    PropertyEventDispatcher.dispatchEvent(name, PropertyEventDispatcher.EventType.xml_property_set, params);
}

From source file:com.webslingerz.jpt.PageTemplateImpl.java

License:Open Source License

/**
 * Emits the content nodes of {@code element} unchanged as the default template
 * content.
 *
 * <p>Child elements are handed to {@code processElement}; text is evaluated
 * through {@code Expression.evaluateText} and sent as characters; comments and
 * CDATA sections are forwarded to the lexical/content handlers; namespace nodes
 * are recorded in {@code namespaces}. Every other node type is ignored.
 *
 * @param element        the element whose content nodes are emitted
 * @param contentHandler receiver of character data
 * @param lexicalHandler receiver of comments and CDATA boundaries
 * @param beanShell      interpreter used to evaluate text nodes
 * @param slotStack      passed through to {@code processElement}
 */
private void defaultContent(Element element, ContentHandler contentHandler, LexicalHandler lexicalHandler,
        Interpreter beanShell, Stack<Map<String, Slot>> slotStack)
        throws SAXException, PageTemplateException, IOException {
    Iterator<?> children = element.nodeIterator();
    while (children.hasNext()) {
        Node child = (Node) children.next();
        switch (child.getNodeType()) {
        case Node.ELEMENT_NODE: {
            processElement((Element) child, contentHandler, lexicalHandler, beanShell, slotStack);
            break;
        }
        case Node.TEXT_NODE: {
            // Text may contain expressions, so it is evaluated before being emitted.
            String rawText = child.getText();
            char[] evaluated = Expression.evaluateText(rawText, beanShell).toCharArray();
            contentHandler.characters(evaluated, 0, evaluated.length);
            break;
        }
        case Node.COMMENT_NODE: {
            char[] commentChars = child.getText().toCharArray();
            lexicalHandler.comment(commentChars, 0, commentChars.length);
            break;
        }
        case Node.CDATA_SECTION_NODE: {
            char[] cdataChars = child.getText().toCharArray();
            lexicalHandler.startCDATA();
            contentHandler.characters(cdataChars, 0, cdataChars.length);
            lexicalHandler.endCDATA();
            break;
        }
        case Node.NAMESPACE_NODE: {
            // Remember the prefix-to-URI mapping of the declared namespace.
            Namespace ns = (Namespace) child;
            namespaces.put(ns.getPrefix(), ns.getURI());
            break;
        }
        default:
            // Attribute nodes are already handled; document type, entity reference,
            // processing instruction and any other node types are not supported
            // and are skipped silently.
            break;
        }
    }
}

From source file:com.weibo.wesync.notify.xml.XMLProperties.java

License:Open Source License

/**
 * Sets a property to an array of values. Multiple values matching the same property
 * is mapped to an XML file as multiple elements containing each value.
 * For example, using the name "foo.bar.prop", and the value string array containing
 * {"some value", "other value", "last value"} would produce the following XML:
 * <pre>/*  w w  w  . jav  a  2s  .  c om*/
 * &lt;foo&gt;
 *     &lt;bar&gt;
 *         &lt;prop&gt;some value&lt;/prop&gt;
 *         &lt;prop&gt;other value&lt;/prop&gt;
 *         &lt;prop&gt;last value&lt;/prop&gt;
 *     &lt;/bar&gt;
 * &lt;/foo&gt;
 * </pre>
 *
 * @param name the name of the property.
 * @param values the values for the property (can be empty but not null).
 */
public void setProperties(String name, List<String> values) {
    String[] propName = parsePropertyName(name);
    // Search for this property by traversing down the XML heirarchy,
    // stopping one short.
    Element element = document.getRootElement();
    for (int i = 0; i < propName.length - 1; i++) {
        // If we don't find this part of the property in the XML heirarchy
        // we add it as a new node
        if (element.element(propName[i]) == null) {
            element.addElement(propName[i]);
        }
        element = element.element(propName[i]);
    }
    String childName = propName[propName.length - 1];
    // We found matching property, clear all children.
    List<Element> toRemove = new ArrayList<Element>();
    Iterator iter = element.elementIterator(childName);
    while (iter.hasNext()) {
        toRemove.add((Element) iter.next());
    }
    for (iter = toRemove.iterator(); iter.hasNext();) {
        element.remove((Element) iter.next());
    }
    // Add the new children.
    for (String value : values) {
        Element childElement = element.addElement(childName);
        if (value.startsWith("<![CDATA[")) {
            Iterator it = childElement.nodeIterator();
            while (it.hasNext()) {
                Node node = (Node) it.next();
                if (node instanceof CDATA) {
                    childElement.remove(node);
                    break;
                }
            }
            childElement.addCDATA(value.substring(9, value.length() - 3));
        } else {
            childElement.setText(StringEscapeUtils.escapeXml(value));
        }
    }
    saveProperties();

    // Generate event.
    Map<String, Object> params = new HashMap<String, Object>();
    params.put("value", values);
}

From source file:com.weibo.wesync.notify.xml.XMLProperties.java

License:Open Source License

/**
 * Sets the value of the specified property. If the property doesn't
 * currently exist, it will be automatically created.
 *
 * <p>A {@code null} name makes the call a no-op; a {@code null} value is stored
 * as the empty string. The value is put into the property cache, written into
 * the XML document and the document is saved. A value starting with
 * {@code <![CDATA[} is stored as a CDATA section (the wrapper is stripped).
 *
 * @param name  the name of the property to set.
 * @param value the new value for the property.
 * @throws IllegalArgumentException if XML-escaping {@code name} would change it.
 */
public synchronized void setProperty(String name, String value) {
    // The null guard has to run before the name is inspected: the escape check
    // below dereferences the name, so a null name could never reach a later guard.
    if (name == null) {
        return;
    }
    if (!StringEscapeUtils.escapeXml(name).equals(name)) {
        throw new IllegalArgumentException("Property name cannot contain XML entities.");
    }
    if (value == null) {
        value = "";
    }

    // Set cache correctly with prop name and value.
    propertyCache.put(name, value);

    String[] propName = parsePropertyName(name);
    // Walk down the XML hierarchy, creating every missing element on the way.
    Element element = document.getRootElement();
    for (String aPropName : propName) {
        Element next = element.element(aPropName);
        if (next == null) {
            next = element.addElement(aPropName);
        }
        element = next;
    }
    // Set the value of the property in this node.
    if (value.startsWith("<![CDATA[")) {
        // Only the first existing CDATA node is replaced; other content nodes of
        // the element are left untouched. Breaking right after the removal keeps
        // the iterator from being used on the modified content.
        for (Iterator<?> it = element.nodeIterator(); it.hasNext();) {
            Node node = (Node) it.next();
            if (node instanceof CDATA) {
                element.remove(node);
                break;
            }
        }
        // Strip the closing "]]>" only when it is really there; a value without
        // the terminator would otherwise be truncated or raise an
        // index-out-of-bounds error.
        int end = value.endsWith("]]>") ? value.length() - 3 : value.length();
        element.addCDATA(value.substring(9, end));
    } else {
        element.setText(value);
    }
    // Write the XML properties to disk
    saveProperties();
    // No property event is dispatched from this method.
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.SemCorXMLReader.java

License:Apache License

/**
 * Processes every sentence element below {@code element}: creates a sentence
 * annotation per sentence, appends the text of all its content nodes (newlines
 * replaced by spaces) to the returned buffer, and creates the WSD annotations
 * for its word forms.
 *
 * <p>Punctuation elements and plain text contribute text only. For word-form
 * elements, token/lemma/POS annotations are written when
 * {@code shouldWriteCoreAnnotations} is set, and a WSD item with its sense tags is
 * created unless one of the {@code skip...} options excludes the word form.
 *
 * @param jCas     the CAS receiving the annotations
 * @param element  the element whose sentence children are processed
 * @param offset   character offset at which the first sentence starts
 * @param idPrefix prefix for generated word-form IDs when a word form has no ID
 * @return the concatenated text of all processed sentences
 * @throws CollectionException if a sentence contains an element that is neither
 *                             punctuation nor a word form
 */
@SuppressWarnings("unchecked")
private StringBuffer processSentences(JCas jCas, Element element, int offset, String idPrefix)
        throws CollectionException {
    StringBuffer sentenceText = new StringBuffer();
    for (Iterator<Element> sentenceIterator = element.elementIterator(ELEMENT_SENTENCE); sentenceIterator
            .hasNext();) {
        Element sentence = sentenceIterator.next();
        Sentence sentenceAnnotation = new Sentence(jCas);
        sentenceAnnotation.setBegin(offset);
        String sentenceId = sentence.attributeValue(ATTR_SNUM);
        int wordFormCount = 0;

        for (Iterator<Node> nodeIterator = sentence.nodeIterator(); nodeIterator.hasNext();) {
            Node node = nodeIterator.next();
            String nodeText = node.getText().replace('\n', ' ');

            // Span of this node in the document text. The running offset is advanced
            // right away, so all annotations below must use begin/end, not offset.
            final int begin = offset;
            final int length = nodeText.length();
            final int end = begin + length;
            offset = end;
            sentenceText.append(nodeText);

            String nodeName = node.getName();
            if (nodeName == null) {
                // Unnamed node (e.g. plain text): contributes text only.
                continue;
            }

            if (nodeName.equals(ELEMENT_PUNCTUATION)) {
                logger.trace("Found punctuation " + node.getText());
                continue;
            }

            if (!nodeName.equals(ELEMENT_WORDFORM)) {
                throw new CollectionException("unknown_element", new Object[] { nodeName });
            }

            // Find or construct a unique ID for this word form
            wordFormCount++;
            totalWordFormCount++;
            Element wordForm = (Element) node;
            String wordFormId = wordForm.attributeValue(ATTR_ID);
            if (wordFormId == null) {
                wordFormId = idPrefix + ".s" + sentenceId + ".w" + wordFormCount;
            }
            logger.trace("Found wf id: " + wordFormId);

            String lemma = wordForm.attributeValue(ATTR_LEMMA);
            String pos = wordForm.attributeValue(ATTR_POS);

            // write DKPro Core annotations Token, Lemma, and POS
            if (shouldWriteCoreAnnotations) {
                Lemma lemmaAnno = null;
                if (lemma != null) {
                    // Same span as the token and POS annotations (previously the
                    // already-advanced offset was used as begin, giving an empty span).
                    lemmaAnno = new Lemma(jCas, begin, end);
                    lemmaAnno.setValue(lemma);
                    lemmaAnno.addToIndexes();
                }

                de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS posAnno = null;
                if (pos != null) {
                    Type posTag = mappingProvider.getTagType(pos);
                    posAnno = (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) jCas.getCas()
                            .createAnnotation(posTag, begin, end);
                    posAnno.setPosValue(pos);
                    posAnno.addToIndexes();
                }

                Token tokenAnno = new Token(jCas, begin, end);
                tokenAnno.setLemma(lemmaAnno);
                tokenAnno.setPos(posAnno);
                tokenAnno.addToIndexes();
            }

            // Skip <wf> elements which are not marked as "done"; a missing cmd
            // attribute counts as "not done" instead of causing a NullPointerException.
            String cmd = wordForm.attributeValue(ATTR_CMD);
            if (skipUndone && (cmd == null || !cmd.equals(VAL_DONE))) {
                logger.debug("Skipping wf " + wordFormId + ": not marked as 'done'");
                continue;
            }

            // Skip <wf> elements for which semantic tags could not be
            // assigned
            if (skipUnassignable && wordForm.attributeValue(ATTR_OT) != null) {
                logger.debug("Skipping wf " + wordFormId + ": ot=" + wordForm.attributeValue(ATTR_OT));
                continue;
            }

            // Find the number of valid sense tags for this word form.
            // Tags with a wnsn attribute value of "0"
            // (or "-1" according to some specifications) could not be
            // mapped and so are skipped.
            String wnsn = wordForm.attributeValue(ATTR_WNSN);
            if (skipWithoutWnsn && wnsn == null) {
                logger.debug("Skipping wf " + wordFormId + ": no wnsn");
                continue;
            }
            // With skipWithoutWnsn disabled the attribute may be absent; treat that
            // as "no sense numbers" rather than dereferencing null.
            String[] wnsns = (wnsn == null) ? new String[0] : wnsn.split(";");
            int totalValidWf = 0;
            for (String s : wnsns) {
                if (isValidWnsn(s)) {
                    totalValidWf++;
                }
            }
            if (skipWithoutWnsn && totalValidWf == 0) {
                logger.debug("Skipping wf " + wordFormId + ": wnsn=" + wnsn);
                continue;
            }

            // Skip word forms without a lemma
            if (skipWithoutLemma && lemma == null) {
                logger.warn("Skipping wf " + wordFormId + ": no lemma");
                continue;
            }

            // Skip word forms without a POS
            if (skipWithoutPos && pos == null) {
                logger.warn("Skipping " + wordFormId + ": no pos");
                continue;
            }
            try {
                pos = semCorPosToPOS(pos).toString();
            } catch (IllegalArgumentException e) {
                logger.warn("Skipping wf " + wordFormId + ": unrecognized pos=" + pos);
                continue;
            }

            // Create the necessary WSDItem and LexicalItemConstituent
            // annotations for this word form
            LexicalItemConstituent c = newLexicalItemConstituent(jCas, wordFormId, ELEMENT_WORDFORM, begin,
                    length);
            WSDItem w = newWsdItem(jCas, wordFormId, begin, length, pos, lemma);
            w.setConstituents(new FSArray(jCas, 1));
            w.setConstituents(0, c);

            // Get an array of sense tags. Sense tags are found
            // in the lexsn attribute and are separated with
            // semicolons. Sometimes the head_word field contains
            // a superfluous character in parentheses which must
            // be removed. (These quirks are not documented in
            // the SemCor file format specification.)
            // NOTE(review): a <wf> without a lexsn attribute still fails here with a
            // NullPointerException - confirm whether such input can occur.
            String[] lexsns = wordForm.attributeValue(ATTR_LEXSN).replaceAll("\\(.\\)", "").split(";");
            FSArray senseArray = new FSArray(jCas, totalValidWf);
            int validWfCount = 0;
            // lexsn and wnsn entries are paired by position; only positions present
            // in both lists are considered so a length mismatch cannot overrun wnsns.
            int pairCount = Math.min(lexsns.length, wnsns.length);
            for (int i = 0; i < pairCount; i++) {
                if (isValidWnsn(wnsns[i])) {
                    Sense sense = new Sense(jCas);
                    sense.setId(lemma + "%" + lexsns[i]);
                    sense.setConfidence(1.0);
                    sense.addToIndexes();
                    senseArray.set(validWfCount++, sense);
                }
            }

            WSDResult wsdResult = new WSDResult(jCas, begin, end);
            wsdResult.setWsdItem(w);
            wsdResult.setSenses(senseArray);
            wsdResult.setSenseInventory(senseInventory);
            wsdResult.setDisambiguationMethod(DISAMBIGUATION_METHOD_NAME);
            wsdResult.addToIndexes();
        }

        sentenceAnnotation.setEnd(offset);
        sentenceAnnotation.addToIndexes();
    }
    return sentenceText;
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.WebCAGeXMLReader.java

License:Apache License

/**
 * Walks over the content nodes of {@code text} in document order, collects
 * their text (newlines replaced by spaces) and creates WSD annotations for
 * every head element.
 *
 * <p>Plain text and satellite elements contribute text only. The text of a head
 * element is always part of the returned document text, even when the head is
 * not annotated because its POS is missing or unrecognized (previously such
 * heads were dropped from the text altogether).
 *
 * @param jCas the CAS receiving the annotations
 * @param text the element whose content is read
 * @return the document text assembled from all content nodes
 * @throws CollectionException if an element other than a head or satellite is
 *                             found and its node type name is not "Entity"
 */
@SuppressWarnings("unchecked")
private StringBuffer processText(JCas jCas, Element text) throws CollectionException {
    StringBuffer documentText = new StringBuffer();
    int offset = 0;

    // Loop over all nodes to get the document text in order
    for (Iterator<Node> nodeIterator = text.nodeIterator(); nodeIterator.hasNext();) {

        Node node = nodeIterator.next();
        String nodeText = node.getText().replace('\n', ' ');
        String nodeName = node.getName();

        // TODO: For now we ignore satellites. We should add support for
        // them.
        if (nodeName == null || nodeName.equals(ELEMENT_SAT)) {
            // Unnamed nodes and satellites contribute text only.
        } else if (nodeName.equals(ELEMENT_HEAD)) {
            annotateHead(jCas, (Element) node, offset, nodeText.length());
        } else if (!node.getNodeTypeName().equals("Entity")) {
            // If the node is any other element, something is wrong
            throw new CollectionException("unknown_element", new Object[] { node.getName() });
        }

        // Text accounting happens exactly once per node, whatever its kind.
        offset += nodeText.length();
        documentText.append(nodeText);
    }
    return documentText;
}

/**
 * Creates the LexicalItemConstituent, WSDItem, Sense and WSDResult annotations
 * for one head element; does nothing but log a warning when the head has no
 * POS or an unrecognized one.
 */
private void annotateHead(JCas jCas, Element head, int offset, int length) {
    String headId = head.attributeValue(ATTR_ID);
    String lemma = head.attributeValue(ATTR_LEMMA);

    logger.trace("Reading instance " + headId);

    // Skip word forms without a POS
    String pos = head.attributeValue(ATTR_POS);
    if (pos == null) {
        logger.warn("No POS provided for " + headId + "; skipping");
        return;
    }
    try {
        pos = webCAGePosToPOS(pos).toString();
    } catch (IllegalArgumentException e) {
        logger.warn("Unrecognized POS " + pos + " provided for " + headId + "; skipping");
        return;
    }

    // Create the necessary WSDItem and LexicalItemConstituent
    // annotations for this word form
    LexicalItemConstituent c = newLexicalItemConstituent(jCas, headId, ELEMENT_HEAD, offset, length);
    WSDItem w = newWsdItem(jCas, headId, offset, length, pos, lemma);
    w.setConstituents(new FSArray(jCas, 1));
    w.setConstituents(0, c);

    // Get an array of lexical unit IDs (LUIDs). LUIDs are found
    // in the luids attribute and are separated with
    // # characters. The first character of each entry is dropped
    // when building the sense ID.
    String[] luids = head.attributeValue(ATTR_LUIDS).split("#");
    FSArray senseArray = new FSArray(jCas, luids.length);
    for (int i = 0; i < luids.length; i++) {
        Sense sense = new Sense(jCas);
        sense.setId(luids[i].substring(1));
        sense.setConfidence(1.0);
        sense.addToIndexes();
        senseArray.set(i, sense);
    }

    WSDResult wsdResult = new WSDResult(jCas);
    wsdResult.setWsdItem(w);
    wsdResult.setSenses(senseArray);
    wsdResult.setSenseInventory(senseInventory);
    wsdResult.setDisambiguationMethod(DISAMBIGUATION_METHOD_NAME);
    wsdResult.addToIndexes();
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Semeval1AWReader.java

License:Apache License

/**
 * Reads the next text element into the given CAS: creates a sentence
 * annotation for each sentence, a LexicalItemConstituent and WSDItem for each
 * head element, sets the document text (newlines replaced by spaces) and the
 * document metadata, and increments the text counter.
 *
 * @param jCas the CAS to fill
 * @throws IOException         if the document metadata cannot be set because of
 *                             a URI syntax problem
 * @throws CollectionException if a sentence contains an element other than a
 *                             head, or if the text contains no sentence
 */
@SuppressWarnings("unchecked")
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
    int offset = 0;
    int numSentences = 0;
    // Accumulate in a builder: appending to a String inside the nested loops
    // would copy the whole text again for every node.
    StringBuilder documentText = new StringBuilder();
    Element text = textIterator.next();

    for (Iterator<Element> sentenceIterator = text.elementIterator(SENTENCE_ELEMENT_NAME); sentenceIterator
            .hasNext();) {

        Element sentence = sentenceIterator.next();
        Sentence sentenceAnnotation = new Sentence(jCas);
        sentenceAnnotation.setBegin(offset);

        for (Iterator<Node> nodeIterator = sentence.nodeIterator(); nodeIterator.hasNext();) {

            Node node = nodeIterator.next();
            String nodeText = node.getText().replace('\n', ' ');
            String nodeName = node.getName();

            // If the node is a head, create a LexicalItemConstituent and a
            // WSDItem
            if (nodeName != null && nodeName.equals(HEAD_ELEMENT_NAME)) {
                Element head = (Element) node;
                String id = head.attributeValue(ID_ATTRIBUTE_NAME);

                LexicalItemConstituent c = newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset,
                        nodeText.length());
                WSDItem w = newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(),
                        head.attributeValue(POS_ATTRIBUTE_NAME), head.attributeValue(LEMMA_ATTRIBUTE_NAME));
                w.setConstituents(new FSArray(jCas, 1));
                w.setConstituents(0, c);
            } else if (nodeName != null) {
                // Any other named node is an element the reader does not know.
                throw new CollectionException("unknown_element", new Object[] { nodeName });
            }

            offset += nodeText.length();
            documentText.append(nodeText);
        }

        sentenceAnnotation.setEnd(offset);
        sentenceAnnotation.addToIndexes();
        numSentences++;
    }

    // The Semeval-1 DTD requires each text to have at least one sentence
    if (numSentences == 0) {
        throw new CollectionException("element_not_found",
                new Object[] { SENTENCE_ELEMENT_NAME, TEXT_ELEMENT_NAME });
    }

    jCas.setDocumentText(documentText.toString());

    try {
        setDocumentMetadata(jCas, text.attributeValue(ID_ATTRIBUTE_NAME));
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

    textCount++;
}