Example usage for org.w3c.dom Node getTextContent

Introduction

This page collects example usages of the org.w3c.dom.Node.getTextContent() method.

Prototype

public String getTextContent() throws DOMException;

Source Link

Document

This attribute returns the text content of this node and its descendants.

Usage

From source file:org.eclipse.lyo.testsuite.server.oslcv1tests.CreationAndUpdateTests.java

public static Collection<Object[]> getReferencedUrls(String base)
        throws IOException, XPathException, ParserConfigurationException, SAXException {
    Properties setupProps = SetupProperties.setup(null);
    String userId = setupProps.getProperty("userId");
    String pw = setupProps.getProperty("pw");

    HttpResponse resp = OSLCUtils.getResponseFromUrl(base, base, new UsernamePasswordCredentials(userId, pw),
            OSLCConstants.CT_DISC_CAT_XML + ", " + OSLCConstants.CT_DISC_DESC_XML);

    //If our 'base' is a ServiceDescription, find and add the factory service url
    if (resp.getEntity().getContentType().getValue().contains(OSLCConstants.CT_DISC_DESC_XML)) {
        Document baseDoc = OSLCUtils.createXMLDocFromResponseBody(EntityUtils.toString(resp.getEntity()));
        Node factoryUrl = (Node) OSLCUtils.getXPath().evaluate("//oslc_cm:factory/oslc_cm:url", baseDoc,
                XPathConstants.NODE);
        Collection<Object[]> data = new ArrayList<Object[]>();
        data.add(new Object[] { factoryUrl.getTextContent() });
        return data;
    }/* www. j  a  va 2  s.c  o m*/

    Document baseDoc = OSLCUtils.createXMLDocFromResponseBody(EntityUtils.toString(resp.getEntity()));

    //ArrayList to contain the urls from all of the SPCs
    Collection<Object[]> data = new ArrayList<Object[]>();

    //Get all the ServiceDescriptionDocuments from this ServiceProviderCatalog
    NodeList sDescs = (NodeList) OSLCUtils.getXPath().evaluate("//oslc_disc:services/@rdf:resource", baseDoc,
            XPathConstants.NODESET);
    for (int i = 0; i < sDescs.getLength(); i++) {
        Collection<Object[]> subCollection = getReferencedUrls(sDescs.item(i).getNodeValue());
        Iterator<Object[]> iter = subCollection.iterator();
        while (iter.hasNext()) {
            data.add(iter.next());
        }
    }

    //Get all ServiceProviderCatalog urls from the base document in order to recursively add all the
    //simple query services from the eventual service description documents from them as well.
    NodeList spcs = (NodeList) OSLCUtils.getXPath().evaluate(
            "//oslc_disc:entry/oslc_disc:ServiceProviderCatalog/@rdf:about", baseDoc, XPathConstants.NODESET);
    for (int i = 0; i < spcs.getLength(); i++) {
        if (!spcs.item(i).getNodeValue().equals(base)) {
            Collection<Object[]> subCollection = getReferencedUrls(spcs.item(i).getNodeValue());
            Iterator<Object[]> iter = subCollection.iterator();
            while (iter.hasNext()) {
                data.add(iter.next());
            }
        }
    }
    return data;
}

From source file:com.jkoolcloud.tnt4j.streams.parsers.ActivityXmlParser.java

/**
 * Resolves DOM node contained textual data and formats it using provided locator.
 *
 * @param locator/*  w ww .  ja  va2s  .  co m*/
 *            locator instance to alter using XML attributes contained data type, format and units used to format
 *            resolved value
 * @param node
 *            DOM node to collect textual data
 * @return resolved textual value formatted based on the locator's formatting properties
 * @throws ParseException
 *             if exception occurs applying locator format properties to specified value
 */
protected static Object getTextContent(ActivityFieldLocator locator, Node node) throws ParseException {
    String strValue = node.getTextContent();
    Node attrsNode = node;

    if (node instanceof Attr) {
        Attr attr = (Attr) node;

        attrsNode = attr.getOwnerElement();
    }

    // Get list of attributes and their values for
    // current element
    NamedNodeMap attrsMap = attrsNode == null ? null : attrsNode.getAttributes();

    Node attr;
    String attrVal;
    ActivityFieldLocator locCopy = locator.clone();
    if (attrsMap != null && attrsMap.getLength() > 0) {
        attr = attrsMap.getNamedItem(DATA_TYPE_ATTR);
        attrVal = attr == null ? null : attr.getTextContent();
        if (StringUtils.isNotEmpty(attrVal)) {
            locCopy.setDataType(ActivityFieldDataType.valueOf(attrVal));
        }

        attr = attrsMap.getNamedItem(FORMAT_ATTR);
        attrVal = attr == null ? null : attr.getTextContent();
        if (StringUtils.isNotEmpty(attrVal)) {
            attr = attrsMap.getNamedItem(LOCALE_ATTR);
            String attrLVal = attr == null ? null : attr.getTextContent();

            locCopy.setFormat(attrVal, StringUtils.isEmpty(attrLVal) ? locator.getLocale() : attrLVal);
        }

        attr = attrsMap.getNamedItem(UNITS_ATTR);
        attrVal = attr == null ? null : attr.getTextContent();
        if (StringUtils.isNotEmpty(attrVal)) {
            locCopy.setUnits(attrVal);
        }
    }

    return locCopy.formatValue(strValue.trim());
}

From source file:com.twentyn.patentExtractor.PatentDocument.java

public static PatentDocument patentDocumentFromXMLStream(InputStream iStream)
        throws IOException, ParserConfigurationException, SAXException, TransformerConfigurationException,
        TransformerException, XPathExpressionException {

    // Create XPath objects for validating that this document is actually a patent.
    XPath xpath = Util.getXPathFactory().newXPath();
    XPathExpression versionXPath = xpath.compile(PATH_DTD_VERSION);
    XPathExpression versionXPathApp = xpath.compile(PATH_DTD_VERSION_APP);

    DocumentBuilderFactory docFactory = Util.mkDocBuilderFactory();
    DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
    Document doc = docBuilder.parse(iStream);

    Util.DocumentType docType = Util.identifyDocType(doc);
    if (docType != Util.DocumentType.PATENT && docType != Util.DocumentType.APPLICATION) {
        LOGGER.warn("Found unexpected document type: " + docType);
        return null;
    }/*from ww  w .  j  a  v  a  2 s  . co  m*/

    boolean isApplication = docType == Util.DocumentType.APPLICATION;
    // Yes this is in fact the way suggested by the XPath API.
    String version;
    if (!isApplication) {
        version = (String) versionXPath.evaluate(doc, XPathConstants.STRING);
    } else {
        version = (String) versionXPathApp.evaluate(doc, XPathConstants.STRING);
    }

    if (version == null || !VERSION_MAP.containsKey(version)) {
        LOGGER.warn(String.format("Unrecognized patent DTD version: %s", version));
        return null;
    }

    HashMap<String, String> paths = VERSION_MAP.get(version);

    /* Create XPath objects for extracting the fields of interest based on the version information.
     * TODO: extract these into some sharable, thread-safe place, maybe via dependency injection.
     */
    XPathExpression idXPath = xpath.compile(paths.get(PATH_KEY_FILE_ID));
    XPathExpression dateXPath = xpath.compile(paths.get(PATH_KEY_DATE));
    XPathExpression titleXPath = xpath.compile(paths.get(PATH_KEY_TITLE));
    XPathExpression classificationXPath = xpath.compile(paths.get(PATH_KEY_MAIN_CLASSIFICATION));
    XPathExpression furtherClassificationsXPath = xpath.compile(paths.get(PATH_KEY_FURTHER_CLASSIFICATIONS));
    XPathExpression searchedClassificationsXPath = xpath.compile(paths.get(PATH_KEY_SEARCHED_CLASSIFICATIONS));

    String fileId = (String) idXPath.evaluate(doc, XPathConstants.STRING);
    String date = (String) dateXPath.evaluate(doc, XPathConstants.STRING);
    NodeList titleNodes = (NodeList) titleXPath.evaluate(doc, XPathConstants.NODESET);
    String title = StringUtils.join(" ", extractTextFromHTML(docBuilder, titleNodes));
    String classification = (String) classificationXPath.evaluate(doc, XPathConstants.STRING);
    NodeList furtherClassificationNodes = (NodeList) furtherClassificationsXPath.evaluate(doc,
            XPathConstants.NODESET);
    ArrayList<String> furtherClassifications = null;
    if (furtherClassificationNodes != null) {
        furtherClassifications = new ArrayList<>(furtherClassificationNodes.getLength());
        for (int i = 0; i < furtherClassificationNodes.getLength(); i++) {
            Node n = furtherClassificationNodes.item(i);
            String txt = n.getTextContent();
            if (txt != null) {
                furtherClassifications.add(i, txt);
            }
        }
    } else {
        furtherClassifications = new ArrayList<>(0);
    }

    NodeList otherClassificationNodes = (NodeList) searchedClassificationsXPath.evaluate(doc,
            XPathConstants.NODESET);
    ArrayList<String> otherClassifications = null;
    if (otherClassificationNodes != null) {
        otherClassifications = new ArrayList<>(otherClassificationNodes.getLength());
        for (int i = 0; i < otherClassificationNodes.getLength(); i++) {
            Node n = otherClassificationNodes.item(i);
            String txt = n.getTextContent();
            if (txt != null) {
                otherClassifications.add(i, txt);
            }
        }
    } else {
        otherClassifications = new ArrayList<>(0);
    }

    // Extract text content for salient document paths.
    List<String> allTextList = getRelevantDocumentText(docBuilder, PATHS_TEXT, xpath, doc);
    List<String> claimsTextList = getRelevantDocumentText(docBuilder, new String[] { PATH_CLAIMS }, xpath, doc);

    return new PatentDocument(fileId, date, title, classification, furtherClassifications, otherClassifications,
            allTextList, claimsTextList, isApplication);
}

From source file:Main.java

/**
 * Extract default values of variables defined in the DOM subtree below node.
 * Default values are used to populate the variableDefs map.  Variables
 * already defined in this map will NOT be modified.
 *
 * @param node root node of DOM subtree to extract default values from.
 * @param variableDefs map which default values will be added to.
 *//*from www  .ja  va  2  s  .  c  o m*/
public static void extractVariableDefaults(final Node node, Map<String, String> variableDefs) {
    switch (node.getNodeType()) {
    case Node.ELEMENT_NODE:
        final Element element = (Element) node;
        final NamedNodeMap attrs = element.getAttributes();
        for (int i = 0; i < attrs.getLength(); i++) {
            Attr attr = (Attr) attrs.item(i);
            extractVariableDefaultsFromString(attr.getValue(), variableDefs);
        }
        break;

    case Node.CDATA_SECTION_NODE:
        String content = node.getTextContent();
        extractVariableDefaultsFromString(content, variableDefs);
        break;

    default:
        break;
    }

    final NodeList children = node.getChildNodes();
    for (int childIndex = 0; childIndex < children.getLength(); childIndex++)
        extractVariableDefaults(children.item(childIndex), variableDefs);
}

From source file:com.github.sevntu.checkstyle.internal.ChecksTest.java

private static void validateSonarProperties(Class<?> module, Set<Node> parameters) {
    final String moduleName = module.getName();
    final Set<String> properties = getFinalProperties(module);

    for (Node parameter : parameters) {
        final NamedNodeMap attributes = parameter.getAttributes();
        final Node paramKeyNode = attributes.getNamedItem("key");

        Assert.assertNotNull(moduleName + " requires a key for unknown parameter in sonar", paramKeyNode);

        final String paramKey = paramKeyNode.getTextContent();

        Assert.assertFalse(moduleName + " requires a valid key for unknown parameter in sonar",
                paramKey.isEmpty());/*w w w. j a v a 2s .c  o  m*/

        Assert.assertTrue(moduleName + " has an unknown parameter in sonar: " + paramKey,
                properties.remove(paramKey));
    }

    for (String property : properties) {
        Assert.fail(moduleName + " parameter not found in sonar: " + property);
    }
}

From source file:edu.illinois.cs.cogcomp.utils.Utils.java

/**
 * Used for reading data from the NEWS2015 dataset.
 * @param fname/*from w  w  w  .java  2 s . c om*/
 * @return
 * @throws ParserConfigurationException
 * @throws IOException
 * @throws SAXException
 */
public static List<MultiExample> readNEWSData(String fname)
        throws ParserConfigurationException, IOException, SAXException {
    File file = new File(fname);
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = dbf.newDocumentBuilder();
    Document document = db.parse(file);

    NodeList nl = document.getElementsByTagName("Name");

    List<MultiExample> examples = new ArrayList<>();

    for (int i = 0; i < nl.getLength(); i++) {
        Node n = nl.item(i);

        NodeList sourceandtargets = n.getChildNodes();
        MultiExample me = null;
        for (int j = 0; j < sourceandtargets.getLength(); j++) {

            Node st = sourceandtargets.item(j);
            if (st.getNodeName().equals("SourceName")) {
                me = new MultiExample(st.getTextContent().toLowerCase(), new ArrayList<String>());
            } else if (st.getNodeName().equals("TargetName")) {
                if (me != null) {
                    me.addTransliteratedWord(st.getTextContent());
                }
            }
        }
        examples.add(me);
    }

    return examples;
}

From source file:Main.java

/**
 * Build {@link Map} of namespace URIs to prefixes.
 * // www . j  a v a 2  s .c  o  m
 * @param root
 *          {@link Element} to get namespaces and prefixes from.
 * @return {@link Map} of namespace URIs to prefixes.
 * @since 8.1
 */
private static final Map<String, String> buildNamespacePrefixMap(final Element root) {
    final HashMap<String, String> namespacePrefixMap = new HashMap<>();

    //Look for all of the attributes of cache that start with
    //xmlns
    NamedNodeMap attributes = root.getAttributes();
    for (int i = 0; i < attributes.getLength(); i++) {
        Node item = attributes.item(i);
        if (item.getNodeName().startsWith("xmlns")) {
            //Anything after the colon is the prefix
            //eg xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            //has a prefix of xsi
            String[] splitName = item.getNodeName().split(":");
            String prefix;
            if (splitName.length > 1) {
                prefix = splitName[1];
            } else {
                prefix = "";
            }
            String uri = item.getTextContent();
            namespacePrefixMap.put(uri, prefix);
        }
    }

    return namespacePrefixMap;
}

From source file:de.codesourcery.eve.apiclient.utils.XMLParseHelper.java

public static String selectNodeValue(Document doc, XPathExpression expr, boolean isRequired) {
    final Node n = selectNode(doc, expr, isRequired);

    if (n == null) {
        return null;
    }//  w  w w.ja v a  2s. com

    final String result = n.getTextContent();
    if (StringUtils.isBlank(result)) {
        if (isRequired) {
            throw new UnparseableResponseException(
                    "Selected node '" + n.getNodeName() + "' does not contain a value");
        }
        return null;
    }
    return result;
}

From source file:edu.lternet.pasta.dml.parser.document.DocumentDataPackageParser.java

public static Map document2Map(Document doc, Map attributeXPaths) {
    Map record = new OrderedMap();

    try {//from   w w  w. j  a va  2  s.  c o m

        //go through the list of attribute labels (key to xpath values)
        Iterator xPathIter = attributeXPaths.keySet().iterator();
        while (xPathIter.hasNext()) {
            String attributeLabel = (String) xPathIter.next();
            String attributeXPath = (String) attributeXPaths.get(attributeLabel);

            //handle NodeList, not just single Node
            NodeList attributeNodeList = XMLUtilities.getNodeListWithXPath(doc.getDocumentElement(),
                    attributeXPath);

            //include placeholders for those non existent attributes but include null values
            if (attributeNodeList == null) {
                log.debug("no nodes found for xPath: " + attributeXPath);
                record.put(attributeLabel, null);
                log.debug("added null placeholder for attribute: " + attributeLabel);
                continue;
            }
            //get the value[s] for the attribute
            for (int i = 0; i < attributeNodeList.getLength(); i++) {

                //get the node
                Node attributeNode = attributeNodeList.item(i);

                //get the text value of the node
                //TODO should we use DOM level 3 and assume java 1.5?
                String nodeTextContent = null; //attributeNode.getTextContent();
                nodeTextContent = attributeNode.getTextContent();
                //               if (attributeNode.getFirstChild() != null && attributeNode.getFirstChild().getNodeType() == Node.TEXT_NODE) {
                //                  nodeTextContent = attributeNode.getFirstChild().getNodeValue();
                //               }

                //add the attribute to the Map, taking care to handle multiples
                String columnLabel = attributeLabel;
                if (record.containsKey(columnLabel)) {
                    if (i == 1) {
                        //get the first original label without the number and reassign it with "_1"
                        Object firstValue = record.get(columnLabel);
                        //record.remove(columnLabel);
                        record.put(columnLabel + "_" + i, firstValue);
                    }
                    columnLabel = columnLabel + "_" + (i + 1);
                }
                record.put(columnLabel, nodeTextContent);

                log.debug("added flat attribute: " + columnLabel + "=" + nodeTextContent);

            }

        }
    } catch (Exception e) {
        log.error("could not flatten attributes in document: " + e.getMessage());
        e.printStackTrace();
    }

    return record;
}

From source file:Main.java

/**
 * Prints a textual representation of the given node to the specified PrintStream.
 *
 * @param  n    Node that is to be printed.
 * @param  out  The PrintStream to which the node is to be printed.
 * @pre    n != null && out != null
 */// w  ww  . j  a  v  a2s.c o  m
public static void printXMLNode(Node n, PrintStream out) {
    switch (n.getNodeType()) {
    case Node.DOCUMENT_NODE:
        out.println("DOC_ROOT");
        break;

    case Node.ELEMENT_NODE:
        out.println("<" + ((Element) n).getTagName() + ">");
        break;

    case Node.ATTRIBUTE_NODE:
        out.println("@" + ((Attr) n).getName());
        break;

    case Node.TEXT_NODE:
        out.println("\"" + ((Text) n).getWholeText().trim() + "\"");
        break;

    case Node.COMMENT_NODE:
        out.println("COMMENT: \"" + n.getTextContent().trim() + "\"");
        break;

    default:
        out.println("Unknown node type: " + n.getNodeType());
    }
}