Example usage for org.w3c.dom Node getTextContent

List of usage examples for org.w3c.dom Node getTextContent

Introduction

On this page you can find example usages of org.w3c.dom Node getTextContent.

Prototype

public String getTextContent() throws DOMException;

Source Link

Document

This attribute returns the text content of this node and its descendants.

Usage

From source file:org.eclipse.lyo.testsuite.server.oslcv1tests.CreationAndUpdateTests.java

/**
 * Collects the creation-factory URLs reachable from the discovery document at {@code base}.
 * <p>
 * The document is fetched with the {@code userId}/{@code pw} credentials from the setup properties.
 * If the response content type contains {@code OSLCConstants.CT_DISC_DESC_XML}, the text of the first
 * {@code //oslc_cm:factory/oslc_cm:url} node is returned as the only entry. Otherwise the document is
 * treated as a catalog: every {@code oslc_disc:services} reference and every nested
 * {@code ServiceProviderCatalog} (other than {@code base} itself) is followed recursively and the
 * results are merged.
 *
 * @param base URL of the service description or service provider catalog to start from
 * @return one single-element {@code Object[]} per factory URL found; empty when none is found
 * @throws IOException if fetching or reading a response fails
 * @throws XPathException if an XPath expression cannot be evaluated
 * @throws ParserConfigurationException if an XML parser cannot be created
 * @throws SAXException if a response body is not well-formed XML
 */
public static Collection<Object[]> getReferencedUrls(String base)
        throws IOException, XPathException, ParserConfigurationException, SAXException {
    Properties setupProps = SetupProperties.setup(null);
    String userId = setupProps.getProperty("userId");
    String pw = setupProps.getProperty("pw");

    HttpResponse resp = OSLCUtils.getResponseFromUrl(base, base, new UsernamePasswordCredentials(userId, pw),
            OSLCConstants.CT_DISC_CAT_XML + ", " + OSLCConstants.CT_DISC_DESC_XML);

    // Read the content type before the entity body is consumed by the XML conversion below.
    boolean isServiceDescription = resp.getEntity().getContentType().getValue()
            .contains(OSLCConstants.CT_DISC_DESC_XML);
    Document baseDoc = OSLCUtils.createXMLDocFromResponseBody(EntityUtils.toString(resp.getEntity()));

    Collection<Object[]> data = new ArrayList<Object[]>();

    // If our 'base' is a ServiceDescription, find and add the factory service url
    if (isServiceDescription) {
        Node factoryUrl = (Node) OSLCUtils.getXPath().evaluate("//oslc_cm:factory/oslc_cm:url", baseDoc,
                XPathConstants.NODE);
        // A service description without a creation factory contributes nothing
        // (previously this dereferenced null and failed with a NullPointerException).
        if (factoryUrl != null) {
            data.add(new Object[] { factoryUrl.getTextContent() });
        }
        return data;
    }

    // Follow all the ServiceDescriptionDocuments referenced from this ServiceProviderCatalog
    NodeList sDescs = (NodeList) OSLCUtils.getXPath().evaluate("//oslc_disc:services/@rdf:resource", baseDoc,
            XPathConstants.NODESET);
    for (int i = 0; i < sDescs.getLength(); i++) {
        data.addAll(getReferencedUrls(sDescs.item(i).getNodeValue()));
    }

    // Follow all nested ServiceProviderCatalog urls in order to recursively add the factory urls
    // from the eventual service description documents below them as well.
    NodeList spcs = (NodeList) OSLCUtils.getXPath().evaluate(
            "//oslc_disc:entry/oslc_disc:ServiceProviderCatalog/@rdf:about", baseDoc, XPathConstants.NODESET);
    for (int i = 0; i < spcs.getLength(); i++) {
        String catalogUrl = spcs.item(i).getNodeValue();
        // Skip the catalog's reference to itself, which would recurse forever.
        if (!catalogUrl.equals(base)) {
            data.addAll(getReferencedUrls(catalogUrl));
        }
    }
    return data;
}

From source file:com.jkoolcloud.tnt4j.streams.parsers.ActivityXmlParser.java

/**
 * Resolves DOM node contained textual data and formats it using provided locator.
 *
 * @param locator/*  w ww .  ja  va2s  .  co m*/
 *            locator instance to alter using XML attributes contained data type, format and units used to format
 *            resolved value
 * @param node
 *            DOM node to collect textual data
 * @return resolved textual value formatted based on the locator's formatting properties
 * @throws ParseException
 *             if exception occurs applying locator format properties to specified value
 */
protected static Object getTextContent(ActivityFieldLocator locator, Node node) throws ParseException {
    String strValue = node.getTextContent();
    Node attrsNode = node;

    if (node instanceof Attr) {
        Attr attr = (Attr) node;

        attrsNode = attr.getOwnerElement();
    }

    // Get list of attributes and their values for
    // current element
    NamedNodeMap attrsMap = attrsNode == null ? null : attrsNode.getAttributes();

    Node attr;
    String attrVal;
    ActivityFieldLocator locCopy = locator.clone();
    if (attrsMap != null && attrsMap.getLength() > 0) {
        attr = attrsMap.getNamedItem(DATA_TYPE_ATTR);
        attrVal = attr == null ? null : attr.getTextContent();
        if (StringUtils.isNotEmpty(attrVal)) {
            locCopy.setDataType(ActivityFieldDataType.valueOf(attrVal));
        }

        attr = attrsMap.getNamedItem(FORMAT_ATTR);
        attrVal = attr == null ? null : attr.getTextContent();
        if (StringUtils.isNotEmpty(attrVal)) {
            attr = attrsMap.getNamedItem(LOCALE_ATTR);
            String attrLVal = attr == null ? null : attr.getTextContent();

            locCopy.setFormat(attrVal, StringUtils.isEmpty(attrLVal) ? locator.getLocale() : attrLVal);
        }

        attr = attrsMap.getNamedItem(UNITS_ATTR);
        attrVal = attr == null ? null : attr.getTextContent();
        if (StringUtils.isNotEmpty(attrVal)) {
            locCopy.setUnits(attrVal);
        }
    }

    return locCopy.formatValue(strValue.trim());
}

From source file:com.twentyn.patentExtractor.PatentDocument.java

/**
 * Parses an XML stream into a {@link PatentDocument}.
 * <p>
 * The document type and DTD version are checked first; the version selects the set of XPath
 * expressions used to extract the id, date, title, classifications and text sections.
 *
 * @param iStream stream containing the patent (or patent application) XML
 * @return the extracted document, or {@code null} when the document type is neither patent nor
 *         application, or when its DTD version is missing from {@code VERSION_MAP}
 * @throws IOException if the stream cannot be read
 * @throws ParserConfigurationException if a document builder cannot be created
 * @throws SAXException if the stream is not well-formed XML
 * @throws TransformerConfigurationException declared for the text-extraction helpers
 * @throws TransformerException declared for the text-extraction helpers
 * @throws XPathExpressionException if an XPath expression cannot be compiled or evaluated
 */
public static PatentDocument patentDocumentFromXMLStream(InputStream iStream)
        throws IOException, ParserConfigurationException, SAXException, TransformerConfigurationException,
        TransformerException, XPathExpressionException {

    // Create XPath objects for validating that this document is actually a patent.
    XPath xpath = Util.getXPathFactory().newXPath();
    XPathExpression versionXPath = xpath.compile(PATH_DTD_VERSION);
    XPathExpression versionXPathApp = xpath.compile(PATH_DTD_VERSION_APP);

    DocumentBuilderFactory docFactory = Util.mkDocBuilderFactory();
    DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
    Document doc = docBuilder.parse(iStream);

    Util.DocumentType docType = Util.identifyDocType(doc);
    if (docType != Util.DocumentType.PATENT && docType != Util.DocumentType.APPLICATION) {
        LOGGER.warn("Found unexpected document type: " + docType);
        return null;
    }

    boolean isApplication = docType == Util.DocumentType.APPLICATION;
    // Yes this is in fact the way suggested by the XPath API.
    String version;
    if (!isApplication) {
        version = (String) versionXPath.evaluate(doc, XPathConstants.STRING);
    } else {
        version = (String) versionXPathApp.evaluate(doc, XPathConstants.STRING);
    }

    if (version == null || !VERSION_MAP.containsKey(version)) {
        LOGGER.warn(String.format("Unrecognized patent DTD version: %s", version));
        return null;
    }

    HashMap<String, String> paths = VERSION_MAP.get(version);

    /* Create XPath objects for extracting the fields of interest based on the version information.
     * TODO: extract these into some sharable, thread-safe place, maybe via dependency injection.
     */
    XPathExpression idXPath = xpath.compile(paths.get(PATH_KEY_FILE_ID));
    XPathExpression dateXPath = xpath.compile(paths.get(PATH_KEY_DATE));
    XPathExpression titleXPath = xpath.compile(paths.get(PATH_KEY_TITLE));
    XPathExpression classificationXPath = xpath.compile(paths.get(PATH_KEY_MAIN_CLASSIFICATION));
    XPathExpression furtherClassificationsXPath = xpath.compile(paths.get(PATH_KEY_FURTHER_CLASSIFICATIONS));
    XPathExpression searchedClassificationsXPath = xpath.compile(paths.get(PATH_KEY_SEARCHED_CLASSIFICATIONS));

    String fileId = (String) idXPath.evaluate(doc, XPathConstants.STRING);
    String date = (String) dateXPath.evaluate(doc, XPathConstants.STRING);
    NodeList titleNodes = (NodeList) titleXPath.evaluate(doc, XPathConstants.NODESET);
    String title = StringUtils.join(" ", extractTextFromHTML(docBuilder, titleNodes));
    String classification = (String) classificationXPath.evaluate(doc, XPathConstants.STRING);

    ArrayList<String> furtherClassifications = textContentsOf(
            (NodeList) furtherClassificationsXPath.evaluate(doc, XPathConstants.NODESET));
    ArrayList<String> otherClassifications = textContentsOf(
            (NodeList) searchedClassificationsXPath.evaluate(doc, XPathConstants.NODESET));

    // Extract text content for salient document paths.
    List<String> allTextList = getRelevantDocumentText(docBuilder, PATHS_TEXT, xpath, doc);
    List<String> claimsTextList = getRelevantDocumentText(docBuilder, new String[] { PATH_CLAIMS }, xpath, doc);

    return new PatentDocument(fileId, date, title, classification, furtherClassifications, otherClassifications,
            allTextList, claimsTextList, isApplication);
}

/**
 * Collects the non-null text content of every node in the list, in document order.
 * <p>
 * Values are appended rather than inserted at the node's index: inserting at index {@code i} fails
 * with {@link IndexOutOfBoundsException} as soon as an earlier node has been skipped.
 *
 * @param nodes nodes to read, may be {@code null}
 * @return the collected text values; empty when {@code nodes} is {@code null}
 */
private static ArrayList<String> textContentsOf(NodeList nodes) {
    if (nodes == null) {
        return new ArrayList<>(0);
    }
    ArrayList<String> texts = new ArrayList<>(nodes.getLength());
    for (int i = 0; i < nodes.getLength(); i++) {
        String txt = nodes.item(i).getTextContent();
        if (txt != null) {
            texts.add(txt);
        }
    }
    return texts;
}

From source file:Main.java

/**
 * Walks the DOM subtree rooted at {@code node} and harvests variable default values from it.
 * Every attribute value of each element and the content of each CDATA section is handed to
 * {@code extractVariableDefaultsFromString}, which populates the variableDefs map.
 * Variables already defined in this map will NOT be modified.
 *
 * @param node root node of DOM subtree to extract default values from.
 * @param variableDefs map which default values will be added to.
 */
public static void extractVariableDefaults(final Node node, Map<String, String> variableDefs) {
    final short nodeType = node.getNodeType();

    if (nodeType == Node.ELEMENT_NODE) {
        // Elements contribute through their attribute values.
        final NamedNodeMap attributeMap = ((Element) node).getAttributes();
        for (int attrIndex = 0; attrIndex < attributeMap.getLength(); attrIndex++) {
            final Attr attribute = (Attr) attributeMap.item(attrIndex);
            extractVariableDefaultsFromString(attribute.getValue(), variableDefs);
        }
    } else if (nodeType == Node.CDATA_SECTION_NODE) {
        // CDATA sections contribute through their raw text.
        extractVariableDefaultsFromString(node.getTextContent(), variableDefs);
    }

    // Descend into the children regardless of the current node's type.
    final NodeList childNodes = node.getChildNodes();
    for (int i = 0; i < childNodes.getLength(); i++) {
        extractVariableDefaults(childNodes.item(i), variableDefs);
    }
}

From source file:com.github.sevntu.checkstyle.internal.ChecksTest.java

/**
 * Asserts that the sonar parameters declared for a module match the module's properties one-to-one.
 * Each parameter node must carry a non-empty {@code key} attribute naming a property returned by
 * {@code getFinalProperties(module)}; any property left without a matching parameter fails the test.
 *
 * @param module the check class being validated
 * @param parameters the sonar parameter nodes declared for that class
 */
private static void validateSonarProperties(Class<?> module, Set<Node> parameters) {
    final String moduleName = module.getName();
    final Set<String> remaining = getFinalProperties(module);

    for (Node parameter : parameters) {
        final Node keyNode = parameter.getAttributes().getNamedItem("key");
        Assert.assertNotNull(moduleName + " requires a key for unknown parameter in sonar", keyNode);

        final String key = keyNode.getTextContent();
        Assert.assertFalse(moduleName + " requires a valid key for unknown parameter in sonar",
                key.isEmpty());

        // Removing the key both verifies it is known and marks the property as covered.
        final boolean wasKnown = remaining.remove(key);
        Assert.assertTrue(moduleName + " has an unknown parameter in sonar: " + key, wasKnown);
    }

    // Anything still left was never declared in sonar; report the first such property.
    if (!remaining.isEmpty()) {
        Assert.fail(moduleName + " parameter not found in sonar: " + remaining.iterator().next());
    }
}

From source file:edu.illinois.cs.cogcomp.utils.Utils.java

/**
 * Used for reading data from the NEWS2015 dataset.
 * @param fname/*from w  w  w  .java  2 s . c om*/
 * @return
 * @throws ParserConfigurationException
 * @throws IOException
 * @throws SAXException
 */
public static List<MultiExample> readNEWSData(String fname)
        throws ParserConfigurationException, IOException, SAXException {
    File file = new File(fname);
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = dbf.newDocumentBuilder();
    Document document = db.parse(file);

    NodeList nl = document.getElementsByTagName("Name");

    List<MultiExample> examples = new ArrayList<>();

    for (int i = 0; i < nl.getLength(); i++) {
        Node n = nl.item(i);

        NodeList sourceandtargets = n.getChildNodes();
        MultiExample me = null;
        for (int j = 0; j < sourceandtargets.getLength(); j++) {

            Node st = sourceandtargets.item(j);
            if (st.getNodeName().equals("SourceName")) {
                me = new MultiExample(st.getTextContent().toLowerCase(), new ArrayList<String>());
            } else if (st.getNodeName().equals("TargetName")) {
                if (me != null) {
                    me.addTransliteratedWord(st.getTextContent());
                }
            }
        }
        examples.add(me);
    }

    return examples;
}

From source file:Main.java

/**
 * Build {@link Map} of namespace URIs to prefixes.
 * <p>
 * Only genuine namespace declarations on {@code root} are considered: the default declaration
 * {@code xmlns="..."} (mapped to the empty prefix) and prefixed declarations
 * {@code xmlns:prefix="..."}. Ordinary attributes whose name merely begins with the letters
 * {@code xmlns} (for example {@code xmlnsfoo}) are ignored.
 *
 * @param root
 *          {@link Element} to get namespaces and prefixes from.
 * @return {@link Map} of namespace URIs to prefixes.
 * @since 8.1
 */
private static final Map<String, String> buildNamespacePrefixMap(final Element root) {
    final HashMap<String, String> namespacePrefixMap = new HashMap<>();

    final NamedNodeMap attributes = root.getAttributes();
    for (int i = 0; i < attributes.getLength(); i++) {
        final Node item = attributes.item(i);
        final String name = item.getNodeName();

        final String prefix;
        if ("xmlns".equals(name)) {
            // Default namespace declaration: no prefix.
            prefix = "";
        } else if (name.startsWith("xmlns:")) {
            // Anything after the colon is the prefix,
            // eg xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" has a prefix of xsi
            prefix = name.substring("xmlns:".length());
        } else {
            // Not a namespace declaration.
            continue;
        }

        // The attribute value is the namespace URI.
        namespacePrefixMap.put(item.getTextContent(), prefix);
    }

    return namespacePrefixMap;
}

From source file:de.codesourcery.eve.apiclient.utils.XMLParseHelper.java

/**
 * Returns the text content of the node selected by an XPath expression.
 *
 * @param doc document to evaluate the expression against
 * @param expr expression selecting the node
 * @param isRequired whether a missing value is an error; passed on to {@code selectNode} and also
 *            applied to a blank text content
 * @return the node's text content, or {@code null} when no node was selected or when the text is
 *         blank and the value is not required
 * @throws UnparseableResponseException if the value is required but the selected node's text is blank
 */
public static String selectNodeValue(Document doc, XPathExpression expr, boolean isRequired) {
    final Node selected = selectNode(doc, expr, isRequired);
    if (selected == null) {
        return null;
    }

    final String text = selected.getTextContent();
    if (!StringUtils.isBlank(text)) {
        return text;
    }

    // Blank content: only an error when the caller insisted on a value.
    if (!isRequired) {
        return null;
    }
    throw new UnparseableResponseException(
            "Selected node '" + selected.getNodeName() + "' does not contain a value");
}

From source file:edu.lternet.pasta.dml.parser.document.DocumentDataPackageParser.java

/**
 * Flattens selected parts of a document into a single label-to-text record.
 * <p>
 * For each entry of {@code attributeXPaths} (label mapped to XPath) the XPath is evaluated against
 * the document element and the text content of every matching node is stored. The first match is
 * stored under the plain label. When a second match exists, the first value is additionally stored
 * under {@code label_1} and the second under {@code label_2}; later matches use {@code label_3},
 * {@code label_4} and so on. A label whose XPath lookup returns {@code null} is stored with a
 * {@code null} value as a placeholder.
 * <p>
 * Any exception is logged and swallowed; the record built so far is returned.
 *
 * @param doc document to read values from
 * @param attributeXPaths map of attribute label (String) to XPath expression (String)
 * @return the flattened record
 */
public static Map document2Map(Document doc, Map attributeXPaths) {
    Map record = new OrderedMap();

    try {
        // go through the list of attribute labels (key to xpath values)
        for (Object key : attributeXPaths.keySet()) {
            String attributeLabel = (String) key;
            String attributeXPath = (String) attributeXPaths.get(attributeLabel);

            // handle NodeList, not just single Node
            NodeList matches = XMLUtilities.getNodeListWithXPath(doc.getDocumentElement(), attributeXPath);

            // include placeholders for those non existent attributes but include null values
            if (matches == null) {
                log.debug("no nodes found for xPath: " + attributeXPath);
                record.put(attributeLabel, null);
                log.debug("added null placeholder for attribute: " + attributeLabel);
                continue;
            }

            // get the value[s] for the attribute
            for (int i = 0; i < matches.getLength(); i++) {
                String nodeTextContent = matches.item(i).getTextContent();

                // add the attribute to the Map, taking care to handle multiples
                String columnLabel = attributeLabel;
                if (record.containsKey(attributeLabel)) {
                    if (i == 1) {
                        // on the second value, also expose the first value under "<label>_1"
                        record.put(attributeLabel + "_" + i, record.get(attributeLabel));
                    }
                    columnLabel = attributeLabel + "_" + (i + 1);
                }
                record.put(columnLabel, nodeTextContent);

                log.debug("added flat attribute: " + columnLabel + "=" + nodeTextContent);
            }
        }
    } catch (Exception e) {
        log.error("could not flatten attributes in document: " + e.getMessage());
        e.printStackTrace();
    }

    return record;
}

From source file:Main.java

/**
 * Writes a one-line textual representation of the given node to the specified PrintStream.
 * <p>
 * Documents print as {@code DOC_ROOT}, elements as {@code <tagName>}, attributes as {@code @name},
 * text nodes as their trimmed whole text in double quotes, and comments as
 * {@code COMMENT: "trimmed text"}. Any other node type prints its numeric type code.
 * <p>
 * Precondition: {@code n != null && out != null}.
 *
 * @param  n    Node that is to be printed.
 * @param  out  The PrintStream to which the node is to be printed.
 */
public static void printXMLNode(Node n, PrintStream out) {
    final short type = n.getNodeType();
    final String line;

    if (type == Node.DOCUMENT_NODE) {
        line = "DOC_ROOT";
    } else if (type == Node.ELEMENT_NODE) {
        final Element element = (Element) n;
        line = "<" + element.getTagName() + ">";
    } else if (type == Node.ATTRIBUTE_NODE) {
        final Attr attribute = (Attr) n;
        line = "@" + attribute.getName();
    } else if (type == Node.TEXT_NODE) {
        final Text text = (Text) n;
        line = "\"" + text.getWholeText().trim() + "\"";
    } else if (type == Node.COMMENT_NODE) {
        line = "COMMENT: \"" + n.getTextContent().trim() + "\"";
    } else {
        line = "Unknown node type: " + n.getNodeType();
    }

    out.println(line);
}