List of usage examples for NodeList getItem
public T getItem(int index)
From source
License:Apache License
public NodeList<Element> filter(NodeList<Element> nodes, String selector, boolean filterDetached) { JsNodeArray res = JsNodeArray.create(); if (selector.isEmpty()) { return res; }//from w w w . ja va 2s . co m Element ghostParent = null; HashSet<Node> parents = new HashSet<Node>(); HashSet<Node> elmList = new HashSet<Node>(); for (int i = 0, l = nodes.getLength(); i < l; i++) { Node e = nodes.getItem(i); if (e == window || e == document || e.getNodeName() == null || "html".equalsIgnoreCase(e.getNodeName())) { continue; } elmList.add(e); if (filterDetached) { Element p = e.getParentElement(); if (p == null) { if (ghostParent == null) { ghostParent = Document.get().createDivElement(); parents.add(ghostParent); } p = ghostParent; p.appendChild(e); } else if (!parents.contains(p)) { parents.add(p); } } else if (parents.isEmpty()) { parents.add(document); } } for (Node e : parents) { NodeList<Element> n = select(selector, e); for (int i = 0, l = n.getLength(); i < l; i++) { Element el = n.getItem(i); if (elmList.remove(el)) { res.addNode(el); } } } if (ghostParent != null) { ghostParent.setInnerHTML(null); } return res; }
From source
License:Apache License
/**
 * Combines two node lists, appending every element of {@code newNodes} that is not
 * already present in {@code oldNodes} (elements are compared by {@code hashCode()}).
 *
 * <p>If {@code oldNodes} is {@code null} or {@code create} is {@code true} the result
 * is a freshly created list; with {@code create} set, the elements of
 * {@code oldNodes} are copied into it first. Otherwise {@code oldNodes} itself is
 * extended and returned.
 *
 * @param oldNodes the existing list, may be {@code null}
 * @param newNodes the elements to append
 * @param create whether to build a new list instead of extending {@code oldNodes}
 * @return the combined list
 */
public static NodeList<Element> copyNodeList(NodeList<Element> oldNodes, NodeList<Element> newNodes,
        boolean create) {
    NodeList<Element> merged;
    if (oldNodes == null || create) {
        merged = JsNodeArray.create();
    } else {
        merged = oldNodes;
    }

    // Remember which elements the old list already holds.
    JsCache seen = JsCache.create();
    if (oldNodes != null) {
        for (int idx = 0; idx < oldNodes.getLength(); idx++) {
            Element existing = oldNodes.getItem(idx);
            seen.put(existing.hashCode(), 1);
            if (create) {
                merged.<JsNodeArray>cast().addNode(existing, idx);
            }
        }
    }

    int incoming = newNodes.getLength();
    int insertAt = merged.getLength();
    for (int idx = 0; idx < incoming; idx++) {
        Element candidate = newNodes.getItem(idx);
        if (seen.exists(candidate.hashCode())) {
            continue;
        }
        merged.<JsNodeArray>cast().addNode(candidate, insertAt++);
    }
    return merged;
}
From source
License:Apache License
/**
 * Picks one of the {@code input} elements found under {@code parent}.
 *
 * <p>If exactly one {@code input} element exists it is returned. Otherwise exactly two
 * are expected (checked only by an {@code assert}) and the choice depends on the
 * {@code section} of the view data stored for {@code lastKey}: the first element for
 * {@code AttributeName}, the second for {@code AttributeValue}.
 *
 * <p>NOTE(review): the return type, the type of {@code parent} and the type argument of
 * {@code NodeList<>} are missing from this listing, so the method does not compile as
 * shown; restore them from the original source.
 *
 * @param parent the node whose {@code input} descendants are inspected
 * @return the selected {@code input} element
 * @throws UnsupportedOperationException if two inputs exist and the section is neither
 *         {@code AttributeName} nor {@code AttributeValue}
 */
private getActiveInput( parent) {
    NodeList<> elts = parent.getElementsByTagName("input");
    if (elts.getLength() == 1) {
        return elts.getItem(0);
    } else {
        assert elts.getLength() == 2;
        switch (getViewData(lastKey).section) {
        case AttributeName:
            return elts.getItem(0);
        case AttributeValue:
            return elts.getItem(1);
        default:
            throw new UnsupportedOperationException();
        }
    }
}
From source
License:Open Source License
/**
 * Scale the canvas on the html page.
 *
 * <p>Logs the current window inner size, then resizes the first {@code canvas} found
 * inside the element with id {@code embed-html} to that size and pins it to the
 * top-left corner with absolute positioning. Does nothing beyond logging when the
 * host element or the canvas is missing.
 */
private void scaleCanvas() {
    Element element = Document.get().getElementById("embed-html");
    int width = getWindowInnerWidth();
    int height = getWindowInnerHeight();
    consoleLog(String.valueOf(width) + " x " + String.valueOf(height));

    // getElementById yields null when the page has no such element; guard it the
    // same way the canvas lookup below is guarded instead of failing with an NPE.
    if (element == null) {
        return;
    }

    NodeList<Element> canvases = element.getElementsByTagName("canvas");
    if (canvases == null || canvases.getLength() == 0) {
        return;
    }

    Element canvas = canvases.getItem(0);
    canvas.setAttribute("width", width + "px");
    canvas.setAttribute("height", height + "px");
    canvas.getStyle().setWidth(width, Style.Unit.PX);
    canvas.getStyle().setHeight(height, Style.Unit.PX);
    canvas.getStyle().setTop(0, Style.Unit.PX);
    canvas.getStyle().setLeft(0, Style.Unit.PX);
    canvas.getStyle().setPosition(Style.Position.ABSOLUTE);
}
From source
License:Open Source License
/** * Creates a new minimal HTML document containing copies of the DOM nodes identified as the * core elements of the page. Some additional re-formatting hints may be included in the new * document./*w w w . j a v a 2 s . co m*/ * * @param textOnly indicates whether to simply return the aggregated text content instead of * HTML * @param contentNodes the DOM nodes containing text to be included in the final docuemnt. * @return A HTML or text document which includes the aggregated content of the provided HTML * nodes. */ private String formatExtractedNodes(boolean textOnly, List<Node> contentNodes) { NodeTree expandedList = NodeListExpander.expand(contentNodes); Node clonedSubtree = expandedList.cloneSubtreeRetainDirection(); if (clonedSubtree.getNodeType() != Node.ELEMENT_NODE) return ""; // determine text directionality textDirection ="dir"); // The base URL in the distilled page viewer is different from that in // the live page. This breaks all relative links (in anchors, // images, etc.), so make them absolute in the distilled content. makeAllLinksAbsolute(clonedSubtree); // Get URLs of the extracted images. if (clonedSubtree.getNodeType() == Node.ELEMENT_NODE) { NodeList<Element> allImages ="IMG"); for (int i = 0; i < allImages.getLength(); i++) { String imageUrl = allImages.getItem(i).getAttribute("src"); if (!imageUrl.isEmpty()) { imageUrls.add(imageUrl); } } } if (textOnly) { return getTextFromTree(clonedSubtree); } // TODO(cjhopman): this discards the top element and just returns its children. This might // break in some cases. return; }
From source
License:Open Source License
private static void makeAllLinksAbsolute(Node rootNode) { Element root =; // AnchorElement.getHref() and ImageElement.getSrc() both return the // absolute URI, so simply set them as the respective attributes. NodeList<Element> allLinks = root.getElementsByTagName("A"); for (int i = 0; i < allLinks.getLength(); i++) { AnchorElement link =; if (!link.getHref().isEmpty()) { link.setHref(link.getHref()); }/*from w ww.j av a 2 s .c o m*/ } NodeList<Element> videoTags = root.getElementsByTagName("VIDEO"); for (int i = 0; i < videoTags.getLength(); i++) { VideoElement video = (VideoElement) videoTags.getItem(i); if (!video.getPoster().isEmpty()) { video.setPoster(video.getPoster()); } } makeAllSrcAttributesAbsolute(root); }
From source
License:Open Source License
/**
 * Derives the title of the distilled document.
 *
 * <p>The starting title is {@code objTitle} when it is a {@code String}; otherwise it is
 * the text content of the first {@code TITLE} element under {@code root}. The title is
 * then shortened: around a {@code " | "} / {@code " - "} separator, after a
 * {@code ": "}, or, when it is longer than 150 or shorter than 15 characters, replaced by
 * the first non-empty {@code H1} text. If the shortened form has four words or fewer the
 * original title is returned instead.
 *
 * @param objTitle the page title; values that are not a {@code String} (including
 *        {@code null}) fall back to the {@code TITLE} element
 * @param root the root element used for the {@code TITLE}/{@code H1} lookups, may be
 *        {@code null}
 * @return The title of the distilled document, or an empty string if none was found.
 */
public static String getDocumentTitle(Object objTitle, Element root) {
    String currTitle = "", origTitle = "";
    if (objTitle instanceof String) { // Also false for null, which then uses the TITLE element.
        currTitle = origTitle = objTitle.toString();
    } else if (root != null) { // Otherwise, use text of first TITLE element.
        NodeList<Element> titles = root.getElementsByTagName("TITLE");
        if (titles.getLength() > 0) {
            // Use javacript textContent instead of javascript innerText; the latter only returns
            // visible text, but <title> tags are invisible.
            currTitle = origTitle = DomUtil.javascriptTextContent(titles.getItem(0));
        }
    }
    // Compare by content: reference equality with "" misses non-interned empty strings.
    if (currTitle.isEmpty()) {
        return "";
    }

    if (StringUtil.match(currTitle, " [\\|\\-] ")) { // Title has '|' and/or '-'.
        // Get part before last '|' or '-'.
        currTitle = StringUtil.findAndReplace(origTitle, "(.*)[\\|\\-] .*", "$1");
        if (StringUtil.splitLength(currTitle, "\\s+") < 3) { // Part has < 3 words.
            // Get part after first '|' or '-'.
            currTitle = StringUtil.findAndReplace(origTitle, "[^\\|\\-]*[\\|\\-](.*)", "$1");
        }
    } else if (currTitle.indexOf(": ") != -1) { // Title has ':'.
        // Get part after last ':'.
        currTitle = StringUtil.findAndReplace(origTitle, ".*:(.*)", "$1");
        if (StringUtil.splitLength(currTitle, "\\s+") < 3) { // Part has < 3 words.
            // Get part after first ':'.
            currTitle = StringUtil.findAndReplace(origTitle, "[^:]*[:](.*)", "$1");
        }
    } else if (root != null && (currTitle.length() > 150 || currTitle.length() < 15)) {
        // Get plain text from the only H1 element.
        // TODO(kuan): this is what readability does, but this block may make more sense as an
        // if rather than else-if, e.g. currently this else-if block is used when original title
        // is "foo" but not when it is "foo |" or "foo:".
        currTitle = findFirstH1(root);
        if (currTitle.isEmpty()) {
            currTitle = origTitle;
        }
    }

    currTitle = StringUtil.jsTrim(currTitle);

    if (StringUtil.splitLength(currTitle, "\\s+") <= 4) {
        currTitle = origTitle;
    }

    return currTitle;
}
From source
License:Open Source License
private static String findFirstH1(Element root) { NodeList<Element> hOnes = root.getElementsByTagName("H1"); // Use javacript innerText instead of javascript textContent; the former only returns // visible text, and we assume visible H1's are more inclined to being potential titles. String h1 = ""; for (int i = 0; i < hOnes.getLength() && h1.isEmpty(); i++) { h1 = DomUtil.getInnerText(hOnes.getItem(i)); }//from w w w . j a v a2 s .co m return h1; }
From source
License:Open Source License
public static Element javaGetFirstElementWithClassName(Element root, String className) { NodeList<Element> allElems = root.getElementsByTagName("*"); for (int i = 0; i < allElems.getLength(); i++) { Element elem = allElems.getItem(i); if (hasClassName(elem, className)) return elem; }//from ww w . ja v a 2 s.c o m return null; }
From source
License:Open Source License
private void findPublisher() { mPublisher = ""; // Look for "publisher" or "source_organization" attribute in any html tag. NodeList<Element> allElems = mRoot.getElementsByTagName("*"); for (int i = 0; i < allElems.getLength() && mPublisher.isEmpty(); i++) { Element e = allElems.getItem(i); mPublisher = e.getAttribute("publisher"); if (mPublisher.isEmpty()) mPublisher = e.getAttribute("source_organization"); }/*w w w . jav a 2 s .co m*/ }