List of usage examples for the org.jsoup.select.Elements constructor Elements(Element...)
public Elements(Element... elements)
From source file:com.gistlabs.mechanize.document.html.JsoupDataUtil.java
/** Returns the all elements matching any of the given tags (case-insensitive). */ public static Elements findElementsByTag(Element element, String... tags) { List<Element> results = new ArrayList<Element>(); Set<String> tagSet = new HashSet<String>(); for (String tag : tags) tagSet.add(tag.toLowerCase());//from www . ja v a2s . co m filterElementsByTag(results, element, tagSet); return new Elements(results); }
From source file:org.apache.marmotta.ldclient.provider.phpbb.PHPBBPostProvider.java
/** * Return a mapping table mapping from RDF properties to XPath Value Mappers. Each entry in the * map is evaluated/*from w w w . j ava 2 s .c o m*/ * in turn; in case the XPath expression yields a result, the property is added for the * processed resource. * * @return * @param requestUrl */ @Override protected Map<String, JSoupMapper> getMappings(String resource, String requestUrl) { URI uri = null; try { uri = new URI(requestUrl); Map<String, String> params = new HashMap<String, String>(); for (NameValuePair p : URLEncodedUtils.parse(uri, "UTF-8")) { params.put(p.getName(), p.getValue()); } if (params.containsKey("p")) { // mappings for a reply that has directly been addressed using the ?p=... parameter // to viewtopic.php, e.g. http://www.carving-ski.de/phpBB/viewtopic.php?p=119208 Map<String, JSoupMapper> commentMappings = new HashMap<String, JSoupMapper>(); commentMappings.put(Namespaces.NS_DC + "title", new CssTextLiteralMapper( String.format("div#pagecontent table:has(a[name=p%s]) td.gensmall div", params.get("p"))) { @Override public List<Value> map(String resourceUri, Element elem, ValueFactory factory) { final String val = elem.ownText().replaceFirst("^\\s*:", "").replaceAll(" ", " ") .trim(); if (datatype != null) return Collections.singletonList((Value) factory.createLiteral(val, factory.createURI(Namespaces.NS_XSD + datatype))); else return Collections.singletonList((Value) factory.createLiteral(val)); } @Override public Elements select(Element htmlDoc) { final Element first = super.select(htmlDoc).first(); return first != null ? 
new Elements(first) : new Elements(); } }); commentMappings.put(Namespaces.NS_DC + "creator", new CssTextLiteralMapper( String.format("div#pagecontent table:has(a[name=p%s]) .postauthor", params.get("p")))); commentMappings.put(Namespaces.NS_DC + "description", new CssTextLiteralMapper( String.format("div#pagecontent table:has(a[name=p%s]) div.postbody", params.get("p")))); commentMappings.put(Namespaces.NS_DC + "date", new PHPBBDateMapper( String.format("div#pagecontent td.gensmall:has(a[name=p%s]) div", params.get("p"))) { @Override public Elements select(Element htmlDoc) { final Elements sel = super.select(htmlDoc); if (sel.size() > 0) { final Element e = sel.get(1); if (e != null) return new Elements(e); } return new Elements(); } }); return commentMappings; } else throw new RuntimeException( "the requested resource does not seem to identify a PHPBB Post (p=... parameter missing)"); } catch (URISyntaxException e) { throw new RuntimeException( "the requested resource does not seem to identify a PHPBB Post (URI syntax error)"); } }
From source file:org.apache.marmotta.ldclient.provider.phpbb.PHPBBTopicProvider.java
/** * Return a mapping table mapping from RDF properties to XPath Value Mappers. Each entry in the map is evaluated * in turn; in case the XPath expression yields a result, the property is added for the processed resource. * * @return/* w w w . j ava 2s . com*/ * @param requestUrl */ @Override protected Map<String, JSoupMapper> getMappings(String resource, String requestUrl) { URI uri = null; try { uri = new URI(requestUrl); Map<String, String> params = new HashMap<String, String>(); for (NameValuePair p : URLEncodedUtils.parse(uri, "UTF-8")) { params.put(p.getName(), p.getValue()); } if (params.containsKey("t")) { Map<String, JSoupMapper> postMappings = new HashMap<String, JSoupMapper>(); if (params.containsKey("start")) { // when start is set, we only take the replies; we are in a second or further page of the topic postMappings.put(Namespaces.NS_SIOC + "container_of", new PHPBBPostIdMapper("div#pagecontent table td.gensmall a[name]")); } else { // otherwise we also take the initial title, creator and date for the topic postMappings.put(Namespaces.NS_DC + "title", new CssTextLiteralMapper("div#pageheader a.titles")); postMappings.put(Namespaces.NS_DC + "creator", new CssTextLiteralMapper(new CssSelectorMapper.Selector() { @Override public Elements select(Element node) { final Element first = node.select("div#pagecontent table b.postauthor").first(); if (first != null) return new Elements(first); return new Elements(); } })); postMappings.put(Namespaces.NS_DC + "date", new PHPBBDateMapper("div#pagecontent table td.gensmall div") { @Override public Elements select(Element htmlDoc) { final Elements sel = super.select(htmlDoc); if (sel.size() > 0) { final Element e = sel.get(1); if (e != null) return new Elements(e); } return new Elements(); } }); postMappings.put(Namespaces.NS_SIOC + "has_container", new PHPBBForumHrefMapper("p.breadcrumbs a") { @Override public Elements select(Element htmlDoc) { final Element select = super.select(htmlDoc).last(); return select != 
null ? new Elements(select) : new Elements(); } }); postMappings.put(Namespaces.NS_SIOC + "container_of", new PHPBBPostIdMapper("div#pagecontent table td.gensmall a[name]")); } return postMappings; } else throw new RuntimeException( "the requested resource does not seem to identify a PHPBB topic (t=... parameter missing)"); } catch (URISyntaxException e) { throw new RuntimeException( "the requested resource does not seem to identify a PHPBB topic (URI syntax error)"); } }
From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java
/** * ?xpath//ww w . j a v a 2 s . c o m * * @param xpath * @param root * @return */ public List<JXNode> evaluate(String xpath, Elements root) throws NoSuchAxisException, NoSuchFunctionException { List<JXNode> res = new LinkedList<JXNode>(); Elements context = root; List<Node> xpathNodes = getXpathNodeTree(xpath); for (int i = 0; i < xpathNodes.size(); i++) { Node n = xpathNodes.get(i); LinkedList<Element> contextTmp = new LinkedList<Element>(); if (n.getScopeEm() == ScopeEm.RECURSIVE || n.getScopeEm() == ScopeEm.CURREC) { if (n.getTagName().startsWith("@")) { for (Element e : context) { //? String key = n.getTagName().substring(1); if (key.equals("*")) { res.add(JXNode.t(e.attributes().toString())); } else { String value = e.attr(key); if (StringUtils.isNotBlank(value)) { res.add(JXNode.t(value)); } } //?? for (Element dep : e.getAllElements()) { if (key.equals("*")) { res.add(JXNode.t(dep.attributes().toString())); } else { String value = dep.attr(key); if (StringUtils.isNotBlank(value)) { res.add(JXNode.t(value)); } } } } } else if (n.getTagName().endsWith("()")) { //??text() res.add(JXNode.t(context.text())); } else { Elements searchRes = context.select(n.getTagName()); for (Element e : searchRes) { Element filterR = filter(e, n); if (filterR != null) { contextTmp.add(filterR); } } context = new Elements(contextTmp); if (i == xpathNodes.size() - 1) { for (Element e : contextTmp) { res.add(JXNode.e(e)); } } } } else { if (n.getTagName().startsWith("@")) { for (Element e : context) { String key = n.getTagName().substring(1); if (key.equals("*")) { res.add(JXNode.t(e.attributes().toString())); } else { String value = e.attr(key); if (StringUtils.isNotBlank(value)) { res.add(JXNode.t(value)); } } } } else if (n.getTagName().endsWith("()")) { res = (List<JXNode>) callFunc(n.getTagName().substring(0, n.getTagName().length() - 2), context); } else { for (Element e : context) { Elements filterScope = e.children(); if (StringUtils.isNotBlank(n.getAxis())) { filterScope 
= getAxisScopeEls(n.getAxis(), e); } for (Element chi : filterScope) { Element fchi = filter(chi, n); if (fchi != null) { contextTmp.add(fchi); } } } context = new Elements(contextTmp); if (i == xpathNodes.size() - 1) { for (Element e : contextTmp) { res.add(JXNode.e(e)); } } } } } return res; }
From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java
/** * //from ww w . ja v a 2s . co m * * @param e * @param node * @return */ public Element filter(Element e, Node node) throws NoSuchFunctionException, NoSuchAxisException { if (node.getTagName().equals("*") || node.getTagName().equals(e.nodeName())) { if (node.getPredicate() != null && StringUtils.isNotBlank(node.getPredicate().getValue())) { Predicate p = node.getPredicate(); if (p.getOpEm() == null) { if (p.getValue().matches("\\d+") && getElIndex(e) == Integer.parseInt(p.getValue())) { return e; } else if (p.getValue().endsWith("()") && (Boolean) callFilterFunc(p.getValue().substring(0, p.getValue().length() - 2), e)) { return e; } else if (p.getValue().startsWith("@") && e.hasAttr(StringUtils.substringAfter(p.getValue(), "@"))) { return e; } //todo p.value ~= contains(./@href,'renren.com') } else { if (p.getLeft().matches("[^/]+\\(\\)")) { Object filterRes = p.getOpEm().excute( callFilterFunc(p.getLeft().substring(0, p.getLeft().length() - 2), e).toString(), p.getRight()); if (filterRes instanceof Boolean && (Boolean) filterRes) { return e; } else if (filterRes instanceof Integer && e.siblingIndex() == Integer.parseInt(filterRes.toString())) { return e; } } else if (p.getLeft().startsWith("@")) { String lValue = e.attr(p.getLeft().substring(1)); Object filterRes = p.getOpEm().excute(lValue, p.getRight()); if ((Boolean) filterRes) { return e; } } else { // ???xpath? List<Element> eltmp = new LinkedList<Element>(); eltmp.add(e); List<JXNode> rstmp = evaluate(p.getLeft(), new Elements(eltmp)); if ((Boolean) p.getOpEm().excute(StringUtils.join(rstmp, ""), p.getRight())) { return e; } } } } else { return e; } } return null; }