List of usage examples for the org.jsoup.nodes.Document select method
public Elements select(String cssQuery)
From source file:Main.java
/**
 * Selects the {@code div} elements of a document using an attribute selector
 * built from the given value.
 *
 * NOTE(review): despite the method and parameter names, the selector filters
 * on the {@code id} attribute, not on {@code class} - confirm that this is
 * what callers expect.
 *
 * @param document  the parsed document to query
 * @param className the value concatenated into the {@code div[id= ...]} selector
 * @return whatever {@code document.select} returns for the built selector
 */
static Elements getDivForClass(Document document, String className) {
    String selector = "div[id= " + className + "]";
    return document.select(selector);
}
From source file:Main.java
/**
 * Extracts the text of the first {@code div[class=card-summary-content]}
 * element, excluding the content of any {@code sup} elements inside it.
 *
 * Note: the {@code sup} elements are removed from the passed document itself,
 * not from a copy.
 *
 * @param doc the parsed document; may be {@code null}
 * @return the summary text, or {@code null} when {@code doc} is {@code null}
 *         or contains no matching element
 */
private static String parseCardContent(Document doc) {
    if (doc == null) {
        return null;
    }
    // first() yields null for an empty result instead of throwing like get(0),
    // so a missing summary is handled as a normal outcome rather than via catch.
    Element summaryEl = doc.select("div[class=card-summary-content]").first();
    if (summaryEl == null) {
        return null;
    }
    // Strip <sup> elements so their text is not part of the returned summary.
    summaryEl.select("sup").remove();
    return summaryEl.text();
}
From source file:Main.java
public static List<org.jsoup.nodes.Element> findAllElements(String segment, String name, String attrname, String attrvalue) {//ww w . j av a 2 s . c om List<org.jsoup.nodes.Element> ret = new LinkedList<org.jsoup.nodes.Element>(); Document doc = Jsoup.parse(segment); Elements elements = doc.select(name); for (int i = 0; i < elements.size(); i++) { String value = elements.get(i).attr(attrname); if (value != null) { if (value.equals(attrvalue)) { ret.add(elements.get(i)); } } } return ret; }
From source file:io.github.carlomicieli.footballdb.starter.parsers.TeamRosterParser.java
/**
 * Looks up the first element matching the {@code #result} selector.
 *
 * @param doc the parsed document to search
 * @return an {@code Optional} holding that element, or an empty
 *         {@code Optional} when the selector matches nothing
 */
private static Optional<Element> rosterTable(Document doc) {
    return Optional.ofNullable(doc.select("#result").first());
}
From source file:com.mmj.app.common.util.SpiderHtmlUtils.java
/**
 * Parses the given HTML and returns the text of the first element matching
 * a selector.
 *
 * @param html   the HTML text to parse
 * @param select the selector passed to {@code Document.select}
 * @return the text of the first matching element, or {@code null} when
 *         {@code html} fails the {@code StringUtils.isNotEmpty} check or
 *         nothing matches
 */
public static String parserHtml(String html, String select) {
    if (!StringUtils.isNotEmpty(html)) {
        return null;
    }
    Elements matched = Jsoup.parse(html).select(select);
    if (matched == null || matched.isEmpty()) {
        return null;
    }
    // Only the first match is ever used.
    return matched.get(0).text();
}
From source file:com.mmj.app.common.util.SpiderHtmlUtils.java
public static String fetchDescriptionHtml(String html) { if (StringUtils.isNotEmpty(html)) { Document doc = Jsoup.parse(html); Elements linksElements = doc.select("head>meta[name=Description]"); if (linksElements == null || linksElements.isEmpty()) { return null; }//from w w w . j a va 2 s .c o m for (Element ele : linksElements) { String contextS = ele.attr("content"); return contextS; } } return null; }
From source file:fr.mael.microrss.util.Tools.java
/** * Cleans an html source/*from ww w.j a va 2s .co m*/ * @param source * @return */ public static String cleanHTML(String source) { if (source == null) { return null; } Document doc = Jsoup.parse(source); doc.select("link").remove(); doc.select("*[style]").removeAttr("style"); doc.select("style,script").remove(); return doc.select("body").html().toString(); }
From source file:com.ds.test.ClientFormLogin.java
/**
 * Reads the {@code value} attribute of the elements matching
 * {@code [name=authenticity_token]} in the given HTML.
 * <p>
 * Each value found is printed to standard output with a {@code token:}
 * prefix. When several elements match, the value of the last one wins.
 *
 * @param html the HTML text to parse
 * @return the {@code value} attribute of the last matching element, or an
 *         empty string when nothing matches
 */
public static String parseHtml(String html) {
    Elements tokenFields = Jsoup.parse(html).select("[name=authenticity_token]");
    String token = "";
    for (int i = 0; i < tokenFields.size(); i++) {
        token = tokenFields.get(i).attr("value");
        System.out.println("token:" + token);
    }
    return token;
}
From source file:com.feilong.tools.jsoup.JsoupUtil.java
/**
 * Obtains a document for the given url via {@link #getDocument(String)} and
 * runs a select query against it.
 * <p>
 * Both arguments are checked with {@code Validate.notEmpty} before the
 * document is requested.
 *
 * @param url
 *            the url handed to {@link #getDocument(String)}
 * @param selectQuery
 *            the query passed to {@code Document.select}
 * @return the elements returned by the select query
 * @throws JsoupUtilException
 *             declared by this method; presumably raised by
 *             {@link #getDocument(String)} - confirm against its definition
 * @see #getDocument(String)
 * @see org.jsoup.nodes.Element#select(String)
 */
public static Elements getElementsBySelect(String url, String selectQuery) throws JsoupUtilException {
    Validate.notEmpty(url);
    Validate.notEmpty(selectQuery);
    return getDocument(url).select(selectQuery);
}
From source file:downloadwolkflow.getWorkFlowList.java
public static String[] getPageList() { String[] pageList = null;/* w ww . j av a2 s. c om*/ CloseableHttpClient httpclient = HttpClients.createDefault(); try { HttpGet httpget = new HttpGet("http://www.myexperiment.org/workflows"); HttpResponse response = httpclient.execute(httpget); String mainpage = EntityUtils.toString(response.getEntity()); Document mainDoc = Jsoup.parse(mainpage); Element pageinfo = mainDoc.select("div.pagination ").first(); // System.out.println(pageinfo.toString()); Elements pagesElemenets = pageinfo.select("[href]"); int pageSize = Integer.parseInt(pagesElemenets.get(pagesElemenets.size() - 2).text()); pageList = new String[pageSize + 1]; for (int i = 1; i <= pageSize; i++) { pageList[i] = "http://www.myexperiment.org/workflows?page=" + i; } } catch (IOException ex) { Logger.getLogger(getWorkFlowList.class.getName()).log(Level.SEVERE, null, ex); } return pageList; }