List of usage examples for org.jsoup.nodes Element child
public Element child(int index)
From source file:org.asqatasun.rules.elementselector.ImageElementSelector.java
/** * An link is seen as composite when it is composed with more than one * element. The tested element has at least one image. If the text is different * from the one of the child element, the link is composite by definition. * It the text is identical, we check whether the current element has more * than 1 child./*from w ww . j av a 2s . c o m*/ * @param imageParent * @return whether the current image is a composite link. */ private boolean isCompositeLink(Element imageParent, Element image) { if (imageParent == null) { return false; } if (!StringUtils.equals(imageParent.text(), image.text())) { return true; } if (imageParent.children().size() == 1) { return isCompositeLink(imageParent.child(0), image); } else if (imageParent.children().size() > 1) { return true; } return false; }
From source file:org.trec.liveqa.GetYAnswersPropertiesFromQid.java
/** * /* ww w .ja v a 2 s.c o m*/ * @param iQid question ID * @return map of features and attributes: question title, body, category, best answer, date * @throws Exception */ public static Map<String, String> extractData(String iQid) throws Exception { Map<String, String> res = new LinkedHashMap<>(); res.put("qid", iQid); // parse date from qid res.put("Date", DATE_FORMAT.parse(iQid.substring(0, 14)).toString()); // get and mine html page String url = URL_PREFIX + iQid; HttpClient client = new HttpClient(); GetMethod method = new GetMethod(url); method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); try { int statusCode = client.executeMethod(method); if (statusCode != HttpStatus.SC_OK) { System.err.println("Method failed: " + method.getStatusLine()); } InputStream responseBody = method.getResponseBodyAsStream(); // strip top levels Document doc = Jsoup.parse(responseBody, "UTF8", url); Element html = doc.child(0); Element body = html.child(1); Element head = html.child(0); // get category res.put("Top level Category", findElementText(body, cc)); // get title res.put("Title", findElementText(head, ct)); // get body res.put("Body", findElementText(head, cb)); // get keywords res.put("Keywords", findElementText(head, ck)); // get best answer Element best_answer_div = html.select("div#ya-best-answer").first(); if (best_answer_div != null) { res.put("Best Answer", findElementText(best_answer_div, cba)); } responseBody.close(); } catch (HttpException e) { System.err.println("Fatal protocol violation: " + e.getMessage()); e.printStackTrace(); } catch (IOException e) { System.err.println("Fatal transport error: " + e.getMessage()); e.printStackTrace(); } finally { method.releaseConnection(); } return res; }
From source file:poe.trade.assist.UniquesListSearchGenerator.java
/**imgurl, reqLvl, base, mod * @param args//from ww w . java2 s . c om * @throws Exception */ public static void main(String[] args) throws Exception { List<String> outputLines = new LinkedList<>(); outputLines.add( "Name Art Req.Level Base Mods TaslismanSC TalismanHC Standard Hardcore poewiki"); for (String list : lists) { HttpResponse<String> response = Unirest.get("http://pathofexile.gamepedia.com/" + list) .header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0") .asString(); Document doc = Jsoup.parse(response.getBody()); Elements elems = doc.select("table.wikitable.sortable"); for (Element table : elems) { Elements rows = table.select("tr"); int ctr = 0; boolean hasRequiredLevel = false; for (Element row : rows) { if (ctr == 0) { // first row is headers hasRequiredLevel = !row.select("abbr[title=\"Required Level\"]").isEmpty(); ctr++; continue; } String name = row.child(0).child(0).attr("title"); System.out.println("Now processing: " + name); String imgurl = "=IMAGE(\"" + row.select("img").attr("src") + "\", 3)"; String base = row.child(1).child(0).attr("title"); String reqLvl = hasRequiredLevel ? row.child(2).text() : "0"; reqLvl = reqLvl.equalsIgnoreCase("n/a") ? "0" : reqLvl; String mod = "=\""; Elements mods = row.select("span.itemboxstatsgroup.text-mod"); if (!mods.isEmpty()) { if (mods.size() > 2) throw new Exception("mods.size() is > 2. " + name + " - " + mods.toString()); boolean hasImplicit = mods.size() > 1; String imp = hasImplicit ? mods.get(0).text() : ""; int expIdx = hasImplicit ? 1 : 0; String lineSeparator = "\"&CHAR(10)&\""; String exp = mods.get(expIdx).textNodes().stream().map(n -> n.text().trim()) .filter(s -> !s.isEmpty()).collect(Collectors.joining(lineSeparator)); String additionalExp = mods.get(expIdx).children().stream().filter(e -> e.hasText()) .map(e -> e.text().trim()).collect(Collectors.joining(lineSeparator)); if (additionalExp != null && !additionalExp.isEmpty()) exp += lineSeparator + additionalExp; mod += imp; if (hasImplicit) mod += (lineSeparator + "--------------" + lineSeparator); mod += exp; } mod += "\""; String standard = "Standard"; String hardcore = "Hardcore"; String tempsc = "Talisman"; String temphc = "Talisman+Hardcore"; String nameenc = URLEncoder.encode(name, "UTF-8"); String sc = hyperlink(getSearchURL(standard, nameenc)); String hc = hyperlink(getSearchURL(hardcore, nameenc)); String tsc = hyperlink(getSearchURL(tempsc, nameenc)); String thc = hyperlink(getSearchURL(temphc, nameenc)); String poewikiurl = hyperlink("http://pathofexile.gamepedia.com/" + (name.replace(' ', '_'))); String s = format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", name, imgurl, reqLvl, base, mod, tsc, thc, sc, hc, poewikiurl); outputLines.add(s); Thread.sleep(1000); } } } FileUtils.writeLines(new File("uniqueslist.txt"), outputLines); }