List of usage examples for org.jsoup.select.Elements first()
public Element first()
From source file:Search.DataManipulation.DataParser.java
/**
 * Extracts rating information from the first element with class {@code score-container}.
 *
 * <p>Each direct child of that element is inspected. A child whose {@code itemprop} attribute is
 * {@code ratingValue} contributes its {@code content} attribute under the key {@code "rating"};
 * a child whose {@code itemprop} is {@code ratingCount} contributes its {@code content} attribute
 * under the key {@code "ratingCount"}. All other children are ignored.
 *
 * @param dom the parsed document to read from
 * @return a mutable map holding the entries found; empty when the document contains no
 *     {@code score-container} element
 */
public Map<String, String> getRatingData(Document dom) {
    Map<String, String> ratingData = new HashMap<>();

    Element scoreContainer = dom.getElementsByClass("score-container").first();
    if (scoreContainer == null) {
        // Elements.first() yields null for an empty match; dereferencing it used to throw an NPE.
        return ratingData;
    }

    for (Element rating : scoreContainer.children()) {
        String item = rating.attr("itemprop");
        String content = rating.attr("content");
        if ("ratingValue".equals(item)) {
            ratingData.put("rating", content);
        } else if ("ratingCount".equals(item)) {
            ratingData.put(item, content);
        }
    }
    return ratingData;
}
From source file:solarrecorder.SolarRecorder.java
/**
 * Fetches {@code http://envoy/production} and records selected rows of the production table.
 *
 * <p>The table is taken to be the element immediately following the first {@code <h1>}. Every
 * {@code <tr>} inside its first {@code <tbody>} that has exactly two {@code <td>} cells is read
 * as a name/value pair; pairs named {@code "Currently"} or {@code "Today"} are appended to
 * {@code envoyData}. Rows with any other cell count or name are skipped.
 *
 * @throws IOException if the page cannot be fetched, or if it lacks the expected
 *     {@code <h1>} / following element / {@code <tbody>} structure
 */
private void getProdData() throws IOException {
    org.jsoup.nodes.Document doc = Jsoup.connect("http://envoy/production").get();

    // Each lookup below may legitimately come back null; resolve the chain step by step so a
    // changed page layout is reported as an IOException instead of a NullPointerException.
    Element h1 = doc.getElementsByTag("h1").first();
    Element table = (h1 == null) ? null : h1.nextElementSibling();
    Element tbody = (table == null) ? null : table.getElementsByTag("tbody").first();
    if (tbody == null) {
        throw new IOException(
                "Unexpected layout at http://envoy/production: no <tbody> found after the first <h1>");
    }

    for (Element tr : tbody.getElementsByTag("tr")) {
        Elements alltd = tr.getElementsByTag("td");
        if (alltd.size() != 2) {
            continue;
        }
        String name = alltd.first().text();
        String value = alltd.last().text();
        switch (name) {
            case "Currently":
            case "Today":
                envoyData.add(new EnvoyData(name, value));
                break;
            default:
                // Not a row we record.
                break;
        }
    }
}
From source file:solarrecorder.SolarRecorder.java
private void getSysData() throws IOException { org.jsoup.nodes.Document doc = Jsoup.connect("http://envoy").get(); Elements allh2 = doc.getElementsByTag("h2"); for (Element h2 : allh2) { if (h2.text().equals("System Statistics")) { Elements tables = h2.parent().getElementsByTag("table"); Elements alltr = tables.first().getElementsByTag("tbody").first().getElementsByTag("tr"); for (Element tr : alltr) { Elements alltd = tr.getElementsByTag("td"); String name = alltd.first().text(); String value = alltd.last().text(); if (name.equals("Number of Microinverters Online")) { envoyData.add(new EnvoyData(name, value)); }// ww w . j av a 2 s.c o m } } } }
From source file:wherehows.common.utils.GitUtil.java
/** * Crawlling the project page to get list of repositories, only works for Gitorious * @param projectUrl the project url e.g. https://git.example.com/project * @return List of path of repositories e.g. project/repo * @throws IOException/*w w w. ja v a 2 s . c o m*/ */ public static Map<String, String> getRepoListFromProject(String projectUrl) throws IOException { Map<String, String> repoList = new HashMap<>(); Document doc = Jsoup.connect(projectUrl).data("format", "xml").get(); Elements repos = doc.getElementsByTag("repositories"); Elements mainlines = repos.first().getElementsByTag("mainlines"); Elements repo = mainlines.first().getElementsByTag("repository"); for (Element e : repo) { String repoName = e.getElementsByTag("name").first().text(); String repoUrl = e.getElementsByTag("clone_url").first().text(); repoList.put(repoName.trim(), repoUrl.trim()); } return repoList; }
From source file:xxx.web.comments.debates.impl.ProConOrgParser.java
@Override public Debate parseDebate(InputStream inputStream) throws IOException { Debate result = new Debate(); Document doc = Jsoup.parse(inputStream, "UTF-8", "http://www.procon.org/"); // Set the Url of the doc // title/*from w w w . j a v a2s .c om*/ Element body = doc.body(); Elements debateTitleElements = body.select("h2"); // Elements debateTitleElements = body.select("p[class=title]").select("p[style]"); if (debateTitleElements.first() == null) { // not a debate return null; } String title = Utils.normalize(debateTitleElements.first().text()); result.setTitle(title); Elements proConTr = body.select("tr > td > b:contains(PRO \\(yes\\))"); if (proConTr == null || proConTr.parents() == null || proConTr.parents().first() == null || proConTr.parents().first().parents() == null || proConTr.parents().first().parents().first() == null || proConTr.parents().first().parents().first().nextElementSibling() == null) { // not a pro-con debate return null; } Element trAnswers = proConTr.parents().first().parents().first().nextElementSibling(); // the PRO side Element proTd = trAnswers.select("td").get(0); Element conTd = trAnswers.select("td").get(1); // System.out.println(proTd.select("blockquote").size()); // System.out.println(conTd.select("blockquote").size()); for (Element text : proTd.select("blockquote > div[class=editortext]")) { Argument argument = new Argument(); argument.setStance("pro"); argument.setText(extractPlainTextFromTextElement(text)); argument.setOriginalHTML(text.html()); // set ID idCounter++; argument.setId("pcq_" + idCounter); if (!argument.getText().isEmpty()) { result.getArgumentList().add(argument); } else { System.err.println("Failed to extract text from " + text.html()); } } for (Element text : conTd.select("blockquote > div[class=editortext]")) { Argument argument = new Argument(); argument.setStance("con"); argument.setText(extractPlainTextFromTextElement(text)); argument.setOriginalHTML(text.html()); idCounter++; argument.setId("pcq_" + 
idCounter); if (!argument.getText().isEmpty()) { result.getArgumentList().add(argument); } else { System.err.println("Failed to extract text from " + text.html()); } } // show some stats: Map<String, Integer> map = new HashMap<>(); map.put("pro", 0); map.put("con", 0); for (Argument argument : result.getArgumentList()) { map.put(argument.getStance(), map.get(argument.getStance()) + 1); } System.out.println(map); return result; }