List of usage examples for org.jsoup.nodes.Element.getElementsByClass
public Elements getElementsByClass(String className)
From source file:Main.java
public static void main(String[] args) throws IOException { Document doc = Jsoup.connect("http://www.your server.com/").get(); Elements tableRows = doc.select("tr"); for (Element row : tableRows) { Elements cls1 = row.getElementsByClass("cls1"); Elements cls2 = row.getElementsByClass("cls2"); Elements cls3 = row.getElementsByClass("cls3"); if (!cls1.isEmpty() && !cls2.isEmpty() && !cls3.isEmpty()) { System.out.println(cls1.get(0).text()); System.out.println(cls2.get(0).text()); System.out.println(cls3.get(0).text()); }//from w w w . j av a 2 s .co m } }
From source file:controllers.BBProxy.java
public static F.Promise<Result> index(String query) { if (StringUtils.isEmpty(query)) { F.Promise.promise(new F.Function0<Object>() { @Override/*from ww w. jav a 2 s. c om*/ public Object apply() throws Throwable { return ok(Json.toJson("Query parameter (q) not provided ")); } }); } F.Promise<WSResponse> wsResponsePromise = WS.url("http://www.bloomberg.com/search") .setQueryParameter("query", query).get(); return wsResponsePromise.map(new F.Function<WSResponse, Result>() { @Override public Result apply(WSResponse wsResponse) throws Throwable { String body = wsResponse.getBody(); List<Map<String, String>> ret = new ArrayList<Map<String, String>>(); try { // Insert into map org.jsoup.nodes.Document doc = Jsoup.parse(body); Elements items = doc.getElementsByClass("search-result"); for (Element item : items) { Map<String, String> keyValue = new LinkedHashMap<String, String>(); keyValue.put("image", item.getElementsByClass("search-result-story__thumbnail__image").attr("src")); keyValue.put("title", item.getElementsByClass("search-result-story__headline").text()); int index = item.getElementsByClass("search-result-story__body").text() .indexOf(" (Source: Bloomberg"); if (index == -1) { keyValue.put("content", item.getElementsByClass("search-result-story__body").text()); } else { keyValue.put("content", item.getElementsByClass("search-result-story__body").text() .substring(0, index)); } keyValue.put("date", item.getElementsByClass("published-at").text()); keyValue.put("url", "www.bloomberg.com/" + item.getElementsByClass("search-result-story__thumbnail__link").attr("href")); ret.add(keyValue); } } catch (DOMException e) { e.printStackTrace(); } return ok(Json.toJson(ret)); } }); }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
private static Element getSingleElementByClass(Element root, String className) { Elements elementsByClass = root.getElementsByClass(className); if (elementsByClass.size() != 1) { String error = "Found " + elementsByClass.size() + " elements with class " + className + " inside of " + root.tagName() + "-" + root.className(); throw new RuntimeException(error + root.html()); }//w w w.jav a2 s. c om return elementsByClass.first(); }
From source file:app.sunstreak.yourpisd.net.Parser.java
/** Parses average of each term from GradeSummary.aspx. * NOTICE: Does not work for second semester classes in which the second semester schedule * is different from the first semester schedule. * //from w ww . j ava 2 s.co m * @param doc the Jsoup element of GradeSummary.aspx * @param classList classList as returned by Init.aspx * @throws org.json.JSONException * @return [ * [classId, avg0, avg1, ...], * [classId, avg0, avg1, ...], * ] */ public static int[][] gradeSummary(Element doc, JSONArray classList) { List<int[]> gradeSummary = new ArrayList<int[]>(); Element reportTable = doc.getElementsByClass("reportTable").get(0).getElementsByTag("tbody").get(0); Elements rows = reportTable.getElementsByTag("tr"); int rowIndex = 0; while (rowIndex < rows.size()) { int[] classAverages = new int[11]; Arrays.fill(classAverages, -3); Element row = rows.get(rowIndex); Elements columns = row.getElementsByTag("td"); classAverages[0] = getClassId(row); for (int col = 0; col < 10; col++) { Element column = columns.get(col); String text = column.text(); // -2 for disabled class if (column.attr("class").equals("disabledCell")) text = "-2"; classAverages[col + 1] = text.equals("") ? -1 : Integer.parseInt(text); } gradeSummary.add(classAverages); rowIndex++; } /* * [ * [classId, avg0, avg1, ...], * [classId, avg0, avg1, ...], * ] */ int[][] result = new int[gradeSummary.size()][]; for (int i = 0; i < result.length; i++) { result[i] = new int[gradeSummary.get(i).length]; for (int j = 0; j < result[i].length; j++) result[i][j] = gradeSummary.get(i)[j]; } return result; }
From source file:ie.nuim.cs.dri.metadata.WebSearch.java
/** * * @param xmlString/*from w ww .j a va 2s .c om*/ * @param title * @return * @throws Exception */ public static ROS extractCiteSeerMetadata(String xmlString, String title) throws Exception { //System.out.println("CiteSeer search returned:\n"+xmlString); Document doc = Jsoup.parse(xmlString); ROS ros = new ROS(); String articleTitle = ""; int pubYear = 0; Elements resultElements = doc.getElementsByClass("result"); for (Element result : resultElements) { Elements titleElement = result.getElementsByClass("doc_details"); System.out.println(titleElement.text() + "\t" + title); if (!titleElement.text().equalsIgnoreCase(title)) { break; } else { ros.setArticleTitle(title); Elements authorElement = result.getElementsByClass("pubinfo"); //authors=authorElement.text(); Elements yearElement = result.getElementsByClass("pubyear"); String yearStr = yearElement.text().replace(", ", ""); int year = (yearStr.length() > 0 ? Integer.parseInt(yearStr) : 0); System.out.println("year:" + yearElement.text().replace(", ", "")); ros.setYear(year); Elements citeElement = result.getElementsByClass("citation"); String[] citedBy = citeElement.text().split(" "); int citeby = (citedBy.length > 2 ? Integer.parseInt(citedBy[2]) : -1); ros.setCitedByCount(citeby); Elements publicationElement = result.getElementsByClass("pubvenue"); String pub = publicationElement.text().replace("- ", "").toLowerCase(); if (pub.contains("journal")) { ros.setPublicationType("Journal"); } else if (pub.contains("conference") || pub.contains("conf.") || pub.contains("proc.")) { ros.setPublicationType("Conference"); } else { ros.setPublicationType(""); } ros.setPublicationName(pub); } } // } return ros; }
From source file:fr.arlefebvre.pronostics.controller.EuroMatchListController.java
@RequestMapping("/euro2016/matches") public List<Match> matches() { if (pseudoCache != null && !pseudoCache.isEmpty()) return pseudoCache; ArrayList<Match> result = new ArrayList<Match>(); String uri = "http://www.lequipe.fr/Football/Euro/Saison-2016/calendrier-resultats.html"; //On se connecte au site et on charge le document html Document doc;/*from w ww . java 2 s . co m*/ try { doc = Jsoup.connect(uri).get(); Elements elements = doc.getElementsByClass("mainDate"); for (Element element : elements) { Element title = element.getElementsByClass("title").first(); String date = title.text(); Element tbody = element.getElementsByTag("tbody").first(); for (Element matchElement : tbody.children()) { String groupe = matchElement.getElementsByClass("date").first().text(); String home = matchElement.getElementsByClass("domicile").first().text(); String away = matchElement.getElementsByClass("exterieur").first().text(); Match m = new Match(); m.setDate(date); m.setHomeTeamId(home); m.setAwayTeamId(away); m.setGroup(groupe); result.add(m); } } } catch (IOException e) { e.printStackTrace(); } if (pseudoCache == null) pseudoCache = result; return result; }
From source file:fr.arlefebvre.pronostics.controller.UEFATeamsController.java
@RequestMapping("/uefa/teams") public List<Team> teams() { if (pseudoCache != null && !pseudoCache.isEmpty()) return pseudoCache; ArrayList<Team> result = new ArrayList<Team>(); String uri = "http://fr.fifa.com/fifa-world-ranking/ranking-table/men/uefa.html"; //On se connecte au site et on charge le document html Document doc;// w w w .j a v a 2 s.co m try { doc = Jsoup.connect(uri).get(); Elements elements = doc.getElementsByClass("table"); for (Element element : elements) { Element tbody = element.getElementsByTag("tbody").first(); for (Element child : tbody.children()) { Element teamNameElement = child.getElementsByClass("tbl-teamname").first(); String name = teamNameElement.text(); String countryCode = child.getElementsByClass("tbl-countrycode").first().text(); String imgUrl = teamNameElement.select("img").first().absUrl("src"); Team team = new Team(); team.setName(name); team.setCountryCode(countryCode); team.setImgUrl(imgUrl); team.setNationalTeam(true); result.add(team); } } //String titre = element.text(); } catch (IOException e) { e.printStackTrace(); } // RestTemplate restTemplate = new RestTemplate(); // ResponseEntity<ChampionListDto> response = restTemplate.getForEntity( // uri, // ChampionListDto.class); // // List<ChampionDto> champions = response.getBody().getChampions(); // return champions.stream().map(c -> getChampionById(c.getId()).getName()).collect(Collectors.toList()); result.sort((t1, t2) -> t1.getName().compareTo(t2.getName())); if (pseudoCache == null) pseudoCache = result; return result; }
From source file:jobhunter.cb.Client.java
/**
 * Downloads the page at {@code url} and builds a {@code Job} from it.
 *
 * <p>The external id is the value of the first {@code job_did} query parameter of the
 * URL, when present. Position, address and company name are read from the elements with
 * ids {@code job-title}, {@code CBBody_Location} and {@code CBBody_CompanyName}. The
 * description is the HTML-unescaped markup of {@code pnlJobDescription} followed by the
 * {@code section-body} parts of {@code job-requirements} and {@code job-snapshot-section}.
 * Progress is reported through {@code update} before connecting, before parsing and at the end.
 *
 * @return the populated job
 * @throws IOException        declared for the page download
 * @throws URISyntaxException if {@code url} is not a valid URI
 */
public Job execute() throws IOException, URISyntaxException {
    l.debug("Connecting to {}", url);
    update("Connecting", 1L);
    final Document page = Jsoup.connect(url).get();

    update("Parsing HTML", 2L);
    final Job parsed = Job.of();
    parsed.setPortal(CareerBuilderPlugin.portal);
    parsed.setLink(url);

    URLEncodedUtils.parse(new URI(url), "UTF-8").stream()
            .filter(pair -> pair.getName().equalsIgnoreCase("job_did"))
            .findFirst()
            .ifPresent(pair -> parsed.setExtId(pair.getValue()));

    parsed.setPosition(page.getElementById("job-title").text());
    parsed.setAddress(page.getElementById("CBBody_Location").text());
    parsed.getCompany().setName(page.getElementById("CBBody_CompanyName").text());

    // The three description parts are resolved in page order and then joined.
    String summaryHtml = StringEscapeUtils.unescapeHtml4(
            page.getElementById("pnlJobDescription").html());
    String requirementsHtml = StringEscapeUtils.unescapeHtml4(
            page.getElementById("job-requirements").getElementsByClass("section-body").html());
    String snapshotHtml = StringEscapeUtils.unescapeHtml4(
            page.getElementById("job-snapshot-section").getElementsByClass("section-body").html());
    parsed.setDescription(summaryHtml + requirementsHtml + snapshotHtml);

    update("Done", 3L);
    return parsed;
}
From source file:mx.clickfactura.util.TipoCambioUtil.java
public String getTipoCambio(String fecha) throws CustomBadRequestException, CustomNotFoundException, Exception { Pattern pattern = Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}$"); Matcher matcher = null;//w w w . j av a 2s .c o m matcher = pattern.matcher(fecha.trim()); if (!matcher.matches()) { throw new CustomBadRequestException("Fecha invalida, el formato debe ser: yyyy-MM-dd"); } SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); Calendar cal = new GregorianCalendar(); cal.setTime(sdf.parse(fecha)); String dia = (cal.get(Calendar.DATE) < 10) ? "0" + cal.get(Calendar.DATE) : cal.get(Calendar.DATE) + ""; String mes = ((cal.get(Calendar.MONTH) + 1) < 10) ? "0" + (cal.get(Calendar.MONTH) + 1) : (cal.get(Calendar.MONTH) + 1) + ""; String anio = cal.get(Calendar.YEAR) + ""; String fechaInicial = dia + "%2F" + mes + "%2F" + anio; CloseableHttpClient client = HttpClients.createDefault(); CookieStore cookies = new BasicCookieStore(); String[] fechaSeparada = fecha.split("-"); HttpGet get = new HttpGet("http://www.dof.gob.mx/indicadores_detalle.php?cod_tipo_indicador=158&dfecha=" + fechaInicial + "&hfecha=" + fechaInicial); HttpContext httpContext = new BasicHttpContext(); httpContext.setAttribute(HttpClientContext.COOKIE_STORE, cookies); CloseableHttpResponse response = client.execute(get, httpContext); //System.out.println(response.toString()); //System.out.println(response.getStatusLine()); //System.out.println(response.getEntity().getContentLength()); InputStream in = response.getEntity().getContent(); Header encoding = response.getEntity().getContentEncoding(); String body = IOUtils.toString(in, "UTF-8"); //System.out.println(body); Document doc = Jsoup.parse(body, "UTF-8"); doc = doc.normalise(); //System.out.println(doc.toString()); Elements e = doc.select("table"); Iterator iterator = e.iterator(); pattern = Pattern.compile("^\\d{2}\\.\\d{6}$"); matcher = null; String tipoCambio = null; while (iterator.hasNext()) { Element xd = (Element) iterator.next(); if 
(xd.getElementsByClass("txt").hasAttr("height")) { if (xd.getElementsByClass("txt").text().split(" ").length == 6) { String cambio = xd.getElementsByClass("txt").text().split(" ")[5]; matcher = pattern.matcher(cambio.trim()); if (matcher.matches()) { tipoCambio = cambio; //System.out.println(tipoCambio); break; } } } } client.close(); response.close(); if (tipoCambio == null || tipoCambio.isEmpty()) { throw new CustomNotFoundException("No hay un tipo de cambio para el da: " + fecha); } return tipoCambio; }
From source file:Leitura.Jxr.java
public String leituraJxr() throws IOException { //mtodo para pegar os nomes dos mtodos declarados Elements elements = document.getElementsByTag("pre"); elements.select("a.jxr_linenumber").remove(); // elements.select("strong.jxr_keyword").remove(); // elements.select("span.jxr_string").remove(); // elements.select("em.jxr_comment").remove(); for (Element children : elements) { children.getElementsByClass("jxr_comment").remove(); children.getElementsByClass("jxr_javadoccomment").remove(); }//from ww w .j av a 2 s .com return elements.text(); // retorna o cdigo sem lixo }