List of usage examples for the org.jsoup.nodes.Element method text()
public String text()
From source file:abelymiguel.miralaprima.GetPrima.java
private HashMap<String, Float> getPrimaDataDMacro(String country_code, String providerUrl, String indexName) throws IOException { HashMap<String, Float> respuestaJson = new HashMap<String, Float>(); HashMap<String, Object> primaJson; Float prima_value;/*ww w . j a va 2s . c om*/ Float prima_delta; Float prima_percent; Document doc; doc = Jsoup.connect(providerUrl + indexName).get(); try { Element riskPremium = doc.select(".numero").first(); // System.out.println("Prima: " + riskPremium.text()); prima_value = Float.valueOf(riskPremium.text().replace(".", "")).floatValue(); Element riskDelta = doc.select(".text-success").first(); String deltaStr = riskDelta.text().substring(riskDelta.text().indexOf(">") + 1); prima_delta = Float.valueOf(deltaStr).floatValue(); // System.out.println("Trending delta: " + prima_delta); String percentStr; prima_percent = 100 * prima_delta / (prima_value - prima_delta); DecimalFormat df = new DecimalFormat("0.00"); percentStr = df.format(prima_percent); prima_percent = Float.valueOf(percentStr).floatValue(); // System.out.println("Trending prima_percent: " + prima_percent); respuestaJson.put("prima_value", prima_value); respuestaJson.put("prima_delta", prima_delta); respuestaJson.put("prima_percent", prima_percent); if (isSameDay(country_code)) { this.updatePrimaInDB(prima_value, prima_delta, prima_percent, this.getLatestPrimaIdFromDB(country_code)); } else { this.storePrimaInDB(prima_value, prima_delta, prima_percent, country_code); } } catch (Exception ex) { Logger.getLogger(GetPrima.class.getName()).log(Level.SEVERE, null, ex); primaJson = getLatestPrimaFromDB(country_code); respuestaJson.put("prima_value", (Float) primaJson.get("prima_value")); respuestaJson.put("prima_delta", (Float) primaJson.get("prima_delta")); respuestaJson.put("prima_percent", (Float) primaJson.get("prima_percent")); } return respuestaJson; }
From source file:ie.nuim.cs.dri.metadata.WebSearch.java
/** * * @param title the title of the ROS/*from w w w . j a va2 s .c o m*/ */ public void searchGoogle(String title) { String searchTitle = buildGoogleSearchTitle(title); boolean found = false; String publication = ""; String publicationType = ""; int citationCount = -1; String url = "http://scholar.google.com/scholar?" + searchTitle; Document doc = Jsoup.parse(getGS()); Elements aElement = doc.getElementsByTag("h3"); System.out.println("=====searching google======="); for (Element e : aElement) { Elements bElement = e.getElementsByTag("a"); for (Element f : bElement) { System.out.println(f.text() + "\t" + title); if (title.equalsIgnoreCase(f.text())) { found = true; break; } } // System.out.println(e); } if (found == true) { Elements pElement = doc.getElementsByTag("div"); for (Element p : pElement) { Elements pubElement = p.getElementsByClass("gs_a"); for (Element pub : pubElement) { System.out.println(pub); } } for (Element p : pElement) { Elements pubElement = p.getElementsByClass("gs_fl"); for (Element pub : pubElement) { System.out.println(pub); } } } }
From source file:abelymiguel.miralaprima.GetPrima.java
private HashMap<String, Float> getPrimaDataBloom(String country_code, String providerUrl, String indexName) { HashMap<String, Float> respuestaJson = new HashMap<String, Float>(); HashMap<String, Object> primaJson; Float prima_value;//from ww w .jav a 2 s .c om Float prima_delta; Float prima_percent; Document doc; try { doc = Jsoup.connect(providerUrl + indexName).get(); Element riskPremium = doc.select(".price").last(); // System.out.println("Prima: " + riskPremium.text()); prima_value = Float.valueOf(riskPremium.text().replace(".", "")).floatValue(); Elements riskPremiumsUp = doc.select(".trending_up"); Elements riskPremiumsDown = doc.select(".trending_down"); // System.out.println("Trending: " + riskPremiumsUp.text()); // System.out.println("Trending: " + riskPremiumsDown.text()); if (!riskPremiumsUp.text().equals("")) { String delta = riskPremiumsUp.text(); prima_delta = Float.valueOf(delta.substring(0, delta.indexOf(" ")).replace(",", "")).floatValue(); // System.out.println("Delta: " + prima_delta); String percent = riskPremiumsUp.text(); prima_percent = Float.valueOf(percent.substring(percent.indexOf(" ") + 1, percent.length() - 1)) .floatValue(); // System.out.println("Percent: " + prima_percent); } else if (!riskPremiumsDown.text().equals("")) { String delta = riskPremiumsDown.text(); prima_delta = Float.valueOf(delta.substring(0, delta.indexOf(" ")).replace(",", "")).floatValue(); prima_delta = prima_delta * -1; // System.out.println("Delta: " + prima_delta); String percent = riskPremiumsDown.text(); prima_percent = Float.valueOf(percent.substring(percent.indexOf(" ") + 1, percent.length() - 1)) .floatValue(); prima_percent = prima_percent * -1; // System.out.println("Percent: " + prima_percent); } else { prima_delta = 0f; prima_percent = 0f; } respuestaJson.put("prima_value", prima_value); respuestaJson.put("prima_delta", prima_delta); respuestaJson.put("prima_percent", prima_percent); if (isSameDay(country_code)) { this.updatePrimaInDB(prima_value, 
prima_delta, prima_percent, this.getLatestPrimaIdFromDB(country_code)); } else { this.storePrimaInDB(prima_value, prima_delta, prima_percent, country_code); } } catch (Exception ex) { Logger.getLogger(GetPrima.class.getName()).log(Level.SEVERE, null, ex); primaJson = getLatestPrimaFromDB(country_code); respuestaJson.put("prima_value", (Float) primaJson.get("prima_value")); respuestaJson.put("prima_delta", (Float) primaJson.get("prima_delta")); respuestaJson.put("prima_percent", (Float) primaJson.get("prima_percent")); } return respuestaJson; }
From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java
/**
 * Extracts the grade that follows the "IMDB" marker in the page's single
 * {@code .fm-title .fm-orange} element and stores it on {@code filmReview}.
 *
 * <p>Nothing is stored unless exactly one element matches, its text contains "IMDB", and the
 * text after the marker is not blank.
 *
 * @param doc parsed film page
 * @param filmReview review that receives the grade
 */
private void setFilmReviewImdbGrade(Document doc, FilmReview filmReview) {
    Elements matches = doc.select(".fm-title .fm-orange");
    if (matches == null || matches.size() != 1) {
        return;
    }

    Element imdbElement = matches.get(0);
    if (imdbElement == null) {
        return;
    }

    String labelText = imdbElement.text();
    if (StringUtils.isBlank(labelText) || !StringUtils.contains(labelText, "IMDB")) {
        return;
    }

    int gradeStart = labelText.indexOf("IMDB") + "IMDB".length();
    String gradeStr = StringUtils.trimToEmpty(labelText.substring(gradeStart));
    if (StringUtils.isBlank(gradeStr)) {
        return;
    }

    float grade = getGrade(gradeStr);
    // NOTE(review): the IMDB grade is written through setGradeDouban - this looks like a
    // copy-paste slip from the Douban variant; confirm which FilmReview setter is intended.
    filmReview.setGradeDouban(grade);
}
From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java
/**
 * Extracts the grade from the page's single {@code .fm-title .fm-green} element and stores it
 * on {@code filmReview} via {@code setGradeDouban}.
 *
 * <p>Nothing is stored unless exactly one element matches and the text after the marker is
 * not blank.
 *
 * @param doc parsed film page
 * @param filmReview review that receives the grade
 */
private void setFilmReviewDoubanGrade(Document doc, FilmReview filmReview) {
    // NOTE(review): the marker literal is empty in this copy of the source, so the whole
    // element text is used as the grade - presumably a non-ASCII label was lost; confirm.
    final String marker = "";

    Elements matches = doc.select(".fm-title .fm-green");
    if (matches == null || matches.size() != 1) {
        return;
    }

    Element doubanElement = matches.get(0);
    if (doubanElement == null) {
        return;
    }

    String labelText = doubanElement.text();
    if (StringUtils.isBlank(labelText) || !StringUtils.contains(labelText, marker)) {
        return;
    }

    int gradeStart = labelText.indexOf(marker) + marker.length();
    String gradeStr = StringUtils.trimToEmpty(labelText.substring(gradeStart));
    if (StringUtils.isBlank(gradeStr)) {
        return;
    }

    float grade = getGrade(gradeStr);
    filmReview.setGradeDouban(grade);
}
From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.DBpediaOntologyOld.java
/**
 * Walks the children of {@code e} and fills in {@code category} and its sub-classes.
 *
 * <p>An {@code a} child with a non-empty {@code href} supplies the label and URI of
 * {@code category}, which is then registered in {@code map} under its label. Every {@code li}
 * inside a {@code ul} child becomes a new sub-category that is traversed recursively and
 * linked to {@code category} as its only parent.
 *
 * @param e element whose children are inspected
 * @param category category described by {@code e}
 * @param map registry of categories keyed by label
 */
private void traverseHierarchy(Element e, DBpediaCategory category, HashMap<String, DBpediaCategory> map) {
    for (Element child : e.children()) {
        switch (child.tag().getName()) {
        case "a":
            String href = child.attr("href");
            if (href == null || href.length() == 0) {
                break;
            }
            String label = child.text();
            String uri = CLASSES_BASE_URI + label;
            category.setLabel(label);
            category.setUri(uri);
            map.put(category.getLabel(), category);
            System.out.println(label + "\t" + uri);
            break;
        case "ul":
            for (Element item : child.children()) {
                if (!item.tagName().equals("li")) {
                    continue;
                }
                DBpediaCategory subCategory = new DBpediaCategory();
                traverseHierarchy(item, subCategory, map);
                // The parent set is assigned after the recursive call, as in the original.
                subCategory.parents = new HashSet<>();
                subCategory.parents.add(category);
                category.getSubClasses().add(subCategory);
            }
            break;
        default:
            break;
        }
    }
}
From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java
/**
 * Stores one {@code FilmGenre} row per known genre listed for {@code film}.
 *
 * <p>The genre text is taken from the second {@code .fm-genre} element under
 * {@code filmTitleElements} and split with {@code SLASH_SPLITTER}. Each part is looked up
 * through {@code enumGenreMapper}; parts without a match are skipped.
 *
 * @param filmTitleElements elements searched for {@code .fm-genre}
 * @param film film whose code is written to each row
 */
private void addFilmGenreList(Elements filmTitleElements, Film film) {
    Elements genreElements = filmTitleElements.select(".fm-genre");
    if (genreElements == null || genreElements.size() < 2) {
        return;
    }

    Element genreElement = genreElements.get(1);
    if (genreElement == null) {
        return;
    }

    String genreStr = genreElement.text();
    if (StringUtils.isBlank(genreStr)) {
        return;
    }

    List<String> genreList = SLASH_SPLITTER.splitToList(genreStr);
    if (genreList == null || genreList.isEmpty()) {
        return;
    }

    for (String genre : genreList) {
        EnumGenre probe = new EnumGenre();
        probe.setUrlGenre(genre);
        EnumGenre known = enumGenreMapper.queryEnumGenreByEnumGenre(probe);
        if (known == null) {
            continue;
        }

        FilmGenre row = new FilmGenre();
        row.setFilmCode(film.getCode());
        row.setGenreId(known.getId());
        // Creation and update time share the same instant.
        Date now = new Date();
        row.setCreateTime(now);
        row.setUpdateTime(now);
        filmGenreMapper.insertSelective(row);
    }
}
From source file:com.laudandjolynn.mytv.crawler.tvmao.TvMaoCrawler.java
@Override public boolean exists(TvStation station) { String city = station.getCity(); String classify = station.getClassify(); if (city == null || classify == null) { return false; }//from w w w. ja v a 2 s.c o m String tvMaoFile = getCrawlFilePath(station); File file = new File(tvMaoFile); if (file.exists()) { String html = null; try { html = MyTvUtils.readAsHtml(tvMaoFile); } catch (IOException e) { return false; } Document doc = Jsoup.parse(html); Elements classifyElements = doc.select("div.chlsnav div.pbar b"); String classifyName = classifyElements.get(0).text().trim(); Elements channelElements = doc.select("div.chlsnav ul.r li"); for (Element element : channelElements) { Element channel = element.child(0); String stationName = channel.text().trim(); if (stationName.equals(station.getName()) && classifyName.equals(classify)) { return true; } } return false; } HtmlPage htmlPage = (HtmlPage) WebCrawler.crawl(TV_MAO_URL); try { if ((htmlPage = searchStation(htmlPage, station)) != null) { MyTvUtils.outputCrawlData(getCrawlerName(), htmlPage.asXml(), getCrawlFileName(city, classify)); return true; } } catch (Exception e) { logger.error("error occur while search station: " + station.getName(), e); } return false; }
From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java
/**
 * Determines the URL year of a film.
 *
 * <p>The year text is taken from the first {@code .fm-intro span .fm-genre} element of
 * {@code doc}. If that yields nothing, the part of {@code value} before its first {@code '-'}
 * is used instead; a {@code value} without a dash is used as a whole. The chosen text is
 * converted with {@code covertToUrlYear}.
 *
 * @param doc parsed film page
 * @param value fallback text, e.g. a date whose leading part is the year; may be blank
 * @return the converted year, or an empty string when neither source provides one
 */
private String getFilmUrlYear(Document doc, String value) {
    String urlYear = StringUtils.EMPTY;

    Elements yearElements = doc.select(".fm-intro span .fm-genre");
    if (CollectionUtils.isNotEmpty(yearElements)) {
        Element yearElement = yearElements.get(0);
        if (null != yearElement) {
            String yearText = StringUtils.trimToEmpty(yearElement.text());
            if (StringUtils.isNotBlank(yearText)) {
                urlYear = covertToUrlYear(yearText);
            }
        }
    }

    if (StringUtils.isBlank(urlYear) && StringUtils.isNotBlank(value)) {
        // indexOf returns -1 when there is no dash; substring(0, -1) would then throw
        // StringIndexOutOfBoundsException, so the whole value is used in that case.
        int dash = value.indexOf('-');
        String yearText = dash >= 0 ? value.substring(0, dash) : value;
        if (StringUtils.isNotBlank(yearText)) {
            urlYear = covertToUrlYear(yearText);
        }
    }
    return urlYear;
}
From source file:com.laudandjolynn.mytv.crawler.tvmao.TvMaoCrawler.java
/**
 * Builds the TV stations listed in a crawled page.
 *
 * <p>The classify shared by all stations is the text of the first
 * {@code div.chlsnav div.pbar b} element; each {@code div.chlsnav ul.r li} item contributes
 * one station named after the text of its first child. Every station gets the next value of
 * {@code SEQUENCE} and is announced to all registered listeners before it is added to the
 * result.
 *
 * @param city city assigned to every station
 * @param html page markup to parse
 * @return the stations in page order
 */
private List<TvStation> parseTvStation(String city, String html) {
    Document page = Jsoup.parse(html);
    String classify = page.select("div.chlsnav div.pbar b").get(0).text().trim();

    List<TvStation> stations = new ArrayList<TvStation>();
    for (Element item : page.select("div.chlsnav ul.r li")) {
        String stationName = item.child(0).text().trim();

        TvStation station = new TvStation();
        station.setName(stationName);
        station.setCity(city);
        station.setClassify(classify);
        station.setSequence(SEQUENCE.incrementAndGet());

        // Listeners are notified before the station is added to the result list.
        TvStationFoundEvent event = null;
        for (CrawlEventListener listener : listeners) {
            // A fresh event is created per listener, as in the original.
            event = new TvStationFoundEvent(this, station);
            listener.itemFound(event);
        }
        stations.add(station);
    }
    return stations;
}