Example usage for org.jsoup.nodes.Element.text()

List of usage examples for org.jsoup.nodes.Element.text()

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.text().

Prototype

public String text() 

Source Link

Document

Gets the combined text of this element and all its children.

Usage

From source file:abelymiguel.miralaprima.GetPrima.java

/**
 * Scrapes the risk premium from the page at {@code providerUrl + indexName}, stores it in the
 * database and returns it.
 *
 * <p>The value is read from the first {@code .numero} element and the delta from the first
 * {@code .text-success} element. The percent is computed as
 * {@code 100 * delta / (value - delta)} and rounded to two decimals. If anything fails while
 * parsing or persisting, the error is logged and the latest values from the database are
 * returned instead.
 *
 * @param country_code country whose stored record is updated (same day) or created
 * @param providerUrl  base URL of the data provider
 * @param indexName    suffix appended to {@code providerUrl} to build the page URL
 * @return map with the keys {@code prima_value}, {@code prima_delta} and {@code prima_percent}
 * @throws IOException if the page cannot be fetched (the fetch is not covered by the fallback)
 */
private HashMap<String, Float> getPrimaDataDMacro(String country_code, String providerUrl, String indexName)
        throws IOException {
    HashMap<String, Float> respuestaJson = new HashMap<String, Float>();

    // Deliberately outside the try block: a failed fetch propagates to the caller.
    Document doc = Jsoup.connect(providerUrl + indexName).get();

    try {
        Element riskPremium = doc.select(".numero").first();
        // '.' characters are stripped before parsing (presumably thousands separators).
        Float prima_value = Float.valueOf(riskPremium.text().replace(".", ""));

        Element riskDelta = doc.select(".text-success").first();
        String deltaText = riskDelta.text();
        // Only the part after the first '>' is numeric; without a '>' the whole text is used.
        Float prima_delta = Float.valueOf(deltaText.substring(deltaText.indexOf(">") + 1));

        // Round to two decimals by formatting and re-parsing. The symbols are pinned to
        // Locale.ROOT so the decimal separator is always '.'; with the default locale's
        // symbols (e.g. ',' for es_ES) Float.valueOf would throw and every scrape would
        // silently fall back to the stored values.
        DecimalFormat df = new DecimalFormat("0.00",
                new java.text.DecimalFormatSymbols(java.util.Locale.ROOT));
        Float prima_percent = Float.valueOf(df.format(100 * prima_delta / (prima_value - prima_delta)));

        respuestaJson.put("prima_value", prima_value);
        respuestaJson.put("prima_delta", prima_delta);
        respuestaJson.put("prima_percent", prima_percent);

        if (isSameDay(country_code)) {
            this.updatePrimaInDB(prima_value, prima_delta, prima_percent,
                    this.getLatestPrimaIdFromDB(country_code));
        } else {
            this.storePrimaInDB(prima_value, prima_delta, prima_percent, country_code);
        }
    } catch (Exception ex) {
        // Best effort: log the failure and answer with the last stored values.
        Logger.getLogger(GetPrima.class.getName()).log(Level.SEVERE, null, ex);
        HashMap<String, Object> primaJson = getLatestPrimaFromDB(country_code);
        respuestaJson.put("prima_value", (Float) primaJson.get("prima_value"));
        respuestaJson.put("prima_delta", (Float) primaJson.get("prima_delta"));
        respuestaJson.put("prima_percent", (Float) primaJson.get("prima_percent"));
    }

    return respuestaJson;

}

From source file:ie.nuim.cs.dri.metadata.WebSearch.java

/**
 *
 * @param title the title of the ROS/*from w  w  w  . j  a va2  s .c o m*/
 */
public void searchGoogle(String title) {
    String searchTitle = buildGoogleSearchTitle(title);
    boolean found = false;
    String publication = "";
    String publicationType = "";
    int citationCount = -1;
    String url = "http://scholar.google.com/scholar?" + searchTitle;
    Document doc = Jsoup.parse(getGS());
    Elements aElement = doc.getElementsByTag("h3");
    System.out.println("=====searching google=======");
    for (Element e : aElement) {
        Elements bElement = e.getElementsByTag("a");
        for (Element f : bElement) {
            System.out.println(f.text() + "\t" + title);

            if (title.equalsIgnoreCase(f.text())) {
                found = true;
                break;
            }
        }
        // System.out.println(e);

    }
    if (found == true) {
        Elements pElement = doc.getElementsByTag("div");
        for (Element p : pElement) {
            Elements pubElement = p.getElementsByClass("gs_a");
            for (Element pub : pubElement) {
                System.out.println(pub);
            }

        }
        for (Element p : pElement) {
            Elements pubElement = p.getElementsByClass("gs_fl");
            for (Element pub : pubElement) {
                System.out.println(pub);
            }

        }

    }
}

From source file:abelymiguel.miralaprima.GetPrima.java

/**
 * Scrapes the risk premium from the page at {@code providerUrl + indexName}, stores it in the
 * database and returns it.
 *
 * <p>The value comes from the last {@code .price} element. Delta and percent come from the
 * combined text of the {@code .trending_up} elements (kept positive) or, when that text is
 * empty, of the {@code .trending_down} elements (negated). When both are empty, delta and
 * percent are zero. On any failure the error is logged and the latest values from the database
 * are returned instead.
 *
 * @param country_code country whose stored record is updated (same day) or created
 * @param providerUrl  base URL of the data provider
 * @param indexName    suffix appended to {@code providerUrl} to build the page URL
 * @return map with the keys {@code prima_value}, {@code prima_delta} and {@code prima_percent}
 */
private HashMap<String, Float> getPrimaDataBloom(String country_code, String providerUrl, String indexName) {

    HashMap<String, Float> respuestaJson = new HashMap<String, Float>();

    try {
        Document page = Jsoup.connect(providerUrl + indexName).get();

        Element priceElement = page.select(".price").last();
        Float prima_value = Float.valueOf(priceElement.text().replace(".", ""));

        Elements rising = page.select(".trending_up");
        Elements falling = page.select(".trending_down");

        // Prefer the "up" text; fall back to the "down" text with a negative sign.
        String trend = rising.text();
        float sign = 1f;
        if (trend.equals("")) {
            trend = falling.text();
            sign = -1f;
        }

        Float prima_delta;
        Float prima_percent;
        if (trend.equals("")) {
            prima_delta = 0f;
            prima_percent = 0f;
        } else {
            // Text before the first space is the delta; the rest, minus its last character,
            // is the percent.
            int gap = trend.indexOf(" ");
            prima_delta = sign * Float.valueOf(trend.substring(0, gap).replace(",", ""));
            prima_percent = sign * Float.valueOf(trend.substring(gap + 1, trend.length() - 1));
        }

        respuestaJson.put("prima_value", prima_value);
        respuestaJson.put("prima_delta", prima_delta);
        respuestaJson.put("prima_percent", prima_percent);

        if (isSameDay(country_code)) {
            this.updatePrimaInDB(prima_value, prima_delta, prima_percent,
                    this.getLatestPrimaIdFromDB(country_code));
        } else {
            this.storePrimaInDB(prima_value, prima_delta, prima_percent, country_code);
        }
    } catch (Exception ex) {
        // Best effort: log the failure and answer with the last stored values.
        Logger.getLogger(GetPrima.class.getName()).log(Level.SEVERE, null, ex);
        HashMap<String, Object> stored = getLatestPrimaFromDB(country_code);
        respuestaJson.put("prima_value", (Float) stored.get("prima_value"));
        respuestaJson.put("prima_delta", (Float) stored.get("prima_delta"));
        respuestaJson.put("prima_percent", (Float) stored.get("prima_percent"));
    }

    return respuestaJson;
}

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Extracts the grade that follows the "IMDB" marker in the single {@code .fm-title .fm-orange}
 * element of {@code doc} and stores it on {@code filmReview}.
 *
 * <p>Nothing is stored unless exactly one such element exists, its text contains the marker,
 * and the trimmed text after the marker is not blank.
 *
 * @param doc        parsed page to read from
 * @param filmReview review that receives the grade
 */
private void setFilmReviewImdbGrade(Document doc, FilmReview filmReview) {
    final String marker = "IMDB";

    Elements matches = doc.select(".fm-title .fm-orange");
    if (matches == null || matches.size() != 1) {
        return;
    }

    Element badge = matches.get(0);
    if (badge == null) {
        return;
    }

    String badgeText = badge.text();
    if (!StringUtils.isNotBlank(badgeText) || !StringUtils.contains(badgeText, marker)) {
        return;
    }

    String gradeStr = StringUtils.trimToEmpty(badgeText.substring(badgeText.indexOf(marker) + marker.length()));
    if (!StringUtils.isNotBlank(gradeStr)) {
        return;
    }

    float grade = getGrade(gradeStr);
    // NOTE(review): the IMDB grade is written through the Douban setter, which looks like a
    // copy-paste slip. Confirm which FilmReview setter is intended.
    filmReview.setGradeDouban(grade);
}

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Extracts the grade text from the single {@code .fm-title .fm-green} element of {@code doc}
 * and stores it on {@code filmReview} as the Douban grade.
 *
 * <p>Nothing is stored unless exactly one such element exists and its text is not blank.
 *
 * @param doc        parsed page to read from
 * @param filmReview review that receives the grade
 */
private void setFilmReviewDoubanGrade(Document doc, FilmReview filmReview) {
    // NOTE(review): the marker is an empty string, so the contains() check below always
    // passes and the "text after the marker" is the whole element text. The original literal
    // was presumably a non-ASCII label lost in an encoding conversion; verify against the
    // upstream source.
    final String marker = "";

    Elements matches = doc.select(".fm-title .fm-green");
    if (matches == null || matches.size() != 1) {
        return;
    }

    Element badge = matches.get(0);
    if (badge == null) {
        return;
    }

    String badgeText = badge.text();
    if (!StringUtils.isNotBlank(badgeText) || !StringUtils.contains(badgeText, marker)) {
        return;
    }

    String gradeStr = StringUtils.trimToEmpty(badgeText.substring(badgeText.indexOf(marker) + marker.length()));
    if (!StringUtils.isNotBlank(gradeStr)) {
        return;
    }

    float grade = getGrade(gradeStr);
    filmReview.setGradeDouban(grade);
}

From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.DBpediaOntologyOld.java

/**
 * Walks the direct children of {@code e} and fills in {@code category} from them.
 *
 * <p>An {@code a} child with a non-empty {@code href} supplies the label and URI of
 * {@code category}, which is then registered in {@code map} under its label and printed.
 * Each {@code li} inside a {@code ul} child becomes a new sub-category: it is populated
 * recursively, given {@code category} as its only parent, and added to the sub-classes of
 * {@code category}.
 *
 * @param e        element whose children are inspected
 * @param category category populated from {@code e}
 * @param map      label-to-category index updated for every labelled category
 */
private void traverseHierarchy(Element e, DBpediaCategory category, HashMap<String, DBpediaCategory> map) {
    for (Element child : e.children()) {
        String tagName = child.tag().getName();

        if (tagName.equals("a")) {
            String href = child.attr("href");
            if (href == null || href.length() == 0) {
                continue;
            }
            String label = child.text();
            category.setLabel(label);
            category.setUri(CLASSES_BASE_URI + label);
            map.put(category.getLabel(), category);
            System.out.println(label + "\t" + CLASSES_BASE_URI + label);
            continue;
        }

        if (!tagName.equals("ul")) {
            continue;
        }

        for (Element item : child.children()) {
            if (!item.tagName().equals("li")) {
                continue;
            }
            DBpediaCategory subCategory = new DBpediaCategory();
            traverseHierarchy(item, subCategory, map);
            // The parent set is assigned after the recursive call, replacing any earlier value.
            subCategory.parents = new HashSet<>();
            subCategory.parents.add(category);
            category.getSubClasses().add(subCategory);
        }
    }
}

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Stores one film-genre record for every known genre listed in the second {@code .fm-genre}
 * element found under {@code filmTitleElements}.
 *
 * <p>The element text is split with {@code SLASH_SPLITTER}. Each piece is looked up through
 * {@code enumGenreMapper}; pieces without a match are skipped. For every match a
 * {@code FilmGenre} linking the film code to the genre id is inserted, with the creation and
 * update times both set to the current time.
 *
 * @param filmTitleElements elements searched for {@code .fm-genre} descendants
 * @param film              film whose code is written to each record
 */
private void addFilmGenreList(Elements filmTitleElements, Film film) {
    Elements genreElements = filmTitleElements.select(".fm-genre");
    if (genreElements == null || genreElements.size() < 2) {
        return;
    }

    Element genreElement = genreElements.get(1);
    if (genreElement == null) {
        return;
    }

    String genreStr = genreElement.text();
    if (!StringUtils.isNotBlank(genreStr)) {
        return;
    }

    List<String> genreList = SLASH_SPLITTER.splitToList(genreStr);
    if (!CollectionUtils.isNotEmpty(genreList)) {
        return;
    }

    for (String genre : genreList) {
        EnumGenre queryGenre = new EnumGenre();
        queryGenre.setUrlGenre(genre);
        EnumGenre enumGenre = enumGenreMapper.queryEnumGenreByEnumGenre(queryGenre);
        if (enumGenre == null) {
            continue;
        }

        FilmGenre filmGenre = new FilmGenre();
        filmGenre.setFilmCode(film.getCode());
        filmGenre.setGenreId(enumGenre.getId());

        Date now = new Date();
        filmGenre.setCreateTime(now);
        filmGenre.setUpdateTime(now);
        filmGenreMapper.insertSelective(filmGenre);
    }
}

From source file:com.laudandjolynn.mytv.crawler.tvmao.TvMaoCrawler.java

/**
 * Tells whether {@code station} can be found on the crawled site.
 *
 * <p>If the crawl file for the station already exists, the answer comes from that file alone:
 * the station exists when the classification read from the page equals the station's
 * classification and one of the listed channel names equals the station's name. Otherwise the
 * site is crawled from {@code TV_MAO_URL}; when the station is found, the resulting page is
 * written out as crawl data and {@code true} is returned.
 *
 * @param station station to look for
 * @return {@code true} if the station was found; {@code false} if it was not, if its city or
 *         classification is {@code null}, if the crawl file cannot be read, or if the live
 *         search fails with an exception
 */
@Override
public boolean exists(TvStation station) {
    String city = station.getCity();
    String classify = station.getClassify();
    if (city == null || classify == null) {
        return false;
    }

    String cachedPath = getCrawlFilePath(station);
    if (new File(cachedPath).exists()) {
        String html;
        try {
            html = MyTvUtils.readAsHtml(cachedPath);
        } catch (IOException e) {
            return false;
        }

        Document doc = Jsoup.parse(html);
        String classifyName = doc.select("div.chlsnav div.pbar b").get(0).text().trim();
        for (Element item : doc.select("div.chlsnav ul.r li")) {
            String stationName = item.child(0).text().trim();
            if (stationName.equals(station.getName()) && classifyName.equals(classify)) {
                return true;
            }
        }
        // An existing crawl file is authoritative: no live crawl is attempted.
        return false;
    }

    HtmlPage homePage = (HtmlPage) WebCrawler.crawl(TV_MAO_URL);
    try {
        HtmlPage stationPage = searchStation(homePage, station);
        if (stationPage == null) {
            return false;
        }
        MyTvUtils.outputCrawlData(getCrawlerName(), stationPage.asXml(), getCrawlFileName(city, classify));
        return true;
    } catch (Exception e) {
        logger.error("error occur while search station: " + station.getName(), e);
        return false;
    }
}

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Determines the URL year of a film.
 *
 * <p>The year text is taken from the first {@code .fm-intro span .fm-genre} element of
 * {@code doc}. If that yields nothing, the part of {@code value} before its first {@code '-'}
 * is used instead, or the whole of {@code value} when it contains no {@code '-'}. The chosen
 * text is converted with {@code covertToUrlYear}.
 *
 * @param doc   parsed page to read from
 * @param value fallback text, presumably a date starting with the year (e.g. "2015-03-01")
 * @return the converted year, or an empty string when no year text is available
 */
private String getFilmUrlYear(Document doc, String value) {
    String urlYear = StringUtils.EMPTY;

    Elements yearElements = doc.select(".fm-intro span .fm-genre");
    if (CollectionUtils.isNotEmpty(yearElements)) {
        Element yearElement = yearElements.get(0);
        if (null != yearElement) {
            String yearText = StringUtils.trimToEmpty(yearElement.text());
            if (StringUtils.isNotBlank(yearText)) {
                urlYear = covertToUrlYear(yearText);
            }
        }
    }

    if (StringUtils.isBlank(urlYear) && StringUtils.isNotBlank(value)) {
        // indexOf returns -1 when there is no dash; substring(0, -1) would then throw
        // StringIndexOutOfBoundsException, so fall back to the whole value.
        int dashIndex = value.indexOf("-");
        String yearText = dashIndex >= 0 ? value.substring(0, dashIndex) : value;
        if (StringUtils.isNotBlank(yearText)) {
            urlYear = covertToUrlYear(yearText);
        }
    }
    return urlYear;
}

From source file:com.laudandjolynn.mytv.crawler.tvmao.TvMaoCrawler.java

/**
 * Parses the TV stations listed in a crawled page.
 *
 * <p>The classification is read from the first {@code div.chlsnav div.pbar b} element. Each
 * {@code div.chlsnav ul.r li} entry produces one station, named after the text of the entry's
 * first child. Every station receives the next value of {@code SEQUENCE} and is announced to
 * all registered listeners before being added to the result.
 *
 * @param city city assigned to every parsed station
 * @param html page markup to parse
 * @return the parsed stations, in page order
 */
private List<TvStation> parseTvStation(String city, String html) {
    Document doc = Jsoup.parse(html);
    String classify = doc.select("div.chlsnav div.pbar b").get(0).text().trim();

    List<TvStation> stations = new ArrayList<TvStation>();
    for (Element item : doc.select("div.chlsnav ul.r li")) {
        Element channel = item.child(0);

        TvStation station = new TvStation();
        station.setName(channel.text().trim());
        station.setCity(city);
        station.setClassify(classify);
        station.setSequence(SEQUENCE.incrementAndGet());

        TvStationFoundEvent event;
        for (CrawlEventListener listener : listeners) {
            // A fresh event object is created for each listener.
            event = new TvStationFoundEvent(this, station);
            listener.itemFound(event);
        }
        stations.add(station);
    }
    return stations;
}