Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.example.amazon.mw.exempli.ExempliClient.java

/**
 * Collects the parental-guide paragraphs from IMDb for the title mapped to an ASIN.
 *
 * <p>The ASIN is resolved through {@code asinToImdbId}. The text of every element
 * matching {@code .display p} on the guide page is added to the result in document
 * order. An I/O failure while fetching is logged and yields whatever was collected
 * so far (normally an empty list).
 *
 * @param videoAsin key looked up in {@code asinToImdbId}
 * @return the guide texts; empty when no IMDb id is mapped to the ASIN or the page
 *         could not be fetched
 */
public ArrayList<String> getImdbData(String videoAsin) {
    ArrayList<String> guides = new ArrayList<String>();
    String imdbId = asinToImdbId.get(videoAsin);
    Log.e("Asin: ", videoAsin);
    if (imdbId == null) {
        // Without an id the request would go to ".../title/null/parentalguide".
        Log.e("exception", "No IMDb id known for ASIN " + videoAsin);
        return guides;
    }

    // Build the URL once so the logged address is exactly the one requested.
    String guideUrl = "http://www.imdb.com/title/" + imdbId + "/parentalguide";
    Log.e("Parent guide page: ", guideUrl);
    try {
        Document doc = Jsoup.connect(guideUrl).get();
        Elements parentGuideElements = doc.select(".display p");

        for (Element element : parentGuideElements) {
            String guideText = element.text();
            guides.add(guideText);
            Log.e("retrieved guide: ", guideText);
        }
    } catch (IOException e) {
        Log.e("exception", "Client failure: ", e);
    }
    return guides;
}

From source file:com.decker.parkingSearch.receiver.ParkingContentReceiver.java

/**
 * Downloads the listing page at {@code baseUrl} and adds one {@code Park} to
 * {@code info.parks} for every detail cell that carries a contact link.
 *
 * <p>Each cell matching {@code td[style="vertical-align:top;"]} is processed on its
 * own: a failure inside one cell is reported on standard output and the remaining
 * cells are still processed. Cells without a {@code span > a} link, or whose link
 * does not contain a {@code javascript:count(...)} call, are skipped.
 *
 * @throws IOException if the listing page itself cannot be fetched
 */
public void fetch() throws IOException {

    Document doc = Jsoup.connect(this.baseUrl).get();
    Elements detailBox = doc.select("td[style=\"vertical-align:top;\"]");
    // Loop-invariant: compile once instead of once per cell.
    Pattern countCall = Pattern.compile("(?<=javascript\\:count\\().*(?=\\))");

    for (Element es : detailBox) {
        try {
            Park detail = new Park();
            detail.name = es.childNode(0) instanceof Element ? ((Element) es.childNode(0)).text() : "";

            // The address is the run of child nodes after the name, up to the first
            // node that is immediately repeated; "<br>" separators are dropped.
            StringBuilder address = new StringBuilder();
            for (int i = 1; i < es.childNodes().size() - 1; i++) {
                String content = es.childNodes().get(i).toString();
                if (content.equals(es.childNodes().get(i + 1).toString())) {
                    break;
                }
                if (!content.equals("<br>")) {
                    address.append(StringEscapeUtils.unescapeHtml(content)).append(" ");
                }
            }
            detail.address = address.toString();

            Elements contactLinks = es.select("span > a");
            if (contactLinks.isEmpty()) {
                continue;
            }
            String secretContent = StringEscapeUtils
                    .unescapeHtml(contactLinks.get(0).attr("href").replaceAll("\"", ""));
            Matcher matcher = countCall.matcher(secretContent);
            if (!matcher.find()) {
                continue;
            }
            // Arguments of the count(...) call: index 1 is used as the lookup key,
            // index 2 as the e-mail address.
            String[] secretInfoList = matcher.group().split(",");

            detail.mobileNumber = lookupStat(secretInfoList[1], 0);
            detail.phoneNumber = lookupStat(secretInfoList[1], 1);
            detail.faxNumber = lookupStat(secretInfoList[1], 2);
            detail.email = StringUtils.isNotBlank(secretInfoList[2]) ? secretInfoList[2] : "";
            this.info.parks.add(detail);
        } catch (Exception ex) {
            // The handler must not throw: an empty cell has no child 0, and an
            // exception escaping from here would abort all remaining cells.
            String label = es.childNodes().isEmpty() ? "<empty cell>" : es.childNode(0).toString();
            System.out.printf("Error during fetch %s park with url %s %n", label, this.baseUrl);
            ex.printStackTrace();
        }
    }

}

/**
 * Fetches one contact value from the stats-lookup endpoint.
 *
 * @param keyId  value sent as the {@code keyID} query parameter
 * @param statId value sent as the {@code StatID} query parameter
 * @return the text of the response, or an empty string when it is blank
 * @throws IOException if the request fails
 */
private String lookupStat(String keyId, int statId) throws IOException {
    String content = Jsoup
            .connect(String.format("http://www.goseeaustralia.com.au/statslookup.asp?keyID=%s&StatID=%d",
                    keyId, statId))
            .get().text();
    return StringUtils.isNotBlank(content) ? content : "";
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Parses the rows of the {@code .data} table into lent items.
 *
 * <p>The first row is skipped. For each further row the title, author, deadline,
 * home branch and renewal link are extracted. A failure while reading the optional
 * fields of a row is printed and the item is still added with whatever was set.
 *
 * @param doc the parsed account page
 * @return the lent items, or {@code null} when the table has exactly one row or a
 *         row contains the text "keine Daten"
 */
static List<LentItem> parse_medialist(Document doc) {
    List<LentItem> media = new ArrayList<>();
    Elements copytrs = doc.select(".data tr");

    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    int trs = copytrs.size();
    if (trs == 1) {
        return null;
    }
    assert (trs > 0);
    for (int i = 1; i < trs; i++) {
        Element tr = copytrs.get(i);
        LentItem item = new LentItem();

        if (tr.text().contains("keine Daten")) {
            return null;
        }
        Elements titleCell = tr.select(".account-display-title");
        item.setTitle(titleCell.select("b, strong").text().trim());
        try {
            // The author follows the first <br> in the title cell. When there is no
            // <br>, leave the author unset instead of aborting the rest of the row.
            String[] titleParts = titleCell.html().split("<br[ /]*>");
            if (titleParts.length > 1) {
                item.setAuthor(titleParts[1].trim());
            }

            String[] col3split = tr.select(".account-display-state").html().split("<br[ /]*>");
            String deadline = Jsoup.parse(col3split[0].trim()).text().trim();
            if (deadline.contains(":")) {
                // BSB Munich: <span class="hidden-sm hidden-md hidden-lg">Fälligkeitsdatum : </span>26.02.2016<br>
                deadline = deadline.split(":")[1].trim();
            }
            if (deadline.contains("-")) {
                // Chemnitz: 22.07.2015 - 20.10.2015<br>
                deadline = deadline.split("-")[1].trim();
            }

            try {
                item.setDeadline(fmt.parseLocalDate(deadline).toString());
            } catch (IllegalArgumentException e1) {
                e1.printStackTrace();
            }

            if (col3split.length > 1) {
                item.setHomeBranch(col3split[1].trim());
            }

            for (Element link : tr.select("a")) {
                String href = link.attr("abs:href");
                Map<String, String> hrefq = getQueryParamsFirst(href);
                // Constant-first equals: links without a "methodToCall" parameter
                // must be skipped, not end the search with a NullPointerException.
                if ("renewal".equals(hrefq.get("methodToCall"))) {
                    item.setProlongData(href.split("\\?")[1]);
                    item.setRenewable(true);
                    break;
                }
            }

        } catch (Exception ex) {
            ex.printStackTrace();
        }

        media.add(item);
    }
    return media;
}

From source file:org.javiermoreno.torrentscratcher.Runner.java

/**
 * Looks the movie up on IMDb and fills in its IMDb id, genre and rating.
 *
 * <p>The original title is preferred over the title for the search. The first
 * {@code .result_text a} hit is followed; the movie is returned unchanged when there
 * is no hit or the hit is not a {@code /title/...} link. I/O failures are logged and
 * the movie is returned with whatever was set before the failure.
 *
 * @param movie the movie to enrich; modified in place
 * @return the same {@code movie} instance
 */
public Movie enrichMovieWithImdbSearch(Movie movie) {
    try {
        String url = "http://www.imdb.com/find?q={title}&s=all";
        String title = movie.getOriginalTitle() != null ? movie.getOriginalTitle() : movie.getTitle();
        url = url.replace("{title}", java.net.URLEncoder.encode(title, "UTF-8"));
        Document doc = Jsoup.connect(url).get();
        Elements results = doc.select(".result_text a");
        if (results.size() == 0) {
            log.warn("IMDB search 404: " + movie.getTitle());
            return movie;
        }
        String link = results.first().attr("href");
        String imdbId = imdbIdFromTitleLink(link);
        if (imdbId == null) {
            log.warn("IMDB search returned a non-title link: " + link);
            return movie;
        }
        movie.setImdbId(imdbId);
        url = "http://www.imdb.com" + link;
        doc = Jsoup.connect(url).get();
        movie.setGenre(doc.select("[itemprop=genre]").eq(0).text());
        String rating = doc.select("[itemprop=aggregateRating] [itemprop=ratingValue]").text();
        if (!rating.isEmpty()) {
            try {
                // The page may use a decimal comma.
                movie.setRating(Double.valueOf(rating.replace(',', '.')));
            } catch (NumberFormatException ex) {
                log.warn("Unparseable IMDB rating '" + rating + "' for " + movie.getTitle());
            }
        }
    } catch (IOException ex) {
        log.warn(ex.getMessage());
    }
    return movie;
}

/**
 * Extracts the id from a link of the form {@code /title/<id>/?query}.
 *
 * <p>The query string and the trailing slash are both optional.
 *
 * @param link the {@code href} of a search result
 * @return the id segment, or {@code null} when the link does not start with
 *         {@code /title/} or the segment is empty
 */
private String imdbIdFromTitleLink(String link) {
    String prefix = "/title/";
    if (!link.startsWith(prefix)) {
        return null;
    }
    int end = link.indexOf('?');
    if (end < 0) {
        end = link.length();
    }
    String id = link.substring(prefix.length(), end);
    if (id.endsWith("/")) {
        id = id.substring(0, id.length() - 1);
    }
    return id.isEmpty() ? null : id;
}

From source file:org.cellcore.code.engine.page.extractor.mb.MBPageDataExtractor.java

/**
 * Returns the lowest price listed in the first {@code .stock} table of the page.
 *
 * <p>Rows are read from the second {@code tr} onwards; the price is taken from the
 * {@code text} attribute of the first child node of the fourth {@code td} and
 * normalised with {@code cleanPriceString} before parsing.
 *
 * @param doc the parsed product page
 * @return {@code -1} when the page contains the "not in stock" notice, otherwise
 *         the smallest parsed price ({@code Float.MAX_VALUE} if no row was read)
 */
protected float getPrice(Document doc) {
    boolean unavailable = !doc.getElementsContainingText("Cette carte n'est pas disponible en stock")
            .isEmpty();
    if (unavailable) {
        return -1;
    }

    Elements rows = doc.select(".stock").get(0).getElementsByTag("tr");
    float lowest = Float.MAX_VALUE;
    int rowIndex = 1;
    while (rowIndex < rows.size()) {
        Elements cells = rows.get(rowIndex).getElementsByTag("td");
        String rawPrice = cells.get(3).childNodes().get(0).attr("text");
        float candidate = Float.parseFloat(cleanPriceString(rawPrice));
        if (candidate < lowest) {
            lowest = candidate;
        }
        rowIndex++;
    }
    return lowest;
}

From source file:org.wallride.web.support.Posts.java

/**
 * Rewrites media image URLs in an HTML fragment to request a double-width rendition.
 *
 * <p>For every {@code img} whose {@code src} starts with the configured media URL
 * prefix and whose {@code style} contains {@code width: <n>px;}, the suffix
 * {@code ?w=<2n>} is appended to the {@code src}. Other images are left untouched.
 *
 * @param html the HTML to process
 * @return the inner HTML of the parsed document's body after rewriting
 */
protected String parse(String html) {
    Document document = Jsoup.parse(html);
    // Loop-invariant: compile once instead of once per image.
    Pattern widthPattern = Pattern.compile("width: ([0-9]+)px;");
    for (Element element : document.select("img")) {
        String src = element.attr("src");
        if (!src.startsWith(wallRideProperties.getMediaUrlPrefix())) {
            continue;
        }
        Matcher matcher = widthPattern.matcher(element.attr("style"));
        if (matcher.find()) {
            String replaced = src + "?w=" + Integer.parseInt(matcher.group(1)) * 2;
            element.attr("src", replaced);
        }
    }
    return document.body().html();
}

From source file:org.cellcore.code.engine.page.extractor.mb.MBPageDataExtractor.java

/**
 * Returns the stock count of the cheapest offer in the first {@code .stock} table.
 *
 * <p>Rows are read from the second {@code tr} onwards. The price comes from the
 * fourth {@code td} (first child node) and the stock from the fifth {@code td}
 * (second child node), both via the node's {@code text} attribute. Parentheses are
 * stripped from the stock text before it is parsed.
 *
 * @param doc the parsed product page
 * @return {@code 0} when the page contains the "not in stock" notice or no row was
 *         read, otherwise the stock of the first row with the lowest price
 */
@Override
protected int getStock(Document doc) {
    boolean unavailable = !doc.getElementsContainingText("Cette carte n'est pas disponible en stock")
            .isEmpty();
    if (unavailable) {
        return 0;
    }

    Elements rows = doc.select(".stock").get(0).getElementsByTag("tr");
    float lowestPrice = Float.MAX_VALUE;
    int stockAtLowest = 0;
    int rowIndex = 1;
    while (rowIndex < rows.size()) {
        Elements cells = rows.get(rowIndex).getElementsByTag("td");
        // Both cells are read before any parsing, as in the table's column order.
        String rawPrice = cells.get(3).childNodes().get(0).attr("text");
        String rawStock = cells.get(4).childNodes().get(1).attr("text");
        float candidate = Float.parseFloat(cleanPriceString(rawPrice));

        if (candidate < lowestPrice) {
            lowestPrice = candidate;
            String digits = rawStock.replaceAll("\\(", "").replaceAll("\\)", "");
            stockAtLowest = Integer.parseInt(digits);
        }
        rowIndex++;
    }
    return stockAtLowest;
}

From source file:com.josue.lottery.eap.service.core.LotoImporter.java

/**
 * Reads an HTML file, logs the text of every cell of its first table and prints the
 * document title.
 *
 * <p>If the file cannot be opened or read, the error is logged and nothing is
 * parsed. If the document has no table, that is logged and only the title is
 * printed.
 *
 * @param file the HTML file to read
 */
private void parseHtml(File file) {
    StringBuilder sb = new StringBuilder();
    // try-with-resources closes the reader on every path.
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
        String line;
        while ((line = br.readLine()) != null) {
            // Keep the line break so words on adjacent lines are not fused together.
            sb.append(line).append('\n');
        }
    } catch (IOException ex) {
        // Covers FileNotFoundException as well; there is nothing to parse.
        java.util.logging.Logger.getLogger(LotoImporter.class.getName()).log(Level.SEVERE, null, ex);
        return;
    }

    Document doc = Jsoup.parse(sb.toString());
    Element table = doc.select("table").first();
    if (table == null) {
        logger.info("no table found in " + file);
    } else {
        for (Element cell : table.select("td")) {
            logger.info("text : " + cell.text());
        }
    }
    String title = doc.title();
    System.out.println("Document title : " + title);

}

From source file:hu.petabyte.redflags.engine.gear.parser.MetadataParser.java

/**
 * Copies the fields of a notice's data tab into the notice.
 *
 * <p>The notice is marked cancelled when the tab contains {@code div#cancelDoc}.
 * Every row of {@code table.data} contributes one field: the name is the text of the
 * row's first {@code th}, the value is the text of its last {@code td}. Rows without
 * a {@code th} are skipped.
 *
 * @param notice  the notice to update; modified in place
 * @param dataTab the parsed data tab
 * @return the same {@code notice} instance
 */
public Notice parseDataTab(Notice notice, Document dataTab) {
    Data data = notice.getData();

    notice.setCancelled(!dataTab.select("div#cancelDoc").isEmpty());

    for (Element tr : dataTab.select("table.data tr")) {
        Element header = tr.select("th").first();
        if (header == null) {
            // first() returns null when the row has no <th>; such a row names no field.
            continue;
        }
        String field = header.text();
        String value = JsoupUtils.text(tr.select("td").last());
        LOG.trace("{} data: {} = {}", notice.getId(), field, value);
        setDataField(data, field, value);
    }
    return notice;
}

From source file:me.vertretungsplan.parser.UntisInfoHeadlessParser.java

/**
 * Logs in, downloads the schedule page and parses every day found on it.
 *
 * <p>Day headings are the {@code b} elements directly under {@code #vertretung} or
 * under one of its {@code p} children. If the page has no such heading but contains
 * a frame whose {@code src} includes {@code w00}, the first such frame is downloaded
 * and searched instead. For each heading the element handed to {@code parseDay}
 * depends on whether the heading sits inside a {@code p}.
 *
 * @return the schedule populated with the parsed days, classes and teachers
 * @throws IOException                if a download fails
 * @throws JSONException              declared by the original signature
 * @throws CredentialInvalidException declared by the original signature
 */
@Override
public SubstitutionSchedule getSubstitutionSchedule()
        throws IOException, JSONException, CredentialInvalidException {
    new LoginHandler(scheduleData, credential, cookieProvider).handleLogin(executor, cookieStore);

    final String daySelector = "#vertretung > p > b, #vertretung > b";
    SubstitutionSchedule schedule = SubstitutionSchedule.fromData(scheduleData);

    Document page = Jsoup.parse(httpGet(url, data.optString(PARAM_ENCODING, null)));
    page.setBaseUri(url);
    Elements dayHeadings = page.select(daySelector);
    Elements frames = page.select("frame[src*=w00]");

    if (dayHeadings.isEmpty() && !frames.isEmpty()) {
        // The schedule is embedded in a frame: load that frame and search it instead.
        String frameUrl = frames.get(0).absUrl("src");
        page = Jsoup.parse(httpGet(frameUrl, data.optString(PARAM_ENCODING, null)));
        dayHeadings = page.select(daySelector);
    }

    for (Element heading : dayHeadings) {
        SubstitutionScheduleDay day = new SubstitutionScheduleDay();
        day.setLastChangeString("");

        String dateText = heading.text();
        day.setDateString(dateText);
        day.setDate(ParserUtils.parseDate(dateText));

        Element container = heading.parent();
        Element next = "p".equals(container.tagName())
                ? container.nextElementSibling().nextElementSibling()
                : container.select("p").first().nextElementSibling();
        parseDay(day, next, schedule, null);
    }

    schedule.setClasses(getAllClasses());
    schedule.setTeachers(getAllTeachers());
    return schedule;
}