Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:edu.rowan.app.carousel.CarouselFetch.java

/**
 * Loads the carousel features, either from the saved preferences or by downloading and
 * parsing the Rowan homepage.
 *
 * <p>The saved features are used when the last update is less than {@code UPDATE_INTERVAL}
 * hours old; otherwise the homepage is fetched, every {@code .feature} element is converted
 * into a {@link CarouselFeature}, and the result is saved to the preferences.
 *
 * @param params unused
 * @return the loaded features, or {@code null} when downloading the homepage failed
 */
@Override
protected CarouselFeature[] doInBackground(Void... params) {
    String rowanURL = "http://rowan.edu";
    ArrayList<CarouselFeature> cfeatures = new ArrayList<CarouselFeature>();

    long lastUpdated = prefs.getLong(LAST_UPDATE, -1);
    if (lastUpdated > 0) {
        long timeDiff = Calendar.getInstance().getTimeInMillis() - lastUpdated;
        int hours = (int) (timeDiff / (60 * 60 * 1000));
        if (hours < UPDATE_INTERVAL) { // recent enough: just load saved features
            cfeatures.addAll(loadFeaturesFromPreferences());
            return cfeatures.toArray(new CarouselFeature[cfeatures.size()]);
        }
    }

    // Otherwise attempt to update by downloading and parsing Rowan's homepage.
    try {
        Document document = Jsoup.connect(rowanURL).get();
        Elements features = document.select(".feature ");

        for (Element feature : features) {
            Element titleElement = feature.select(".title a span").first();
            Element descriptionElement = feature.select(".description a").first();
            Element link = feature.select("a").first();
            Element image = (link == null) ? null : link.select("img").first();

            // first() returns null when nothing matched; skip incomplete features instead of
            // failing the whole update with a NullPointerException.
            if (titleElement == null || descriptionElement == null || link == null || image == null) {
                continue;
            }

            String title = titleElement.text();
            String description = descriptionElement.text();
            String linkURL = link.attr("abs:href");
            String imageURL = image.attr("abs:src");

            CarouselFeature cFeature = new CarouselFeature(title, description, linkURL, imageURL, RECEIVER,
                    context);
            cfeatures.add(cFeature);
        }
        saveDataToPreferences(cfeatures);
    } catch (IOException e1) {
        e1.printStackTrace();
        return null;
    }

    return cfeatures.toArray(new CarouselFeature[cfeatures.size()]);
}

From source file:com.mycompany.grabberrasskazov.threads.ThreadForPageSave.java

/**
 * Downloads the story page at {@code pageUrl}, extracts its name, writer, categories and text,
 * and saves it through {@code mainBean} unless a story with the same old id already exists.
 *
 * <p>An {@link IOException} raised while downloading is printed and otherwise ignored.
 *
 * @param pageUrl absolute URL of the story page; the old id is this URL with
 *                {@code GlobalVars.mainSite} removed
 */
public void indexStory(String pageUrl) {
    try {
        String oldId = pageUrl.replace(GlobalVars.mainSite, "");
        if (!mainBean.storyExists(oldId)) {
            Stories r = new Stories();

            Document doc = Jsoup.connect(pageUrl)
                    .userAgent("Opera/9.80 (X11; Linux x86_64) " + "Presto/2.12.388 Version/12.16").get();

            // NOTE(review): the label inside containsOwn(...) looks mis-encoded ("?") in this
            // copy of the source; confirm against the original file.
            Elements nameBlockElements = doc.select("b:containsOwn(?)");
            Element nameBlock = nameBlockElements.get(0);
            // Walk up from the label to the enclosing row, then take its second cell.
            nameBlock = nameBlock.parent().parent();
            nameBlockElements = nameBlock.select("td:eq(1)");
            nameBlock = nameBlockElements.get(0);
            String storyName = nameBlock.text();
            r.setStoryName(storyName);

            // Start of processing writer
            Elements writerBlockElements = doc.select("b:containsOwn(?:)");
            Element writerBlock = writerBlockElements.get(0);
            writerBlock = writerBlock.parent().parent();
            writerBlockElements = writerBlock.select("td:eq(1)");
            writerBlock = writerBlockElements.get(0);

            String writersUrl = writerBlock.select("a:eq(0)").attr("href");
            String writersName = writerBlock.select("a:eq(0)").text();
            String writersContacts = writerBlock.select("a:eq(1)").attr("href");

            StoryWriters storyWriter = new StoryWriters();
            storyWriter.setOldId(writersUrl);
            storyWriter.setWriterEmail(writersContacts);
            storyWriter.setWriterName(writersName);
            storyWriter = mainBean.saveWriter(storyWriter);

            Set<StoriesToWritersRelations> storiesToWritersRelationses = new HashSet<StoriesToWritersRelations>();
            StoriesToWritersRelations storiesToWritersRelations = new StoriesToWritersRelations();
            storiesToWritersRelations.setStories(r);
            storiesToWritersRelations.setStoryWriters(storyWriter);
            // The relation must be put into the set; otherwise the story is saved with an
            // empty writer-relation set.
            storiesToWritersRelationses.add(storiesToWritersRelations);
            r.setStoriesToWritersRelationses(storiesToWritersRelationses);

            // End of processing writer
            Set<StoriesToCategoriessRelations> catsRelationses = new HashSet<>();
            Elements katsInfo = doc.select("a[href*=ras.shtml?kat]");
            for (Element kat : katsInfo) {
                String katId = kat.attr("href");
                StoryCategories cat = mainBean.getCat(katId);

                StoriesToCategoriessRelations catsRelations = new StoriesToCategoriessRelations();
                catsRelations.setStoryCategories(cat);
                catsRelations.setStories(r);

                catsRelationses.add(catsRelations);
            }
            r.setStoriesToCategoriessRelationses(catsRelationses);

            Elements textBlocks = doc.select("p[align=justify]");
            Element textBlock = textBlocks.get(0);
            String textStr = textBlock.html();
            // Double quotes in the story HTML are replaced with single quotes before storing.
            r.setStoryText(textStr.replace("\"", "'"));

            r.setOldId(oldId);

            mainBean.saveStory(r);
        }

    } catch (IOException ex) {
        ex.printStackTrace();
    }

}

From source file:cn.cuizuoli.appranking.service.GooglePlayService.java

/**
 * Builds the ranking list for a Google Play feed.
 *
 * <p>For feeds whose media type is {@code MediaType.GOOGLE}, the ranking page is fetched
 * ({@code getHotUrl} for {@code Category.ALL}, {@code getUrl} otherwise) and every
 * {@code .card-list>.card} element becomes one {@link AppRanking}, ranked in page order
 * starting at 1. Any exception is logged and the entries collected so far are returned.
 *
 * @param feedType feed to load
 * @param category category to load
 * @return the collected rankings; empty for other media types or a blank URL
 */
public List<AppRanking> getAppRankingList(FeedType feedType, Category category) {
    List<AppRanking> rankings = new ArrayList<AppRanking>();
    try {
        if (feedType.getMediaType() != MediaType.GOOGLE) {
            return rankings;
        }

        String url = (category == Category.ALL) ? getHotUrl(feedType) : getUrl(feedType, category);
        log.info("Google Play -> " + url);
        if (!StringUtils.isNotBlank(url)) {
            return rankings;
        }

        Document doc = appRankingRestTemplate.getForObject(url, Document.class);
        int rank = 1;
        for (Element card : doc.select(".card-list>.card")) {
            Elements titleNodes = card.select(".details .title");
            Elements coverNodes = card.select(".cover .cover-image");

            String appId = card.attr("data-docid");
            String name = titleNodes.attr("title");
            String uri = titleNodes.attr("href");
            String artist = card.select(".details .subtitle").attr("title");
            String price = card.select(".details button.price.buy>span").text();
            String smallCover = coverNodes.attr("data-cover-small");
            String largeCover = coverNodes.attr("data-cover-large");

            AppRanking entry = new AppRanking();
            entry.setAppId(appId);
            entry.setDeviceType(DeviceType.ANDROID);
            entry.setCountry(Country.JAPAN);
            entry.setMediaType(MediaType.GOOGLE);
            entry.setFeedType(feedType);
            entry.setRanking(rank);
            entry.setTitle(name + " - " + artist);
            entry.setCategory(category.getCode());
            entry.setUri(GOOGLE_PLAY_DOMAIN + uri);
            entry.setName(name);
            entry.setArtist(artist);
            entry.setPrice(price);
            // The small cover serves both the 53 and 75 slots; the large cover the 100 slot.
            entry.setImage53(smallCover);
            entry.setImage75(smallCover);
            entry.setImage100(largeCover);
            rankings.add(entry);
            rank++;
        }
    } catch (Exception e) {
        // Covers HttpStatusCodeException as well; both were logged the same way.
        log.error(ExceptionUtils.getFullStackTrace(e));
    }
    return rankings;
}

From source file:me.vertretungsplan.parser.ESchoolParser.java

/**
 * Reads all substitutions from a schedule table and adds them to {@code day}.
 *
 * <p>Each {@code th[colspan=10]} cell starts a lesson section. Its text gives the lesson (the
 * number in front of ". Stunde" when present, otherwise the whole text). The row after it is
 * skipped, and the following rows are read until a row containing a {@code th} is reached.
 *
 * @param table table element to read
 * @param day   day that receives the parsed substitutions
 */
private void parseTable(Element table, SubstitutionScheduleDay day) {
    // Compiled once per call rather than once per lesson header.
    final Pattern lessonPattern = Pattern.compile("(\\d+)\\. Stunde");

    for (Element th : table.select("th[colspan=10]")) {
        final String headerText = th.text();
        final Matcher matcher = lessonPattern.matcher(headerText);
        final String lesson = matcher.find() ? matcher.group(1) : headerText;

        // Skip over the table headers. A lesson header may be the last row of the table, in
        // which case there is nothing to skip to and nothing to parse.
        Element columnHeaderRow = th.parent().nextElementSibling();
        Element row = (columnHeaderRow == null) ? null : columnHeaderRow.nextElementSibling();

        while (row != null && row.select("th").size() == 0) {
            Substitution subst = new Substitution();
            subst.setLesson(lesson);

            Elements columns = row.select("td");

            // Classes are separated by ", " or "+".
            String[] classes = columns.get(0).text().split(", |\\+");
            subst.setClasses(new HashSet<>(Arrays.asList(classes)));

            subst.setPreviousTeacher(getPreviousValue(columns.get(1)));
            subst.setTeacher(getNewValue(columns.get(1)));
            subst.setPreviousSubject(getPreviousValue(columns.get(2)));
            subst.setSubject(getNewValue(columns.get(2)));
            subst.setPreviousRoom(getPreviousValue(columns.get(3)));
            subst.setRoom(getNewValue(columns.get(3)));

            String desc = columns.get(4).text();
            if (desc.isEmpty()) {
                subst.setType("Vertretung");
                subst.setColor(colorProvider.getColor("Vertretung"));
            } else {
                subst.setDesc(desc);
                String recognizedType = recognizeType(desc);
                if (recognizedType == null) {
                    // Fall back to the default type when the description is not recognized.
                    recognizedType = "Vertretung";
                }
                subst.setType(recognizedType);
                subst.setColor(colorProvider.getColor(recognizedType));
            }

            day.addSubstitution(subst);

            row = row.nextElementSibling();
        }
    }
}

From source file:org.apache.marmotta.ldclient.provider.phpbb.PHPBBTopicProvider.java

/**
 * Return a mapping table mapping from RDF properties to XPath Value Mappers. Each entry in the map is evaluated
 * in turn; in case the XPath expression yields a result, the property is added for the processed resource.
 *
 * @return/*from   ww  w.  j  a  v a 2 s. com*/
 * @param requestUrl
 */
@Override
protected Map<String, JSoupMapper> getMappings(String resource, String requestUrl) {
    URI uri = null;
    try {
        uri = new URI(requestUrl);
        Map<String, String> params = new HashMap<String, String>();
        for (NameValuePair p : URLEncodedUtils.parse(uri, "UTF-8")) {
            params.put(p.getName(), p.getValue());
        }

        if (params.containsKey("t")) {

            Map<String, JSoupMapper> postMappings = new HashMap<String, JSoupMapper>();
            if (params.containsKey("start")) {
                // when start is set, we only take the replies; we are in a second or further page of the topic
                postMappings.put(Namespaces.NS_SIOC + "container_of",
                        new PHPBBPostIdMapper("div#pagecontent table td.gensmall a[name]"));
            } else {
                // otherwise we also take the initial title, creator and date for the topic
                postMappings.put(Namespaces.NS_DC + "title",
                        new CssTextLiteralMapper("div#pageheader a.titles"));
                postMappings.put(Namespaces.NS_DC + "creator",
                        new CssTextLiteralMapper(new CssSelectorMapper.Selector() {
                            @Override
                            public Elements select(Element node) {
                                final Element first = node.select("div#pagecontent table b.postauthor").first();
                                if (first != null)
                                    return new Elements(first);
                                return new Elements();
                            }
                        }));
                postMappings.put(Namespaces.NS_DC + "date",
                        new PHPBBDateMapper("div#pagecontent table td.gensmall div") {
                            @Override
                            public Elements select(Element htmlDoc) {
                                final Elements sel = super.select(htmlDoc);
                                if (sel.size() > 0) {
                                    final Element e = sel.get(1);
                                    if (e != null)
                                        return new Elements(e);
                                }
                                return new Elements();
                            }
                        });
                postMappings.put(Namespaces.NS_SIOC + "has_container",
                        new PHPBBForumHrefMapper("p.breadcrumbs a") {
                            @Override
                            public Elements select(Element htmlDoc) {
                                final Element select = super.select(htmlDoc).last();
                                return select != null ? new Elements(select) : new Elements();
                            }
                        });
                postMappings.put(Namespaces.NS_SIOC + "container_of",
                        new PHPBBPostIdMapper("div#pagecontent table td.gensmall a[name]"));
            }

            return postMappings;
        } else
            throw new RuntimeException(
                    "the requested resource does not seem to identify a PHPBB topic (t=... parameter missing)");

    } catch (URISyntaxException e) {
        throw new RuntimeException(
                "the requested resource does not seem to identify a PHPBB topic (URI syntax error)");
    }

}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Looks for the "last change" information of a schedule page.
 *
 * <p>Three sources are tried in order: a {@code table.mon_head} element in {@code doc}; when
 * the "last change left" option is set, the HTML in front of the first {@code <p>}; otherwise
 * a {@code table.mon_head} that is wrapped in an HTML comment among the child nodes.
 *
 * @param doc          document or element to search
 * @param scheduleData schedule configuration, may be {@code null}; the legacy key
 *                     {@code stand_links} takes precedence over {@code PARAM_LAST_CHANGE_LEFT}
 * @return the last change text, or {@code null} if none was found
 */
static String findLastChange(Element doc, SubstitutionScheduleData scheduleData) {
    String lastChange = null;

    boolean lastChangeLeft = false;
    if (scheduleData != null) {
        if (scheduleData.getData().has("stand_links")) {
            // backwards compatibility
            lastChangeLeft = scheduleData.getData().optBoolean("stand_links", false);
        } else {
            lastChangeLeft = scheduleData.getData().optBoolean(PARAM_LAST_CHANGE_LEFT, false);
        }
    }

    final Elements monHeads = doc.select("table.mon_head");
    if (monHeads.size() > 0) {
        lastChange = findLastChangeFromMonHeadTable(monHeads.first());
    } else if (lastChangeLeft) {
        final Elements bodies = doc.select("body");
        final String bodyHtml = bodies.size() > 0 ? bodies.html() : doc.html();
        final int firstParagraph = bodyHtml.indexOf("<p>");
        // Without a "<p>" (index -1), or with one at the very start (index 0), there is no
        // text in front of it; substring would throw, so report "not found" instead.
        if (firstParagraph > 0) {
            // The character directly in front of "<p>" is dropped as well.
            lastChange = bodyHtml.substring(0, firstParagraph - 1);
        }
    } else {
        List<Node> childNodes;
        if (doc instanceof Document) {
            childNodes = ((Document) doc).body().childNodes();
        } else {
            childNodes = doc.childNodes();
        }
        for (Node node : childNodes) {
            if (node instanceof Comment) {
                Comment comment = (Comment) node;
                if (comment.getData().contains("<table class=\"mon_head\">")) {
                    // The header table is commented out; parse the comment text as HTML.
                    Document commentedDoc = Jsoup.parse(comment.getData());
                    Element monHead = commentedDoc.select("table.mon_head").first();
                    lastChange = findLastChangeFromMonHeadTable(monHead);
                    break;
                }
            }
        }
    }
    return lastChange;
}

From source file:com.glluch.profilesparser.ProfileHtmlReader.java

/**
 * Collects the own text of every {@code li} element found below {@code list}.
 *
 * @param list element to search for list items
 * @return the own text of each matched {@code li}, in match order
 */
private ArrayList<String> ul2array(Element list) {
    ArrayList<String> texts = new ArrayList<>();
    Elements items = list.select("li");
    for (int i = 0; i < items.size(); i++) {
        // ownText() excludes the text of nested child elements.
        String itemText = items.get(i).ownText();
        texts.add(itemText);
    }
    return texts;
}

From source file:de.geeksfactory.opacclient.apis.TouchPoint.java

/**
 * Parses the list of lent items from an account page.
 *
 * <p>The rows are taken from {@code .data tr}; the first row is skipped. Problems while
 * reading the details of a single row are printed and the item is still added with whatever
 * was set before the failure.
 *
 * @param doc account page
 * @return the lent items, or {@code null} when the table has only one row or a row contains
 *         "keine Daten"
 */
static List<LentItem> parse_medialist(Document doc) {
    List<LentItem> media = new ArrayList<>();
    Elements copytrs = doc.select(".data tr");

    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    int trs = copytrs.size();
    if (trs == 1) {
        return null;
    }
    assert (trs > 0);
    for (int i = 1; i < trs; i++) {
        Element tr = copytrs.get(i);
        LentItem item = new LentItem();

        if (tr.text().contains("keine Daten")) {
            return null;
        }
        item.setTitle(tr.select(".account-display-title").select("b, strong").text().trim());
        try {
            // The author is the part after the first <br> in the title cell.
            item.setAuthor(tr.select(".account-display-title").html().split("<br[ /]*>")[1].trim());

            String[] col3split = tr.select(".account-display-state").html().split("<br[ /]*>");
            String deadline = Jsoup.parse(col3split[0].trim()).text().trim();
            if (deadline.contains(":")) {
                // BSB Munich: <span class="hidden-sm hidden-md hidden-lg">Fälligkeitsdatum : </span>26.02.2016<br>
                deadline = deadline.split(":")[1].trim();
            }
            if (deadline.contains("-")) {
                // Chemnitz: 22.07.2015 - 20.10.2015<br>
                deadline = deadline.split("-")[1].trim();
            }

            try {
                item.setDeadline(fmt.parseLocalDate(deadline).toString());
            } catch (IllegalArgumentException e1) {
                e1.printStackTrace();
            }

            if (col3split.length > 1)
                item.setHomeBranch(col3split[1].trim());

            for (Element link : tr.select("a")) {
                String href = link.attr("abs:href");
                Map<String, String> hrefq = getQueryParamsFirst(href);
                // Constant-first comparison: links without a methodToCall parameter are
                // skipped instead of raising a NullPointerException that would stop the
                // search before a later renewal link is reached.
                if ("renewal".equals(hrefq.get("methodToCall"))) {
                    item.setProlongData(href.split("\\?")[1]);
                    item.setRenewable(true);
                    break;
                }
            }

        } catch (Exception ex) {
            ex.printStackTrace();
        }

        media.add(item);
    }
    return media;
}

From source file:com.crawler.app.run.JellyfishCrawlerSiteCB.java

/**
 * This function is called when a page is fetched and ready to be processed
 * by your program.
 *
 * <p>For careerbuilder.vn job pages (URL starts with
 * {@code http://careerbuilder.vn/vi/tim-viec-lam} and ends with {@code .html}) the job and
 * company details are extracted from the HTML, trying three page layouts in turn, and the job
 * title is printed. Storing the result in the database is currently commented out.
 *
 * @param page the fetched page
 */
@Override
public void visit(Page page) {
    String url = page.getWebURL().getURL();
    //logger.info("URL: ", url);
    // NOTE(review): the connection settings are hard-coded (root user, empty password) and
    // createConn is called for every visited page; consider moving them to configuration.
    String host = "127.0.0.1";
    String port = "3306";
    String dbName = "crawler";
    String dbUser = "root";
    String dbPwd = "";
    MysqlCrawler.createConn(host, port, dbName, dbUser, dbPwd);
    System.out.println("\n URL visit: " + url);

    String href = url.toLowerCase();
    if (href.startsWith("http://careerbuilder.vn/vi/tim-viec-lam") && href.endsWith(".html")) {
        if (page.getParseData() instanceof HtmlParseData) {
            HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
            String html = htmlParseData.getHtml();

            // The second argument of Jsoup.parse(String, String) is the base URI used to
            // resolve relative links, not a charset name; pass the page URL.
            Document doc = Jsoup.parse(html, url);
            //doc.outputSettings().escapeMode(EscapeMode.xhtml);
            Element body = doc.body();

            // First layout.
            Elements listDetail = body.select("section div[class=MyJobLeft]");
            String jobUrl = url;
            String jobName = listDetail.select("h1").html();
            String companyName = listDetail.select("div[class=tit_company]").html();
            String jobLocation = listDetail.select(
                    "div[class=box2Detail] ul[class=DetailJobNew] p[class=fl_left] b[itemprop=jobLocation] a")
                    .html();
            String companyAddress = listDetail
                    .select("div[class=box1Detail] p[class=TitleDetailNew] label[itemprop=addressLocality]")
                    .html();
            String companyContact = listDetail
                    .select("div[class=box1Detail] p[class=TitleDetailNew] label strong").html();
            String companyPhone = listDetail
                    .select("div[class=col-lg-6 col-md-6 col-sm-12] p[id=company_contact]").html();
            String companyWebsite = listDetail
                    .select("div[class=col-lg-6 col-md-6 col-sm-12] p a[id=company_website]").html();

            if (listDetail.isEmpty() || jobName.isEmpty()) {
                // Second layout.
                listDetail = body.select("div[id=main_content] div[id=main_content_right]");
                jobName = listDetail.select("h1 p").html();
                companyName = listDetail.select("div[class=intro_company] div[class=title_into] p").html();
                jobLocation = listDetail
                        .select("div[class=intro_job] ul[class=left_380] p[itemprop=jobLocation] a").html();
                if (listDetail.isEmpty() || jobName.isEmpty()) {
                    // Third layout.
                    listDetail = body.select("div[id=main_content] div[class=content_right]");
                    jobName = listDetail.select("h1").html();
                    companyName = listDetail
                            .select("div[class=intro_company] div[class=title_into] p[class=title_comp]")
                            .html();
                    Elements gCompanyWebList = listDetail
                            .select("div[class=intro_company] div[class=title_into] p");
                    // The second paragraph, when present, holds the company website.
                    if (gCompanyWebList.size() > 1)
                        companyWebsite = gCompanyWebList.get(1).html();
                    jobLocation = listDetail
                            .select("div[class=intro_job] ul[class=left_380] p[itemprop=jobLocation] a").html();

                }
            }
            // The job name is re-read from whichever layout matched, most specific selector first.
            jobName = listDetail.select("h1 a").html();
            if (jobName.isEmpty())
                jobName = listDetail.select("h1 p").html();
            if (jobName.isEmpty())
                jobName = listDetail.select("h1").html();

            System.out.println("\n Title : " + jobName);
            try {
                Integer siteID = 3;
                //String companyWebsite = "";
                /*
                MysqlCrawler.getInstance().insertJFHRContents(
                      siteID
                      , jobUrl
                      , jobName
                      , jobLocation
                      , companyName
                      , companyAddress
                      , companyPhone
                      , companyContact
                      , companyWebsite);
                       */
                //System.exit(1);
            } catch (Exception ex) {
                //System.out.println("\n Fail I : " + i);
                System.out.println("\n Ex : " + ex);
            }
        }

    }

    /*
        Header[] responseHeaders = page.getFetchResponseHeaders();
        if (responseHeaders != null) {
          logger.debug("Response headers:");
          for (Header header : responseHeaders) {
            logger.debug("\t{}: {}", header.getName(), header.getValue());
          }
        }
    */
    logger.debug("=============");
}

From source file:virgil.meanback.HistoryInfo.java

/**
 * Loads the page at {@code url} with HtmlUnit (JavaScript enabled), re-parses the rendered
 * markup with jsoup and reads the stock name, its code and up to 80 rows of the
 * {@code #BIZ_hq_historySearch} table. Rows whose HTML contains "sum" are skipped.
 *
 * @param url address of the page to load
 * @return the stock with its name, code and daily entries
 * @throws Exception if the page cannot be loaded, or a date or closing price cannot be parsed
 */
public Stock parse(String url) throws Exception {
    java.util.logging.Logger.getLogger("com.gargoylesoftware.htmlunit").setLevel(Level.OFF);
    Stock stock = new Stock();
    List<DayInfo> list = new ArrayList<>();

    // Render the page with HtmlUnit, then hand the resulting markup to jsoup. The client is
    // closed as soon as the markup has been copied so its windows and threads are released.
    Document doc;
    WebClient wc = new WebClient(BrowserVersion.CHROME);
    try {
        wc.getOptions().setUseInsecureSSL(true);
        wc.getOptions().setJavaScriptEnabled(true);
        wc.getOptions().setCssEnabled(false);
        wc.getOptions().setThrowExceptionOnScriptError(false);
        wc.getOptions().setTimeout(50000);
        wc.getOptions().setDoNotTrackEnabled(false);
        HtmlPage page = wc.getPage(url);
        HtmlElement documentElement = page.getDocumentElement();
        doc = Jsoup.parse(documentElement.asXml());
    } finally {
        wc.close();
    }

    String name = doc.select("#BIZ_IS_Name").text();
    String code = doc.select(".BIZ_IS_price_id span").text();
    // NOTE(review): skips two characters from the "(" and drops the last character;
    // presumably the text looks like "(xx123456)" - confirm against the page.
    code = code.substring(code.indexOf("(") + 2, code.length() - 1);
    Elements els = doc.select("#BIZ_hq_historySearch tbody tr");
    stock.setCode(code);
    stock.setName(name);

    // Created once per call; SimpleDateFormat is not thread-safe, so it stays a local.
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    for (Element el : els) {
        if (!el.html().contains("sum")) {
            DayInfo dayInfo = new DayInfo();
            String dateString = el.select("td.e1").text();
            Date date = format.parse(dateString);
            Elements cells = el.select("td");
            String open = cells.eq(1).text();
            String close = cells.eq(2).text();
            // Kept only for its validation effect: a non-numeric closing price still
            // aborts parsing with a NumberFormatException.
            Double.parseDouble(close);
            String low = cells.eq(5).text();
            String high = cells.eq(6).text();
            String volume = cells.eq(7).text();
            dayInfo.setClose(close);
            dayInfo.setDateString(dateString);
            dayInfo.setHigh(high);
            dayInfo.setLow(low);
            dayInfo.setOpen(open);
            dayInfo.setVolume(volume);
            dayInfo.setDate(date);
            list.add(dayInfo);
            // Stop once 80 entries have been collected.
            if (list.size() > 79) {
                break;
            }
        }
    }
    stock.setList(list);
    return stock;
}