Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:org.bigmouth.tfc.v1.PageIteratorImpl.java

/**
 * Reads the page links out of the first pagination bar found under
 * {@code div.J_TItems} in the asynchronous search document and appends one
 * {@code PageImpl} per numbered link to {@code elementData}.
 *
 * <p>Links with a blank {@code href}, or whose text is not an integer, are
 * skipped. A link whose text parses to {@code -1} is skipped as well, because
 * {@code -1} doubles as the "not a number" marker.
 */
protected void initPagination() {
    Elements paginationBars = this.asynSearchDoc.select("div.J_TItems .pagination");
    if (!CollectionUtils.isNotEmpty(paginationBars)) {
        return;
    }

    // Only the first pagination bar is inspected.
    Elements links = paginationBars.get(0).select("a");
    if (!CollectionUtils.isNotEmpty(links)) {
        return;
    }

    for (Element link : links) {
        String href = link.attr("href");
        if (!StringHelper.isNotBlank(href)) {
            continue;
        }
        int pageNo = NumberUtils.toInt(link.text(), -1);
        if (pageNo == -1) {
            continue;
        }
        this.elementData.add(new PageImpl(Constants.PROTOCOL_PREFIX + href, pageNo));
    }
}

From source file:org.cellcore.code.engine.page.GathererDataExtractor.java

/**
 * Downloads the languages page for a card and builds one {@code CardName}
 * per {@code .cardItem} row whose language is not rejected by {@code skip}.
 *
 * <p>Each row is read positionally: cell 0 holds the translated name and a
 * link whose {@code href} carries the multiverse id after the first
 * {@code '='}, cell 1 the language, cell 2 the translated language name.
 *
 * @param cardId id appended to {@code langUrl} to form the page address
 * @param card   card that every produced name is attached to
 * @return the names found on the page (possibly empty)
 * @throws IOException if the page cannot be fetched
 */
private Set<CardName> fetchOtherNames(String cardId, Card card) throws IOException {
    Set<CardName> translations = new HashSet<CardName>();
    Document languagesPage = Jsoup.connect(langUrl + cardId).get();

    for (Element row : languagesPage.select(".cardItem")) {
        Elements cells = row.select("td");
        String language = cells.get(1).text();
        if (skip(language)) {
            continue;
        }

        Element nameCell = cells.get(0);
        String name = nameCell.text();
        String href = nameCell.select("a").get(0).attr("href");
        // Everything after the first '=' is the id; with no '=' the whole href is kept.
        String multiverseId = href.substring(href.indexOf("=") + 1);
        String translatedLanguage = cells.get(2).text();

        CardName cardName = new CardName();
        cardName.setTranslatedLang(translatedLanguage);
        cardName.setLanguage(language);
        cardName.setName(name);
        cardName.setMultiverseId(multiverseId);
        cardName.setCard(card);
        translations.add(cardName);
    }
    return translations;
}

From source file:org.codeexample.anchorlinks.CVAnchorContentIndexingFilter.java

/**
 * Collects anchor names from every element under {@code rootElement} that
 * matches {@code anchorPattern}.
 *
 * <p>The first character of each {@code href} is dropped before the value is
 * added to {@code anchors} (presumably the leading {@code '#'} of an in-page
 * link; confirm against the selectors callers pass in).
 *
 * @param rootElement   element used as the starting context for the query
 * @param anchorPattern CSS query selecting the anchor elements
 * @param anchors       set that receives the extracted anchor names
 */
public void getAnchorsImpl(Element rootElement, String anchorPattern, Set<String> anchors) {
    for (Element element : rootElement.select(anchorPattern)) {
        String href = element.attr("href");
        // attr() yields "" when the attribute is absent; substring(1) on ""
        // would throw StringIndexOutOfBoundsException, so skip such elements.
        if (href.isEmpty()) {
            continue;
        }
        anchors.add(href.substring(1));
    }
}

From source file:org.dronix.android.unisannio.fragment.AvvisiIngFragment.java

/**
 * Fetches the feed at {@code URL} (10 second timeout) and converts every
 * {@code item} element into a {@code NewsIng} built from its first
 * {@code title}, {@code link}, {@code description} and {@code pubDate}
 * children; the fifth constructor argument is left as an empty string.
 *
 * @return the parsed items; {@code null} if a {@code SocketException}
 *         occurs; the list built so far if any other {@code IOException}
 *         occurs (its stack trace is printed)
 */
public List<NewsIng> getNews() {
    List<NewsIng> parsedNews = new ArrayList<NewsIng>();

    try {
        Document feed = Jsoup.connect(URL).timeout(10000).get();

        for (Element item : feed.select("item")) {
            String title = item.select("title").first().text();
            String description = item.select("description").first().text();
            String link = item.select("link").first().text();
            String pubDate = item.select("pubDate").first().text();

            NewsIng news = new NewsIng(title, link, description, pubDate, "");
            parsedNews.add(news);
        }
    } catch (SocketException socketFailure) {
        // Callers receive null rather than a list when the socket fails.
        return null;
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
    }

    return parsedNews;
}

From source file:org.dronix.android.unisannio.fragment.TabThree.java

/**
 * Fetches the feed at {@code URL} (10 second timeout) and converts every
 * {@code item} element into a {@code NewsIng} built from its first
 * {@code title}, {@code link}, {@code description} and {@code author}
 * children; the fourth constructor argument is left as an empty string.
 *
 * @return the parsed items; {@code null} if a {@code SocketException}
 *         occurs; the list built so far if any other {@code IOException}
 *         occurs (its stack trace is printed)
 */
public List<NewsIng> getNews() {
    List<NewsIng> parsedNews = new ArrayList<NewsIng>();

    try {
        Document feed = Jsoup.connect(URL).timeout(10000).get();

        for (Element item : feed.select("item")) {
            String title = item.select("title").first().text();
            String description = item.select("description").first().text();
            String link = item.select("link").first().text();
            String author = item.select("author").first().text();

            NewsIng news = new NewsIng(title, link, description, "", author);
            parsedNews.add(news);
        }
    } catch (SocketException socketFailure) {
        // Callers receive null rather than a list when the socket fails.
        return null;
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
    }

    return parsedNews;
}

From source file:org.keionline.keionline.ArticleView.java

/**
 * Downloads {@code url}, takes the first element with class {@code node},
 * rewrites its image and link addresses to absolute URLs, inserts an
 * {@code <hr>} after the first {@code submitted} element (when present) and
 * returns the resulting markup wrapped as {@code CSS + markup + "</body>"}.
 * The same string is also stored in the {@code content} field.
 *
 * @param url address of the article page
 * @return the article markup prefixed with {@code CSS}
 * @throws IOException if the page cannot be fetched or contains no element
 *                     with class {@code node}
 */
private String getContent(String url) throws IOException {
    Document doc = Jsoup.connect(url).userAgent("Mozilla").get();

    // First element carrying the "node" class; first() is null when none exists.
    Element data = doc.getElementsByClass("node").first();
    if (data == null) {
        throw new IOException("No element with class \"node\" found at " + url);
    }

    // Make image sources absolute so they still load outside the original page.
    for (Element img : data.select("img")) {
        String absoluteSrc = img.absUrl("src");
        // absUrl() returns "" when the attribute is missing or unresolvable;
        // keep the original value rather than blanking it.
        if (!absoluteSrc.isEmpty()) {
            img.attr("src", absoluteSrc);
        }
    }

    // Same treatment for links; anchors without an href are left untouched
    // instead of gaining an empty href attribute.
    for (Element anchor : data.select("a")) {
        String absoluteHref = anchor.absUrl("href");
        if (!absoluteHref.isEmpty()) {
            anchor.attr("href", absoluteHref);
        }
    }

    Element info = data.getElementsByClass("submitted").first();
    if (info != null) {
        info.after("<hr>");
    }

    String cont = CSS + data.toString() + "</body>";
    content = cont;
    return cont;
}

From source file:org.loklak.api.search.WeiboUserInfo.java

/**
 * Fetches the page named by the {@code url} request parameter, reads every
 * element whose {@code class} attribute is exactly {@code "li_1 clearfix"}
 * and writes the extracted profile fields back as an indented JSON object.
 *
 * <p>Rows containing a link whose {@code href} contains {@code loc=infblog}
 * are stored under {@code "Blog"}; all other rows are dispatched on the text
 * of their {@code "pt_title S_txt2"} element. Rows lacking either the detail
 * or the title element are skipped.
 *
 * <p>Responds with 503 when the request is flagged as DoS and with 400 when
 * no {@code url} parameter is supplied.
 *
 * @throws IOException if the remote page cannot be fetched or the response
 *                     cannot be written
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    Query post = RemoteAccess.evaluate(request);

    // manage DoS
    if (post.isDoS_blackout()) {
        response.sendError(503, "your request frequency is too high");
        return;
    }

    String url = post.get("url", "");
    if (url.isEmpty()) {
        // Jsoup.connect rejects an empty URL with an unchecked exception;
        // report a client error instead.
        response.sendError(400, "missing url parameter");
        return;
    }

    // NOTE(review): url is caller-supplied and fetched server-side; consider
    // restricting it to the expected host(s).
    JSONObject obj = new JSONObject();
    Document doc = Jsoup.connect(url).get();

    // getElementsByAttributeValue never returns null, so no null check is needed.
    Elements infos = doc.getElementsByAttributeValue("class", "li_1 clearfix");

    for (Element info : infos) {
        if (info.getElementsByAttributeValueContaining("href", "loc=infblog").size() != 0) {
            String blogurl = info.select("a").text();
            obj.put("Blog", blogurl);
            continue;
        }

        Element detail = info.getElementsByAttributeValue("class", "pt_detail").first();
        if (detail == null) {
            continue;
        }
        String profile = detail.text().trim();
        obj.put("pro", profile);

        Element title = info.getElementsByAttributeValue("class", "pt_title S_txt2").first();
        if (title == null) {
            continue;
        }

        // NOTE(review): the "??" and "" labels below are mis-encoded remnants of
        // the original (non-ASCII) labels; the repeated "" labels do not compile
        // as written and must be restored from the upstream source.
        switch (title.text()) {
        case "Nickname":
            obj.put("username", profile);
            break;
        case "Location":
            obj.put("Address", profile);
            break;
        case "Gender":
            obj.put("Gender", profile);
            break;
        case "??":
            obj.put("Sexuality", stripTabsAndLineBreaks(profile));
            break;
        case "":
            obj.put("Relationship", stripTabsAndLineBreaks(profile));
            break;
        case "Birthday":
            obj.put("Birthday", profile);
            break;
        case "":
            obj.put("Blood", profile);
            break;
        case "Domain Name":
            if (info.getElementsByAttributeValueContaining("href", "loc=infdomain").size() != 0) {
                profile = info.select("a").text();
            }
            obj.put("Personaldomain", profile);
            break;
        case "":
            obj.put("Profile", profile);
            break;
        case "Registration":
            obj.put("Registertime", stripTabsAndLineBreaks(profile));
            break;
        case "Email":
            obj.put("Email", profile);
            break;
        case "QQ":
            obj.put("Qq", profile);
            break;
        case "":
            obj.put("College", stripTabsAndLineBreaks(profile));
            break;
        case "Tags":
            obj.put("Tag", stripTabsAndLineBreaks(profile));
            break;
        }
    }

    //print JSON
    response.setCharacterEncoding("UTF-8");
    PrintWriter sos = response.getWriter();
    sos.print(obj.toString(2));
    sos.println();
}

/**
 * Removes tab characters and CRLF line breaks from {@code value}.
 * (The previous inline code removed the literal letters "t" and "rn".)
 */
private static String stripTabsAndLineBreaks(String value) {
    return value.replace("\t", "").replace("\r\n", "");
}

From source file:org.mar9000.space2latex.WikiPage.java

/**
 * Finds every {@code ac:image} element in the page's storage markup, resolves
 * where its image data lives, downloads it and registers the result in
 * {@code page.images} keyed by the image key (attachment filename or last
 * path segment of the URL).
 *
 * <p>Two image kinds are handled:
 * <ul>
 *   <li>{@code ri:attachment} - an attachment of this page, or of another
 *       page when a nested {@code ri:page} is present (that page is looked up
 *       by title and space key);</li>
 *   <li>{@code ri:url} - an image referenced by plain URL.</li>
 * </ul>
 *
 * @param page the page whose images are downloaded; its {@code images} map is
 *             filled in
 * @throws MalformedURLException if an {@code ri:url} value is not a valid URL
 * @throws RuntimeException      if a referenced page is not found or the image
 *                               element contains neither reference kind
 */
public static void downloadWikiPageImages(WikiPage page) throws MalformedURLException {
    String pageUrl = page.json.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR);
    Document document = Jsoup.parseBodyFragment(page.storage);
    // Keep the markup as-is; outerHtml() of each image is stored below.
    document.outputSettings().prettyPrint(false);
    Elements images = document.select("ac|image");
    if (images.size() > 0)
        LOGGER.info("  Download images:");
    for (Element element : images) {
        String downloadURL = null;
        String imageKey = null;
        // Attachment?
        Elements refs = element.select("ri|attachment");
        WikiImage image = new WikiImage();
        image.pageId = page.id;
        image.acImage = element.outerHtml();
        //
        if (refs.size() > 0) { // Attachment.
            Element riAttachment = refs.get(0);
            imageKey = riAttachment.attr("ri:filename");
            Elements riPages = riAttachment.select("ri|page");
            // Thumbnails are not found with "child/attachment" URL schema.
            boolean isThumbnail = "true".equals(element.attr("ac:thumbnail"));
            String queryURL = null;
            // NOTE(review): URLEncoder.encode(String) is deprecated and uses the
            // platform default encoding.
            if (!isThumbnail) {
                queryURL = pageUrl + "/child/attachment?filename=" + URLEncoder.encode(imageKey);
            } else {
                // For thumbnail we construct directly the downloadURL without queryURL.
                /* Some pages have thumbnail images for better online reading.
                 * Here we always download the attached file to embed readable images into the pdf.
                downloadURL = pageUrl.substring(0, pageUrl.indexOf("/rest/api"))
                      + "/download/thumbnails/" + page.id + "/" + URLEncoder.encode(imageKey);
                */
                downloadURL = pageUrl.substring(0, pageUrl.indexOf("/rest/api")) + "/download/attachments/"
                        + page.id + "/" + URLEncoder.encode(imageKey);
            }
            if (riPages.size() > 0) {
                // The attachment is related with another page.
                // NOTE(review): for thumbnails the downloadURL built above still uses
                // this page's id, not the id of the page found here - confirm intended.
                Element riPage = riPages.get(0);
                String space = riPage.attr("ri:space-key");
                String contentTitle = riPage.attr("ri:content-title").replaceAll(" ", "%20");
                String self = page.json.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR);
                String newQueryURL = self.substring(0, self.lastIndexOf('/')) + "?title=" + contentTitle
                        + "&spaceKey=" + space;
                JSONObject jsonNewQuery = ConfluenceRESTUtils.getURLResponse(newQueryURL);
                if (jsonNewQuery.getInt(JSON_SIZE_ATTR) == 0)
                    throw new RuntimeException(
                            "Page \"" + contentTitle + "\" in space " + space + " not found.");
                // Only the first result is used.
                JSONObject jsonNewPage = (JSONObject) jsonNewQuery.getJSONArray(JSON_RESULTS_ATTR).get(0);
                image.pageId = jsonNewPage.getString(JSON_ID_ATTR);
                // Overwrite queryURL.
                String newPageUrl = jsonNewPage.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR);
                queryURL = newPageUrl + "/child/attachment?filename=" + URLEncoder.encode(imageKey);
            }
            // Must run after the ri:page lookup so the overwritten queryURL is used.
            if (!isThumbnail)
                downloadURL = getAttachmentDownloadURL(queryURL);
        } else {
            refs = element.select("ri|url");
            if (refs.size() > 0) { // URL.
                downloadURL = refs.get(0).attr("ri:value");
                URL tempURL = new URL(downloadURL);
                String urlPath = tempURL.getPath();
                // The image key is the last path segment of the URL.
                imageKey = urlPath.substring(urlPath.lastIndexOf('/') + 1);
            } else {
                throw new RuntimeException("Image format unknown: " + element.toString());
            }
        }
        // Download the image data.
        image.filename = imageKey.replace(' ', '_'); // Spaces are not handled by LaTeX.
        if (downloadURL != null) {
            LOGGER.info("    about to download image {}/{}", new Object[] { image.pageId, image.filename });
            image.data = IOUtils.getImageFromURL(downloadURL);
        } else {
            LOGGER.info("    NULL download URL for page/image: {}/{}",
                    new Object[] { image.pageId, image.filename });
        }
        // Keyed by the original image key (spaces preserved), not by image.filename.
        page.images.put(imageKey, image);
    }
}

From source file:org.mar9000.space2latex.WikiPage.java

/**
 * Rebuilds a {@code WikiPage} from a previously saved file.
 *
 * <p>The file is parsed as an HTML body fragment. The first {@code page}
 * element supplies the title and id attributes, its first {@code content}
 * child supplies the page body, and each {@code wikiimage} under the first
 * {@code wikiimages} element is registered in {@code page.images}, keyed by
 * the outer HTML of its {@code ac:image} child.
 *
 * @param file the saved page file
 * @return the reconstructed page
 * @throws IOException if the file cannot be read
 */
public static WikiPage loadForFormat(File file) throws IOException {
    Document parsed = Jsoup.parseBodyFragment(IOUtils.readFileAsString(file));
    // Pretty printing is disabled so the markup is emitted as it was read.
    parsed.outputSettings().prettyPrint(false);
    Element root = parsed.body();

    Element pageNode = root.select("page").first();
    String title = pageNode.attr("title");
    String id = pageNode.attr("id");
    Element contentNode = pageNode.select("content").first();

    WikiPage page = new WikiPage(null, title, id, contentNode.html());
    page.pageContent = contentNode;

    // Images.
    Element imagesRoot = root.select("wikiimages").first();
    for (Element imageNode : imagesRoot.select("wikiimage")) {
        WikiImage image = new WikiImage();
        String acKey = imageNode.select("ac|image").first().outerHtml();
        String pageId = imageNode.attr("pageid");
        String filename = imageNode.attr("filename");
        image.filename = pageId + "/" + filename;
        page.images.put(acKey, image);
    }
    return page;
}

From source file:org.metaservice.demo.wordpress.WordpressParser.java

/**
 * Parses the release-archive page read from {@code s} and returns one
 * {@code VersionEntry} per table row that has at least one {@code td} cell,
 * looking only at tables matching {@code table.widefat}.
 *
 * <p>The entry name is the trimmed text of the first cell; the zip, tar and
 * IIS links are the {@code href} values of the row's links ending in
 * {@code zip}, {@code tar.gz} and {@code IIS.zip} respectively.
 *
 * @param s                 reader supplying the page markup
 * @param archiveParameters not used by this implementation
 * @return the entries in document order
 * @throws ParserException if the reader cannot be read
 */
@Override
public List<VersionEntry> parse(Reader s, ArchiveAddress archiveParameters) throws ParserException {
    final String html;
    try {
        // Reading the input is the only step that can raise IOException.
        html = IOUtils.toString(s);
    } catch (IOException e) {
        throw new ParserException(e);
    }

    Document page = Jsoup.parse(html, "http://wordpress.org/download/release-archive/");
    ArrayList<VersionEntry> entries = new ArrayList<>();

    for (Element table : page.select("table.widefat")) {
        for (Element row : table.select("tr")) {
            Elements cells = row.select("td");
            if (cells.size() == 0) {
                // Rows without td cells carry no version data.
                continue;
            }
            VersionEntry entry = new VersionEntry();
            entry.setName(cells.get(0).text().trim());
            entry.setZip(cells.select("a[href$=zip]").attr("href"));
            entry.setTar(cells.select("a[href$=tar.gz]").attr("href"));
            entry.setIis(cells.select("a[href$=IIS.zip]").attr("href"));
            entries.add(entry);
        }
    }
    return entries;
}