Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of the org.jsoup.nodes Document select method.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:ru.xxlabaza.popa.pack.PackingService.java

/**
 * Replaces every {@code <link rel="stylesheet">} whose {@code href} does not start with
 * "http" by a {@code <style>} element carrying the stylesheet's content.
 *
 * <p>The file is located by resolving the link's {@code href} against {@code build}; its
 * content is passed through {@code commentRemoveService.removeComments} and
 * {@code correctURLs}, and is additionally sent through {@code compressService} unless the
 * file name ends with ".min.css".
 *
 * @param document the document whose stylesheet links are replaced in place
 */
private void processCss(Document document) {
    for (Element link : document.select("link[rel=stylesheet]:not([href^=http])")) {
        final Path stylesheet = build.resolve(createPath(link.attr("href")));
        log.info("Processing style '{}'", stylesheet);

        String css = correctURLs(stylesheet, commentRemoveService.removeComments(stylesheet));

        final boolean alreadyMinified = stylesheet.getFileName().toString().endsWith(".min.css");
        if (!alreadyMinified) {
            css = compressService.compress(css, CSS);
        }

        final Element inlined = document.createElement("style");
        inlined.html(css);

        // Insert the <style> right after the <link>, then drop the <link> itself.
        link.after(inlined);
        link.remove();
    }
}

From source file:org.ala.lucene.CreateWordPressIndex.java

/**
 * Read the Google sitemap file on WP site and load up a list
 * of page URL./*from   w w  w.  ja v  a 2  s  .  c  o m*/
 *
 * @throws IOException
 */
protected void loadSitemap() throws IOException {
    Document doc = Jsoup.connect(WP_SITEMAP_URI).get();
    Elements pages = doc.select("loc");
    logger.info("Sitemap file lists " + pages.size() + " pages.");

    for (Element page : pages) {
        // add it to list of page urls Field
        this.pageUrls.add(page.text());
    }
}

From source file:org.manalith.ircbot.plugin.linuxpkgfinder.DebianPackageFinder.java

/**
 * Builds a one-line summary from the {@code li} items of the first {@code ul} under
 * {@code #psearchres}.
 *
 * <p>For each item the text of its anchors is used as the name, and the version is the
 * part before ": " of the first {@code <br>}-separated chunk of the item's HTML whose
 * prefix contains no whitespace; "  ?" is used when no chunk qualifies. Entries are
 * rendered as {@code \u0002name\u0002: version} and joined with ", ".
 *
 * @param doc the search result page
 * @return the joined summary, or an empty string when the list has no items
 */
public String parseVersionInfo(Document doc) {
    final Elements items = doc.select("#psearchres").select("ul").get(0).select("li");
    final StringBuilder summary = new StringBuilder();

    for (Element item : items) {
        final String distName = item.select("a").text();

        String version = "  ?";
        for (String chunk : item.toString().split("<br>")) {
            final String candidate = chunk.split(": ")[0];
            // A prefix that splits into several whitespace-separated parts is not a version.
            if (candidate.split("\\s").length <= 1) {
                version = candidate;
                break;
            }
        }

        if (summary.length() > 0) {
            summary.append(", ");
        }
        summary.append("\u0002").append(distName).append("\u0002: ").append(version);
    }

    return summary.toString();
}

From source file:org.brunocvcunha.taskerbox.impl.crawler.SlexyAction.java

/**
 * Walks every anchor found under elements matching {@code .main} and acts on the ones
 * accepted by {@code canAct}.
 *
 * <p>The id is the anchor's {@code href} with "/view/" removed; it is also used as the
 * title passed to {@code spreadAction}. After each processed id the already-acted state
 * is serialized and {@code sleep(FETCH_INTERVAL)} is called.
 *
 * @param entry the fetched page
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (final Element anchor : entry.select(".main").select("a")) {
        final String id = anchor.attr("href").replace("/view/", "");
        if (!canAct(id)) {
            continue;
        }

        addAct(id);
        // The id doubles as the title for this source.
        spreadAction(id, id);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}

From source file:org.brunocvcunha.taskerbox.impl.crawler.SniptAction.java

/**
 * Walks every anchor found under elements matching {@code .grid-block} and acts on the
 * ones accepted by {@code canAct}.
 *
 * <p>The id is the anchor's {@code href} with "http://snipt.org/" removed; the title is
 * the id followed by " - " and the anchor text. After each processed id the already-acted
 * state is serialized and {@code sleep(FETCH_INTERVAL)} is called.
 *
 * @param entry the fetched page
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (final Element anchor : entry.select(".grid-block").select("a")) {
        final String id = anchor.attr("href").replace("http://snipt.org/", "");
        final String title = id + " - " + anchor.text();
        if (!canAct(id)) {
            continue;
        }

        addAct(id);
        spreadAction(id, title);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}

From source file:cvegrabber.CVEController.java

/**
 * Fetches the NVD search-results page and returns up to ten of the CVE entries it lists.
 *
 * <p>For each matching anchor the anchor text is used as the CVE id, and
 * {@code grabMitreData} is queried for its "description" and "references".
 * Failures are logged and not propagated.
 *
 * @return a ten-element array; slots beyond the number of entries that were read
 *         (all of them if the fetch failed before any entry was read) are {@code null}
 */
@RequestMapping(value = "/newest", produces = { "application/json" })
public CVE[] cve() {

    String url = "https://web.nvd.nist.gov/view/vuln/search-results?query=&search_type=all&cves=on";
    CVE[] cvearray = new CVE[10];
    try {
        Document doc = Jsoup.connect(url).get();
        Elements newest = doc.select(
                "a[id*=BodyPlaceHolder_cplPageContent_plcZones_lt_zoneCenter_VulnerabilitySearchResults_VulnResultsRepeater_CveDetailAnchor_]");
        int counter = 0;
        for (Element cveid : newest) {
            if (counter == cvearray.length) {
                break;
            }
            String id = cveid.text();
            cvearray[counter] = new CVE(id, grabMitreData(id, "description"),
                    grabMitreData(id, "references"));
            counter++;
        }
        // Only log the slots that were actually filled: the page may list fewer than ten
        // entries, and dereferencing an unfilled slot would throw a NullPointerException.
        for (int i = 0; i < counter; i++) {
            logger.info("CVEID: " + cvearray[i].getCVE() + " CVE Description: " + cvearray[i].getDescription()
                    + " CVE References: " + cvearray[i].getReferences());
        }
    } catch (Exception ex) {
        // Pass the exception itself so the stack trace is not lost.
        logger.error("Unable to fetch latest cves. " + ex.getMessage(), ex);
    }
    return cvearray;
}

From source file:hu.petabyte.redflags.engine.gear.parser.DocFamilyFetcher.java

/**
 * Reads the {@code table.family} tables of a document-family tab and adds one family
 * member to {@code notice} per table whose id differs from the notice's own id.
 *
 * <p>The member id is the part before the first ':' of the text of the first
 * {@code thead} anchor whose {@code href} matches "TED:NOTICE:". The first
 * {@code td.bgGreen} cell of the first body row is parsed with {@code DATE_FORMAT} as the
 * publication date and, when present, the second cell as the deadline. A table that
 * cannot be parsed is logged at warn level and skipped.
 *
 * @param notice       the notice to enrich
 * @param docFamilyTab the parsed document-family tab
 * @return the same {@code notice} instance
 */
public Notice parseDocFamilyTab(Notice notice, Document docFamilyTab) {
    for (Element familyTable : docFamilyTab.select("table.family")) {
        try {
            String anchorText = familyTable.select("thead a[href~=TED:NOTICE:]").first().text();
            NoticeID memberId = new NoticeID(anchorText.split(":", 2)[0]);
            if (notice.getId().equals(memberId)) {
                // The table describes the notice itself, not another family member.
                continue;
            }

            Notice member = new Notice(memberId);
            Elements dateCells = familyTable.select("tbody tr").first().select("td.bgGreen");

            String publishedText = dateCells.get(0).text();
            Date published = new SimpleDateFormat(DATE_FORMAT).parse(publishedText);
            member.getData().setPublicationDate(published);

            if (dateCells.size() >= 2) {
                String deadlineText = dateCells.get(1).text();
                Date deadline = new SimpleDateFormat(DATE_FORMAT).parse(deadlineText);
                member.getData().setDeadline(deadline);
            }

            notice.getFamilyMembers().add(member);
            LOG.trace("{} Member found: {} - {} (deadline: {})", notice.getId(), memberId, published,
                    member.getData().getDeadline());
        } catch (Exception e) {
            LOG.warn("Cannot parse a document family member of notice " + notice.getId(), e);
        }
    }
    return notice;
}

From source file:org.brunocvcunha.taskerbox.impl.crawler.PastebinAction.java

/**
 * Walks every anchor found under elements matching {@code .maintable} and acts on the
 * ones accepted by {@code canAct}.
 *
 * <p>The id is the anchor's {@code href} without its first character; anchors with no
 * {@code href} and ids starting with "archive" are skipped. The title is the id followed
 * by " - " and the anchor text. After each processed id the already-acted state is
 * serialized and {@code sleep(FETCH_INTERVAL)} is called.
 *
 * @param entry the fetched page
 */
@Override
public void action(final Document entry) {

    log.debug("Validating " + entry.title());

    for (Element el : entry.select(".maintable").select("a")) {
        final String href = el.attr("href");
        if (href.isEmpty()) {
            // attr() yields "" for an anchor without href; substring(1) on it would throw
            // StringIndexOutOfBoundsException and abort the whole pass.
            continue;
        }

        final String id = href.substring(1);
        if (id.startsWith("archive")) {
            continue;
        }

        final String title = id + " - " + el.text();

        if (canAct(id)) {
            addAct(id);

            spreadAction(id, title);
            serializeAlreadyAct();
            sleep(FETCH_INTERVAL);
        }

    }

}

From source file:com.gorsini.searcher.SearcherT411.java

/**
 * Searches the result page built by {@code makeURL} for the given movie's title and
 * returns the rows accepted by {@code MovieSelectorT411}.
 *
 * @param movieToSearch the movie whose title is searched
 * @return the matching movies, empty if there is no result; {@code null} if the search
 *         itself failed (the failure is logged)
 */
public ArrayList<Movie> searchMovie(Movie movieToSearch) {
    try {
        /*
         curl "http://vod.canalplay.com/pages/recherche/challengeexplorer.aspx?action=4&search=hercule" -H "Referer: http://vod.canalplay.com/"
         only returns the movies that are available.
         NOTE(review): this example targets another site; confirm it is still relevant here.
         */
        String titleToSearch = movieToSearch.getTitle();
        LOG.log(Level.FINER, "titre  rechercher : {0}", titleToSearch);
        String url = makeURL(titleToSearch);
        Document doc = Jsoup.connect(url).get();
        Elements movies = doc.select("table.results tbody tr");

        ArrayList<Movie> result = new ArrayList<Movie>();
        if (movies.isEmpty()) {
            LOG.log(Level.FINER, "no movie found with title {0}", titleToSearch);
            // Documented contract: an empty list, not null, when nothing is found.
            return result;
        }

        for (Element movie : movies) {
            MovieSelectorT411 selector = new MovieSelectorT411();
            Movie movieFound = selector.selectMovie(movie, movieToSearch);
            if (movieFound != null) {
                LOG.log(Level.FINER, "film trouv:{0}", movieFound.toString());
                result.add(movieFound);
            }
        }
        return result;
    } catch (Exception e) {
        // Route the failure through the logger, with its stack trace, instead of stdout.
        LOG.log(Level.WARNING, "problme HTTP", e);
        return null;
    }
}

From source file:net.acesinc.convergentui.ConvergentUIResponseFilter.java

/**
 * Tells whether the document contains at least one {@code div} carrying a
 * {@code data-loc} attribute.
 *
 * @param doc the document to inspect
 * @return {@code true} if such a {@code div} exists
 */
protected boolean hasReplaceableElements(Document doc) {
    final boolean noneFound = doc.select("div[data-loc]").isEmpty();
    return !noneFound;
}