List of usage examples for org.jsoup.nodes.Document.select
public Elements select(String cssQuery)
From source file:ru.xxlabaza.popa.pack.PackingService.java
/**
 * Replaces local stylesheet links in the document with inline {@code <style>} elements.
 *
 * <p>Every {@code <link rel="stylesheet">} whose {@code href} does not start with
 * {@code http} is resolved through {@code build}, passed through the comment remover
 * and {@code correctURLs}, compressed unless its file name already ends with
 * {@code .min.css}, and finally swapped for a {@code <style>} element holding the
 * resulting CSS.
 *
 * @param document the parsed document; it is modified in place
 */
private void processCss(Document document) {
    for (Element link : document.select("link[rel=stylesheet]:not([href^=http])")) {
        Path cssFile = build.resolve(createPath(link.attr("href")));
        log.info("Processing style '{}'", cssFile);

        String css = commentRemoveService.removeComments(cssFile);
        css = correctURLs(cssFile, css);

        // Files named *.min.css are taken as already minified and are not compressed again.
        boolean alreadyMinified = cssFile.getFileName().toString().endsWith(".min.css");
        if (!alreadyMinified) {
            css = compressService.compress(css, CSS);
        }

        Element inlineStyle = document.createElement("style");
        inlineStyle.html(css);
        link.after(inlineStyle);
        link.remove();
    }
}
From source file:org.ala.lucene.CreateWordPressIndex.java
/** * Read the Google sitemap file on WP site and load up a list * of page URL./*from w w w. ja v a 2 s . c o m*/ * * @throws IOException */ protected void loadSitemap() throws IOException { Document doc = Jsoup.connect(WP_SITEMAP_URI).get(); Elements pages = doc.select("loc"); logger.info("Sitemap file lists " + pages.size() + " pages."); for (Element page : pages) { // add it to list of page urls Field this.pageUrls.add(page.text()); } }
From source file:org.manalith.ircbot.plugin.linuxpkgfinder.DebianPackageFinder.java
public String parseVersionInfo(Document doc) { Elements exactHits = doc.select("#psearchres").select("ul").get(0).select("li"); String result = ""; for (Element e : exactHits) { String dist;//from w w w . j a v a 2 s .c o m dist = e.select("a").text(); String version = " ?"; String[] versionLines = e.toString().split("<br>"); for (String line : versionLines) { String v = line.split(": ")[0]; if (v.split("\\s").length > 1) continue; else { version = v; break; } } if (result.length() != 0) result += ", "; result += "\u0002" + dist + "\u0002: " + version; } return result; }
From source file:org.brunocvcunha.taskerbox.impl.crawler.SlexyAction.java
@Override public void action(final Document entry) { log.debug("Validating " + entry.title()); for (Element el : entry.select(".main").select("a")) { final String id = el.attr("href").replace("/view/", ""); final String title = id; if (canAct(id)) { addAct(id);// ww w .java 2 s . c o m spreadAction(id, title); serializeAlreadyAct(); sleep(FETCH_INTERVAL); } } }
From source file:org.brunocvcunha.taskerbox.impl.crawler.SniptAction.java
@Override public void action(final Document entry) { log.debug("Validating " + entry.title()); for (Element el : entry.select(".grid-block").select("a")) { final String id = el.attr("href").replace("http://snipt.org/", ""); final String title = id + " - " + el.text(); if (canAct(id)) { addAct(id);// w w w . j a v a 2s . co m spreadAction(id, title); serializeAlreadyAct(); sleep(FETCH_INTERVAL); } } }
From source file:cvegrabber.CVEController.java
@RequestMapping(value = "/newest", produces = { "application/json" }) public CVE[] cve() { String url = "https://web.nvd.nist.gov/view/vuln/search-results?query=&search_type=all&cves=on"; CVE[] cvearray = new CVE[10]; try {//from ww w . ja va 2s . com Document doc = Jsoup.connect(url).get(); Elements newest = doc.select( "a[id*=BodyPlaceHolder_cplPageContent_plcZones_lt_zoneCenter_VulnerabilitySearchResults_VulnResultsRepeater_CveDetailAnchor_]"); int counter = 0; for (Element cveid : newest) { if (counter == 10) break; cvearray[counter] = new CVE(cveid.text(), grabMitreData(cveid.text(), "description"), grabMitreData(cveid.text(), "references")); counter++; } for (int i = 0; i < 10; i++) { logger.info("CVEID: " + cvearray[i].getCVE() + " CVE Description: " + cvearray[i].getDescription() + " CVE References: " + cvearray[i].getReferences()); } } catch (Exception ex) { logger.error("Unable to fetch latest cves. " + ex.getMessage()); } return cvearray; }
From source file:hu.petabyte.redflags.engine.gear.parser.DocFamilyFetcher.java
/**
 * Reads the family members listed on a document family tab and attaches them to the
 * given notice.
 *
 * <p>Each {@code table.family} describes one member. Its id is the part before the
 * first {@code ':'} in the text of the header link matching {@code TED:NOTICE:}. A
 * member whose id equals the notice's own id is skipped. The first {@code td.bgGreen}
 * cell of the first body row is parsed as the publication date and, when present, the
 * second one as the deadline, both using {@code DATE_FORMAT}. A member that cannot be
 * parsed is logged with a warning and skipped; the remaining tables are still processed.
 *
 * @param notice       the notice that receives the family members
 * @param docFamilyTab the parsed document family tab
 * @return the same {@code notice} instance
 */
public Notice parseDocFamilyTab(Notice notice, Document docFamilyTab) {
    for (Element familyTable : docFamilyTab.select("table.family")) {
        try {
            String linkText = familyTable.select("thead a[href~=TED:NOTICE:]").first().text();
            NoticeID memberId = new NoticeID(linkText.split(":", 2)[0]);
            if (notice.getId().equals(memberId)) {
                // Same id as the notice being processed: not added as a member.
                continue;
            }

            Notice member = new Notice(memberId);
            Elements dateCells = familyTable.select("tbody tr").first().select("td.bgGreen");

            String rawPublicationDate = dateCells.get(0).text();
            Date publicationDate = new SimpleDateFormat(DATE_FORMAT).parse(rawPublicationDate);
            member.getData().setPublicationDate(publicationDate);

            if (dateCells.size() > 1) {
                String rawDeadline = dateCells.get(1).text();
                Date deadline = new SimpleDateFormat(DATE_FORMAT).parse(rawDeadline);
                member.getData().setDeadline(deadline);
            }

            notice.getFamilyMembers().add(member);
            LOG.trace("{} Member found: {} - {} (deadline: {})", notice.getId(), memberId, publicationDate,
                    member.getData().getDeadline());
        } catch (Exception e) {
            LOG.warn("Cannot parse a document family member of notice " + notice.getId(), e);
        }
    }
    return notice;
}
From source file:org.brunocvcunha.taskerbox.impl.crawler.PastebinAction.java
/**
 * Processes every link found inside {@code .maintable} of the given page.
 *
 * <p>The id is the link's {@code href} without its first character. Links without an
 * {@code href} value and links whose id starts with {@code "archive"} are skipped. The
 * title is the id followed by the link text. For each id accepted by {@code canAct},
 * the id is registered, the action is spread, the registry is serialized, and the
 * crawler sleeps for {@code FETCH_INTERVAL}.
 *
 * @param entry the fetched page
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element el : entry.select(".maintable").select("a")) {
        final String href = el.attr("href");
        if (href.isEmpty()) {
            // attr() yields "" for a missing attribute; substring(1) on it would throw
            // and abort the whole pass over the page.
            continue;
        }

        final String id = href.substring(1);
        if (id.startsWith("archive")) {
            continue;
        }

        final String title = id + " - " + el.text();
        if (canAct(id)) {
            addAct(id);
            spreadAction(id, title);
            serializeAlreadyAct();
            sleep(FETCH_INTERVAL);
        }
    }
}
From source file:com.gorsini.searcher.SearcherT411.java
/** * * @param movieToSearch//from w w w . j a v a 2 s .c o m * @return ArrayList<Movie> empty if no result */ public ArrayList<Movie> searchMovie(Movie movieToSearch) { try { /* curl "http://vod.canalplay.com/pages/recherche/challengeexplorer.aspx?action=4&search=hercule" -H "Referer: http://vod.canalplay.com/" ramne que les films dispo. */ String titleToSearch = movieToSearch.getTitle(); LOG.log(Level.FINER, "titre rechercher : {0}", titleToSearch); String url = makeURL(titleToSearch); Document doc = Jsoup.connect(url).get(); Elements movies = doc.select("table.results tbody tr"); if (movies.isEmpty()) { LOG.log(Level.FINER, "no movie found with title {0}", titleToSearch); return null; } else { ArrayList<Movie> result = new ArrayList<Movie>(); for (Element movie : movies) { MovieSelectorT411 selector = new MovieSelectorT411(); Movie movieFound = selector.selectMovie(movie, movieToSearch); if (movieFound != null) { LOG.log(Level.FINER, "film trouv:{0}", movieFound.toString()); result.add(movieFound); } } return result; } } catch (Exception e) { System.out.println("problme HTTP"); // Log.e(TAG, e.getMessage()); e.printStackTrace(); return null; } }
From source file:net.acesinc.convergentui.ConvergentUIResponseFilter.java
/**
 * Tells whether the document contains at least one {@code div} carrying a
 * {@code data-loc} attribute.
 *
 * @param doc the parsed document to inspect
 * @return {@code true} if the selector {@code div[data-loc]} matches anything
 */
protected boolean hasReplaceableElements(Document doc) {
    return !doc.select("div[data-loc]").isEmpty();
}