List of usage examples for org.jsoup.nodes.Document.select
public Elements select(String cssQuery)
From source file:org.brunocvcunha.taskerbox.impl.crawler.CodepadAction.java
/**
 * Scans every {@code .section} element of the given page. For each one, the id is the
 * first link's {@code href} with the codepad.org prefix removed, and the title is that id
 * followed by a single-line preview (at most 32 characters) of the {@code pre} text.
 * Ids accepted by {@code canAct} are recorded, dispatched via {@code spreadAction},
 * serialized, and followed by a {@code FETCH_INTERVAL} sleep.
 *
 * @param entry parsed page to scan
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element section : entry.select(".section")) {
        final String href = section.select("a").attr("href");
        final String id = href.replace("http://codepad.org/", "");

        // Collapse line breaks, then cap the preview at 32 characters.
        final String flattened = section.select("pre").text().replaceAll("\r?\n", " ");
        final String preview = flattened.length() > 32 ? flattened.substring(0, 32) : flattened;
        final String title = id + " - " + preview;

        if (!canAct(id)) {
            continue;
        }
        addAct(id);
        spreadAction(id, title);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}
From source file:org.brunocvcunha.taskerbox.impl.crawler.PastieAction.java
/**
 * Scans every {@code .pastePreview} element of the given page. For each one, the id is the
 * first link's {@code href} with the pastie.org prefix removed, and the title is that id
 * followed by a single-line preview (at most 32 characters) of the {@code pre} text.
 * Ids accepted by {@code canAct} are recorded, dispatched via {@code spreadAction},
 * serialized, and followed by a {@code FETCH_INTERVAL} sleep.
 *
 * @param entry parsed page to scan
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element preview : entry.select(".pastePreview")) {
        final String href = preview.select("a").attr("href");
        final String id = href.replace("http://pastie.org/pastes/", "");

        // Collapse line breaks, then cap the snippet at 32 characters.
        final String flattened = preview.select("pre").text().replaceAll("\r?\n", " ");
        final String snippet = flattened.length() > 32 ? flattened.substring(0, 32) : flattened;
        final String title = id + " - " + snippet;

        if (!canAct(id)) {
            continue;
        }
        addAct(id);
        spreadAction(id, title);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}
From source file:com.liato.bankdroid.banking.banks.Jojo.java
/**
 * Logs in, parses {@code response} as HTML and turns up to the first two rows of the
 * {@code .saldo_ok_wrapper} table into accounts, adding each balance to {@code balance}.
 *
 * @throws LoginException if the username or password is null or empty
 * @throws BankException if no account could be read from the page
 * @throws BankChoiceException not thrown directly by this body; presumably propagated from
 *         {@code super.update()} or {@code login()} (confirm against those methods)
 */
@Override
public void update() throws BankException, LoginException, BankChoiceException {
    super.update();
    if (username == null || password == null || username.length() == 0 || password.length() == 0) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    urlopen = login();
    Document d = Jsoup.parse(response);
    // select() returns an empty Elements rather than null when nothing matches.
    Elements rows = d.select(".saldo_ok_wrapper > table > tbody tr");

    // Read at most two rows, bounded by the actual row count so that get(i) is never
    // called with i == size() (which would throw IndexOutOfBoundsException).
    int rowCount = Math.min(2, rows.size());
    for (int i = 0; i < rowCount; i++) {
        Element row = rows.get(i);
        Element name = row.select(".first").first();
        Element amount = row.select(".right").first();
        if (name != null && amount != null) {
            // The row index doubles as the account id.
            Account a = new Account(name.text().replaceAll(":", "").trim(),
                    Helpers.parseBalance(amount.text()), Integer.toString(i));
            accounts.add(a);
            balance = balance.add(a.getBalance());
        }
    }

    if (accounts.isEmpty()) {
        throw new BankException(res.getText(R.string.no_accounts_found).toString());
    }
    super.updateComplete();
}
From source file:com.johan.vertretungsplan.parser.UntisSubstitutionParser.java
/**
 * Logs in, loads the page at {@code baseUrl}, and follows every {@code td a} link found
 * there. Each linked page contributes its {@code table[rules=all]} table to a single
 * {@link VertretungsplanTag}; the tag's date is taken from the first linked page whose
 * {@code font[size=5]} text matches the day pattern.
 *
 * @return a plan holding exactly one day entry
 * @throws IOException declared; presumably raised by the HTTP helpers
 * @throws JSONException if the {@code "encoding"} key cannot be read from the school data
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    final String encoding = schule.getData().getString("encoding");
    final Document indexDoc = Jsoup.parse(this.httpGet(baseUrl, encoding));
    final Elements classLinks = indexDoc.select("td a");

    final Vertretungsplan plan = new Vertretungsplan();
    final List<VertretungsplanTag> days = new ArrayList<VertretungsplanTag>();
    final VertretungsplanTag day = new VertretungsplanTag();
    days.add(day);
    plan.setTage(days);

    day.setStand(indexDoc.select("td[align=right]:not(:has(b))").text());

    // NOTE(review): the dots in this pattern are unescaped and match any character -
    // confirm whether literal dots were intended.
    final Pattern dayPattern = Pattern.compile("\\d\\d?.\\d\\d?. / \\w+");

    for (Element classLink : classLinks) {
        // Links are resolved against the directory part of baseUrl.
        final String classUrl = baseUrl.substring(0, baseUrl.lastIndexOf("/")) + "/" + classLink.attr("href");
        final Document classDoc = Jsoup.parse(httpGet(classUrl, encoding));

        if (day.getDatum() == null) {
            final String heading = classDoc.select("font[size=5]").text();
            final Matcher dayMatcher = dayPattern.matcher(heading);
            if (dayMatcher.find()) {
                day.setDatum(dayMatcher.group());
            }
        }

        // first() yields null when the page has no such table; the helper receives it as-is.
        final Element substitutionTable = classDoc.select("table[rules=all]").first();
        parseVertretungsplanTable(substitutionTable, data, day);
    }
    return plan;
}
From source file:org.manalith.ircbot.plugin.linuxpkgfinder.DebianPackageFinder.java
/**
 * Fetches the search page at {@code url} and builds a one-line answer for the package
 * listed under the "Exact hits" heading.
 *
 * @param url search URL to fetch (20 second timeout)
 * @param commandName label placed in square brackets at the start of the answer
 * @return {@code "[commandName] "} alone when there is no result section or no
 *         "Exact hits" heading; otherwise the label followed by the package name,
 *         description and version info; or {@code ": " + message} if anything throws
 */
public String find(String url, String commandName) {
    final String emptyAnswer = "[" + commandName + "] ";
    String result = "";
    try {
        Connection conn = Jsoup.connect(url);
        conn.timeout(20000);
        Document doc = conn.get();

        Elements searchResults = doc.select("#psearchres");
        if (searchResults.size() == 0) {
            return emptyAnswer;
        }

        boolean exactHitsFound = false;
        for (Element heading : searchResults.select("h2")) {
            if (heading.text().equals("Exact hits")) {
                exactHitsFound = true;
                break;
            }
        }
        if (!exactHitsFound) {
            return emptyAnswer;
        }

        // Second whitespace-separated token of the first h3 is used as the package name.
        String pkgname = searchResults.select("h3").get(0).text().split("\\s")[1];

        // The last entry of the first list is used for the description.
        Elements exactHits = searchResults.select("ul").get(0).select("li");
        Element latestElement = exactHits.get(exactHits.size() - 1);
        String description = latestElement.toString().split("<br>")[0].split("\\:")[1].trim();

        result = "[" + commandName + "] \u0002" + pkgname + "\u0002 - " + description + ", "
                + parseVersionInfo(doc) + ".";
    } catch (Exception e) {
        // Any failure (network, unexpected markup) is reported back as the answer text.
        logger.error(e.getMessage(), e);
        result = ": " + e.getMessage();
    }
    return result;
}
From source file:neembuu.release1.externalImpl.linkhandler.SaveVideoYoutubeLinkHandlerProvider.java
/**
 * Returns the name of the video, read from the {@code content} attribute of the page's
 * {@code meta[name=title]} element.
 *
 * @param url The url of the video.
 * @return the title of the video; an empty string when no such meta element is present
 * @throws Exception declared; presumably raised by the HTTP fetch
 */
private String getVideoName(String url) throws Exception {
    final String html = NHttpClientUtils.getData(url, NHttpClient.getNewInstance());
    return Jsoup.parse(html).select("meta[name=title]").attr("content");
}
From source file:com.clonescriptscrapper.crawler.CloneScriptScrapper.java
public void crawledCategories() throws URISyntaxException, IOException, InterruptedException, Exception { String url = "http://www.clonescriptdirectory.com/"; // Document doc = Jsoup.parse(fetchPage(new URI(url))); String response = ""; response = new GetRequestHandler().doGetRequest(new URL(url)); Document doc = Jsoup.parse(response); // System.out.println("---" + doc); Elements ele = doc.select("table[class=categories] tbody tr td a"); for (Element ele1 : ele) { objCategories = new Categories(); String title = ele1.text(); String href = ele1.attr("href"); System.out.println("Title : " + ele1.text()); System.out.println("Href : " + ele1.attr("href")); objCategories.setCategoryName(title); objCategories.setCategoryUrl(href); objCloneScriptDirectoryDaoImpl.insertCategoriesData(objCategories); }/* www . ja v a 2 s .c o m*/ List<Future<String>> list = new ArrayList<Future<String>>(); ExecutorService executor = Executors.newFixedThreadPool(5); List<Categories> listCatogories = objCloneScriptDirectoryDaoImpl.getCategoriesDataList(); for (Categories listCatogory : listCatogories) { try { objCloneScriptDirectoryDaoImpl.updateCategoriesData(objCategories); Callable worker = new CrawlingEachUrlData(listCatogory, objCloneScriptDirectoryDaoImpl); Future<String> future = executor.submit(worker); list.add(future); } catch (Exception exx) { System.out.println(exx); } } for (Future<String> fut : list) { try { //print the return value of Future, notice the output delay in console // because Future.get() waits for task to get completed System.out.println(new Date() + "::" + fut.get()); } catch (InterruptedException | ExecutionException ep) { ep.printStackTrace(); } } //shut down the executor service now executor.shutdown(); // objcrawlingUrlData.crawlingUrlData(href); }
From source file:com.jejking.hh.nord.corpus.AllrisHtmlToRawDrucksache.java
/**
 * Extracts the id from the first {@code #risname > h1} element by dropping the leading
 * {@code "Drucksache - "} prefix length from its text and passing the remainder through
 * {@code removeNonBreakingSpacesAndTrim}.
 *
 * @param htmlDoc parsed page
 * @return the cleaned id text
 */
private String druckSacheId(Document htmlDoc) {
    final Element heading = htmlDoc.select("#risname > h1").first();
    final String withoutPrefix = heading.text().substring("Drucksache - ".length());
    return removeNonBreakingSpacesAndTrim(withoutPrefix);
}
From source file:com.halzhang.android.apps.startupnews.snkit.SNApi.java
/** * cookie/* w ww.j a v a2s . c om*/ * <p> * pc?pccookie * </p> * * @param url */ public void verificateCookie(Context context, String url) { Connection conn = JsoupFactory.getInstance(context).newJsoupConnection(url); if (conn != null) { try { Document doc = conn.get(); Elements elements = doc.select("a:matches(logout)"); if (elements.size() < 1) { // cookie SessionManager.getInstance(context).clear(); } } catch (IOException e) { EasyTracker.getTracker().sendException(e.getMessage(), e, false); } } }
From source file:com.subgraph.vega.internal.analysis.urls.HtmlUrlExtractor.java
private List<URI> extractURIs(Document document, String query, String attribute) { final ArrayList<URI> uris = new ArrayList<URI>(); for (Element e : document.select(query)) { String link = e.attr(attribute); URI uri = createURI(link); if (uri != null) uris.add(uri);//from w w w . j ava 2 s . co m } return uris; }