List of usage examples for org.jsoup.nodes.Document.select
public Elements select(String cssQuery)
From source file:com.gorsini.searcher.CanalplaySearcher.java
public void check() throws HTMLChangeException, IOException { String url = makeURL("intouchables"); Document doc = Jsoup.connect(url).referrer("http://vod.canalplay.com/").get(); Elements movies = doc.select("div.list_movie"); String html = movies.html();//from w w w.j a v a 2s .c o m String previousHTML = null; LOG.finest(html); File file = new File(CHECK_FILENAME); if (file.exists()) { previousHTML = FileUtils.readFileToString(new File(CHECK_FILENAME)); } else { LOG.log(Level.INFO, "sauvegarde check"); FileUtils.writeStringToFile(file, html); } if (previousHTML != null && !html.equals(previousHTML)) { // sauvegarde la nouvelle version pour pouvoir la comparer. FileUtils.writeStringToFile(new File(CHECK_FILENAME + ".new"), html); throw new HTMLChangeException(); } else { LOG.log(Level.INFO, "no change detected into HTML response"); } }
From source file:com.mycompany.crawlertest.GrabPage.java
@Override public GrabPage call() throws Exception { Document document = null; document = Jsoup.parse(url, TIMEOUT); //processHeaders(document.select("h1")); processLinks(document.select("a[href]")); return this; }
From source file:cvegrabber.CVEController.java
private String grabMitreData(String cveid, String data) throws IOException { //String url = "http://www.cvedetails.com/cve/" + cveid + "/"; String url = "http://cve.mitre.org/cgi-bin/cvename.cgi?name=" + cveid; Document doc = Jsoup.connect(url).get(); String dataToReturn = ""; if (doc.select("h2").text().contains("ERROR")) { dataToReturn = "CVE " + cveid + " Unknown or CVE Not Loaded Yet."; return dataToReturn; } else if (data.matches("references")) { //Elements references = doc.select("td.r_average"); Elements references = doc.select("li"); int counter = 0; for (Element reference : references) { if (counter == 0) { //dataToReturn += link.select("a[href]").text(); dataToReturn += reference.text(); counter++;/*w w w . ja v a 2 s. c o m*/ } else { //dataToReturn += "," + link.select("a[href]").text(); dataToReturn += "," + reference.text(); } } } else if (data.matches("description")) { //Element description = doc.select("div.cvedetailssummary").first(); Elements tds = doc.select("td[colspan=\"2\"]"); if (tds.eq(2).text().contains("** RESERVED **")) { return "No data on mitre yet."; } dataToReturn = tds.eq(2).text(); } return dataToReturn; }
From source file:com.liato.bankdroid.banking.banks.MinPension.java
/**
 * Opens the login page, reads its {@code __RequestVerificationToken} input and builds the form
 * data (token, username, password) for the actual login request.
 *
 * @return the login package holding the HTTP client, the form data and the login target URL
 * @throws BankException if the verification token input is not present on the login page
 * @throws IOException if the login page cannot be loaded
 */
@Override
protected LoginPackage preLogin() throws BankException, IOException {
    List<NameValuePair> postData = new ArrayList<>();
    urlopen = new Urllib(context, CertificateReader.getCertificates(context, R.raw.cert_minpension));

    String loginPage = urlopen.open("https://www.minpension.se/inloggning");
    Element tokenInput = Jsoup.parse(loginPage).select("input[name=__RequestVerificationToken]").first();
    if (tokenInput == null) {
        throw new BankException(res.getText(R.string.unable_to_find).toString() + " token.");
    }

    postData.add(new BasicNameValuePair("__RequestVerificationToken", tokenInput.val()));
    postData.add(new BasicNameValuePair("viewModel.Personnummer", getUsername()));
    postData.add(new BasicNameValuePair("viewModel.Kod", getPassword()));

    return new LoginPackage(urlopen, postData, null, "https://www.minpension.se/inloggning/personlig-kod");
}
From source file:me.vertretungsplan.parser.IndiwareStundenplan24Parser.java
/**
 * Downloads the per-day {@code VplanKl<yyyyMMdd>.xml} files for the next {@code MAX_DAYS} days
 * and assembles them into a substitution schedule.
 * <p>
 * When the configuration has a {@code schoolNumber}, the stundenplan24.de URL is used and a
 * username/password credential is required; otherwise the configured {@code baseurl} is used
 * and login is delegated to {@code LoginHandler}.
 *
 * @return the schedule containing one day per successfully downloaded, matching XML file
 * @throws IOException if no username/password credential is available for a stundenplan24.de
 *                     school, or if a download fails with a status other than 404 or 300
 * @throws JSONException if a required configuration value is missing
 * @throws CredentialInvalidException declared for the login handling
 */
@Override
public SubstitutionSchedule getSubstitutionSchedule()
        throws IOException, JSONException, CredentialInvalidException {
    String baseurl;
    if (data.has("schoolNumber")) {
        baseurl = "http://www.stundenplan24.de/" + data.getString("schoolNumber") + "/vplan/";
        // instanceof is already false for null, so no separate null check is needed.
        if (!(credential instanceof UserPasswordCredential)) {
            throw new IOException("no login");
        }
        UserPasswordCredential userPassword = (UserPasswordCredential) credential;
        String login = userPassword.getUsername();
        String password = userPassword.getPassword();
        executor.auth(login, password);
    } else {
        baseurl = data.getString("baseurl") + "/";
        new LoginHandler(scheduleData, credential, cookieProvider).handleLogin(executor, cookieStore);
    }

    // Capture "today" once: the loop performs network requests, and re-reading the clock on each
    // iteration would skip a day if the run crosses midnight.
    final LocalDate today = LocalDate.now();

    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < MAX_DAYS; i++) {
        LocalDate date = today.plusDays(i);
        String dateStr = DateTimeFormat.forPattern("yyyyMMdd").print(date);
        // The timestamp query parameter makes every request URL unique.
        String url = baseurl + "vdaten/VplanKl" + dateStr + ".xml?_=" + System.currentTimeMillis();
        try {
            String xml = httpGet(url, ENCODING);
            Document doc = Jsoup.parse(xml, url, Parser.xmlParser());
            // Only accept the document if it names the file that was requested.
            if (doc.select("kopf datei").text().equals("VplanKl" + dateStr + ".xml")) {
                docs.add(doc);
            }
        } catch (HttpResponseException e) {
            // 404 and 300 are tolerated and the day is simply left out; anything else is fatal.
            if (e.getStatusCode() != 404 && e.getStatusCode() != 300) {
                throw e;
            }
        }
    }

    SubstitutionSchedule v = SubstitutionSchedule.fromData(scheduleData);
    for (Document doc : docs) {
        v.addDay(parseIndiwareDay(doc, false));
    }
    v.setWebsite(baseurl);
    v.setClasses(getAllClasses());
    v.setTeachers(getAllTeachers());
    return v;
}
From source file:com.johan.vertretungsplan.parser.UntisInfoHeadlessParser.java
@Override public Vertretungsplan getVertretungsplan() throws IOException, JSONException { new LoginHandler(schule).handleLogin(executor, cookieStore, username, password); Vertretungsplan v = new Vertretungsplan(); List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>(); Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = new VertretungsplanTag(); tag.setStand(""); tag.setDatum(day.text());//from w w w . j a va 2 s .c o m Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } tage.add(tag); } v.setTage(tage); return v; }
From source file:googleranking.processing.GoogleData.java
public List<String> getLinksInPage() { Document doc = getGoogleHtml(); List<String> ret = new ArrayList<String>(); try {/* ww w .j a v a 2 s. com*/ Elements links = doc.select(".g>.r>a"); for (Element link : links) { String url = link.absUrl("href"); url = URLDecoder.decode(url.substring(url.indexOf("=") + 1, url.indexOf("&")), "UTF-8"); if (url.startsWith("http") || url.startsWith("https")) { ret.add(getDomain(url)); // Ads/news/etc } } } catch (Exception e) { Logger.getLogger(GoogleData.class.getName()).log(Level.SEVERE, null, e); } return ret; }
From source file:com.astamuse.asta4d.render.RenderUtil.java
/** * Find out all the snippet in the passed Document and execute them. The Containing embed tag of the passed Document will be exactly * mixed in here too. <br>/*from ww w . ja v a 2s.c o m*/ * Recursively contained snippets will be executed from outside to inside, thus the inner snippets will not be executed until all of * their outer snippets are finished. Also, the dynamically created snippets and embed tags will comply with this rule too. * * @param doc * the Document to apply snippets * @throws SnippetNotResovlableException * @throws SnippetInvokeException * @throws TemplateException */ public final static void applySnippets(Document doc) throws SnippetNotResovlableException, SnippetInvokeException, TemplateException, TemplateNotFoundException { if (doc == null) { return; } applyClearAction(doc, false); // retrieve ready snippets String selector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_READY); List<Element> snippetList = new ArrayList<>(doc.select(selector)); int readySnippetCount = snippetList.size(); int blockedSnippetCount = 0; for (int i = readySnippetCount - 1; i >= 0; i--) { // if parent snippet has not been executed, the current snippet will // not be executed too. if (isBlockedByParentSnippet(doc, snippetList.get(i))) { snippetList.remove(i); blockedSnippetCount++; } } readySnippetCount = readySnippetCount - blockedSnippetCount; String renderDeclaration; Renderer renderer; Context context = Context.getCurrentThreadContext(); Configuration conf = Configuration.getConfiguration(); final SnippetInvoker invoker = conf.getSnippetInvoker(); String refId; String currentTemplatePath; Element renderTarget; for (Element element : snippetList) { if (!conf.isSkipSnippetExecution()) { // for a faked snippet node which is created by template // analyzing process, the render target element should be its // child. 
if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE) .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) { renderTarget = element.children().first(); // the hosting element of this faked snippet has been removed by outer a snippet if (renderTarget == null) { element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED); continue; } } else { renderTarget = element; } // we have to reset the ref of current snippet at every time to make sure the ref is always unique(duplicated snippet ref // could be created by list rendering) TemplateUtil.resetSnippetRefs(element); context.setCurrentRenderingElement(renderTarget); renderDeclaration = element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_RENDER); refId = element.attr(ExtNodeConstants.ATTR_SNIPPET_REF); currentTemplatePath = element.attr(ExtNodeConstants.ATTR_TEMPLATE_PATH); context.setCurrentRenderingElement(renderTarget); context.setData(TRACE_VAR_TEMPLATE_PATH, currentTemplatePath); try { if (element.hasAttr(ExtNodeConstants.SNIPPET_NODE_ATTR_PARALLEL)) { ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc); final Context newContext = context.clone(); final String declaration = renderDeclaration; crHelper.submitWithContext(newContext, declaration, refId, new Callable<Renderer>() { @Override public Renderer call() throws Exception { return invoker.invoke(declaration); } }); element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_WAITING); } else { renderer = invoker.invoke(renderDeclaration); applySnippetResultToElement(doc, refId, element, renderTarget, renderer); } } catch (SnippetNotResovlableException | SnippetInvokeException e) { throw e; } catch (Exception e) { SnippetInvokeException se = new SnippetInvokeException( "Error occured when executing rendering on [" + renderDeclaration + "]:" + e.getMessage(), e); throw se; } context.setData(TRACE_VAR_TEMPLATE_PATH, null); 
context.setCurrentRenderingElement(null); } else {// if skip snippet element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED); } } // load embed nodes which blocking parents has finished List<Element> embedNodeList = doc.select(ExtNodeConstants.EMBED_NODE_TAG_SELECTOR); int embedNodeListCount = embedNodeList.size(); Iterator<Element> embedNodeIterator = embedNodeList.iterator(); Element embed; Element embedContent; while (embedNodeIterator.hasNext()) { embed = embedNodeIterator.next(); if (isBlockedByParentSnippet(doc, embed)) { embedNodeListCount--; continue; } embedContent = TemplateUtil.getEmbedNodeContent(embed); TemplateUtil.mergeBlock(doc, embedContent); embed.before(embedContent); embed.remove(); } if ((readySnippetCount + embedNodeListCount) > 0) { TemplateUtil.regulateElement(null, doc); applySnippets(doc); } else { ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc); String delcaration = null; if (crHelper.hasUnCompletedTask()) { delcaration = null; try { FutureRendererHolder holder = crHelper.take(); delcaration = holder.getRenderDeclaration(); String ref = holder.getSnippetRefId(); String reSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR, ExtNodeConstants.ATTR_SNIPPET_REF, ref); Element element = doc.select(reSelector).get(0);// must have Element target; if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE) .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) { target = element.children().first(); } else { target = element; } applySnippetResultToElement(doc, ref, element, target, holder.getRenderer()); applySnippets(doc); } catch (InterruptedException | ExecutionException e) { throw new SnippetInvokeException("Concurrent snippet invocation failed" + (delcaration == null ? "" : " on [" + delcaration + "]"), e); } } } }
From source file:amazonechoapi.AmazonEchoApi.java
public boolean httpLogin() { try {//from w w w . j a v a 2s.c om String output = httpGet(""); Document doc = Jsoup.parse(output); Elements forms = doc.select("form"); String action = forms.attr("action"); if (action.isEmpty()) { return false; } Elements hidden = doc.select("input[type=hidden]"); List<NameValuePair> nvps = new ArrayList<>(); nvps.add(new BasicNameValuePair("email", USERNAME)); nvps.add(new BasicNameValuePair("password", PASSWORd)); nvps.add(new BasicNameValuePair("create", "0")); for (Element el1 : hidden) { nvps.add(new BasicNameValuePair(el1.attr("name"), el1.attr("value"))); } HttpPost httpPost = new HttpPost(action); httpPost.setHeader(HttpHeaders.USER_AGENT, "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13"); httpPost.setHeader(HttpHeaders.REFERER, BASE_URL); httpPost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8)); HttpResponse httpResponse = httpclient.execute(httpPost); httpResponse.getEntity(); HttpEntity entity = httpResponse.getEntity(); if (entity != null) { EntityUtils.consume(entity); } System.out.println("Login successful"); return true; } catch (Exception e) { System.out.println("Login Error:" + e.getMessage()); return false; } }
From source file:com.liato.bankdroid.banking.banks.MinPension.java
/**
 * Performs the login: submits the form prepared by {@code preLogin()}, follows the redirect
 * page to read its {@code #authenticationResult} value and posts that value to the
 * "minasidor" site.
 *
 * @return the HTTP client used for the login
 * @throws LoginException if the login response does not contain {@code LoggaUt.aspx} or the
 *                        redirect page has no {@code #authenticationResult} element
 * @throws BankException propagated from {@code preLogin()}
 * @throws IOException if a request fails
 */
@Override
public Urllib login() throws LoginException, BankException, IOException {
    LoginPackage loginPackage = preLogin();

    String loginResponse = urlopen.open(loginPackage.getLoginTarget(), loginPackage.getPostData(), true);
    if (!loginResponse.contains("LoggaUt.aspx")) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    String redirectPage = urlopen
            .open("https://www.minpension.se/mina-sidor/redirect?path=MinPension%2FDefault.aspx&bodyMargin=0");
    Element authResult = Jsoup.parse(redirectPage).select("#authenticationResult").first();
    if (authResult == null) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    List<NameValuePair> postData = new ArrayList<>();
    postData.add(new BasicNameValuePair("authenticationResult", authResult.val()));
    urlopen.open("https://minasidor.minpension.se/MinPension/Default.aspx", postData, true);
    return urlopen;
}