Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of the select method of org.jsoup.nodes.Document.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.gorsini.searcher.CanalplaySearcher.java

/**
 * Detects changes in the HTML returned for a fixed reference search.
 * <p>
 * Fetches the page built by {@code makeURL("intouchables")}, extracts the HTML of the
 * {@code div.list_movie} elements and compares it with the content of the file named by
 * {@code CHECK_FILENAME}. When that file does not exist yet, the fetched HTML is written to it
 * and no comparison takes place.
 *
 * @throws HTMLChangeException if the fetched HTML differs from the stored one; the new HTML is
 *                             first written to {@code CHECK_FILENAME + ".new"}
 * @throws IOException         if the page cannot be fetched or a file cannot be read or written
 */
public void check() throws HTMLChangeException, IOException {
    final String url = makeURL("intouchables");
    final Document doc = Jsoup.connect(url).referrer("http://vod.canalplay.com/").get();
    final String html = doc.select("div.list_movie").html();
    LOG.finest(html);

    final File reference = new File(CHECK_FILENAME);
    String previousHTML = null;
    if (!reference.exists()) {
        // First run: store the current HTML as the reference version.
        LOG.log(Level.INFO, "sauvegarde check");
        FileUtils.writeStringToFile(reference, html);
    } else {
        previousHTML = FileUtils.readFileToString(reference);
    }

    if (previousHTML == null || html.equals(previousHTML)) {
        LOG.log(Level.INFO, "no change detected into HTML response");
        return;
    }

    // Save the new version so that it can be compared with the reference.
    FileUtils.writeStringToFile(new File(CHECK_FILENAME + ".new"), html);
    throw new HTMLChangeException();
}

From source file:com.mycompany.crawlertest.GrabPage.java

/**
 * Parses the page referenced by {@code url} (using {@code TIMEOUT}) and passes every anchor
 * that carries an {@code href} attribute to {@code processLinks}.
 *
 * @return this instance
 * @throws Exception if parsing the page or processing its links fails
 */
@Override
public GrabPage call() throws Exception {
    final Document page = Jsoup.parse(url, TIMEOUT);
    // Header processing is currently disabled: processHeaders(page.select("h1"));
    processLinks(page.select("a[href]"));
    return this;
}

From source file:cvegrabber.CVEController.java

/**
 * Looks up a CVE entry on cve.mitre.org and extracts the requested piece of data.
 *
 * @param cveid the CVE identifier; appended as the {@code name} query parameter of the lookup URL
 * @param data  which data to extract: {@code "references"} yields the text of every
 *              {@code <li>} element of the page joined with commas, {@code "description"}
 *              yields the text of the third {@code td[colspan="2"]} cell; any other value
 *              yields an empty string
 * @return the extracted text; an "unknown CVE" message when the combined {@code <h2>} text of
 *         the page contains {@code ERROR}; or {@code "No data on mitre yet."} when the
 *         description cell contains {@code ** RESERVED **}
 * @throws IOException if the page cannot be fetched
 */
private String grabMitreData(String cveid, String data) throws IOException {
    String url = "http://cve.mitre.org/cgi-bin/cvename.cgi?name=" + cveid;
    Document doc = Jsoup.connect(url).get();

    if (doc.select("h2").text().contains("ERROR")) {
        return "CVE " + cveid + " Unknown or CVE Not Loaded Yet.";
    }

    // Plain equality is what is meant here; String.matches would compile a regex per call.
    if (data.equals("references")) {
        StringBuilder joined = new StringBuilder();
        String separator = "";
        for (Element reference : doc.select("li")) {
            joined.append(separator).append(reference.text());
            // Every item after the first is preceded by a comma, even if the first was empty.
            separator = ",";
        }
        return joined.toString();
    }

    if (data.equals("description")) {
        // Evaluate the selector once and reuse the text for both the check and the result.
        String description = doc.select("td[colspan=\"2\"]").eq(2).text();
        if (description.contains("** RESERVED **")) {
            return "No data on mitre yet.";
        }
        return description;
    }

    return "";
}

From source file:com.liato.bankdroid.banking.banks.MinPension.java

/**
 * Prepares the login request for minpension.se.
 * <p>
 * Opens the login page, reads the value of the {@code __RequestVerificationToken} input and
 * bundles it with the user name and password as POST data for the personal-code login URL.
 *
 * @return the login package holding the connection, the POST data and the login target
 * @throws BankException if the verification token input is not present on the login page
 * @throws IOException   if the login page cannot be opened
 */
@Override
protected LoginPackage preLogin() throws BankException, IOException {
    urlopen = new Urllib(context, CertificateReader.getCertificates(context, R.raw.cert_minpension));
    final String loginPage = urlopen.open("https://www.minpension.se/inloggning");

    final Element tokenInput = Jsoup.parse(loginPage)
            .select("input[name=__RequestVerificationToken]")
            .first();
    if (tokenInput == null) {
        throw new BankException(res.getText(R.string.unable_to_find).toString() + " token.");
    }

    final List<NameValuePair> postData = new ArrayList<>();
    postData.add(new BasicNameValuePair("__RequestVerificationToken", tokenInput.val()));
    postData.add(new BasicNameValuePair("viewModel.Personnummer", getUsername()));
    postData.add(new BasicNameValuePair("viewModel.Kod", getPassword()));

    return new LoginPackage(urlopen, postData, null,
            "https://www.minpension.se/inloggning/personlig-kod");
}

From source file:me.vertretungsplan.parser.IndiwareStundenplan24Parser.java

/**
 * Downloads the per-day XML plans and assembles them into a substitution schedule.
 * <p>
 * The base URL is derived from the {@code schoolNumber} entry of {@code data} when present
 * (which requires a user/password credential), otherwise from its {@code baseurl} entry. For
 * each of the next {@code MAX_DAYS} days, starting today, the file
 * {@code vdaten/VplanKl<yyyyMMdd>.xml} is requested; a document is kept only when its
 * {@code kopf datei} text equals the requested file name. Responses with status 404 or 300
 * are ignored.
 *
 * @return the schedule containing one day per accepted document, plus website, classes and
 *         teachers
 * @throws IOException                if no user/password credential is available for the
 *                                    {@code schoolNumber} variant, or a request fails
 * @throws JSONException              if a required entry cannot be read from {@code data}
 * @throws CredentialInvalidException declared for the login and parsing helpers
 */
@Override
public SubstitutionSchedule getSubstitutionSchedule()
        throws IOException, JSONException, CredentialInvalidException {

    final String baseurl;
    if (!data.has("schoolNumber")) {
        baseurl = data.getString("baseurl") + "/";
        new LoginHandler(scheduleData, credential, cookieProvider).handleLogin(executor, cookieStore);
    } else {
        baseurl = "http://www.stundenplan24.de/" + data.getString("schoolNumber") + "/vplan/";
        // instanceof is false for null, so this also covers a missing credential.
        if (!(credential instanceof UserPasswordCredential)) {
            throw new IOException("no login");
        }
        final UserPasswordCredential userPassword = (UserPasswordCredential) credential;
        final String login = userPassword.getUsername();
        final String password = userPassword.getPassword();
        executor.auth(login, password);
    }

    final List<Document> docs = new ArrayList<>();
    for (int dayOffset = 0; dayOffset < MAX_DAYS; dayOffset++) {
        final LocalDate date = LocalDate.now().plusDays(dayOffset);
        final String dateStr = DateTimeFormat.forPattern("yyyyMMdd").print(date);
        final String fileName = "VplanKl" + dateStr + ".xml";
        // The timestamp query parameter makes every request URL unique.
        final String url = baseurl + "vdaten/" + fileName + "?_=" + System.currentTimeMillis();
        try {
            final Document doc = Jsoup.parse(httpGet(url, ENCODING), url, Parser.xmlParser());
            if (doc.select("kopf datei").text().equals(fileName)) {
                docs.add(doc);
            }
        } catch (HttpResponseException e) {
            final int status = e.getStatusCode();
            if (status != 404 && status != 300) {
                throw e;
            }
        }
    }

    final SubstitutionSchedule schedule = SubstitutionSchedule.fromData(scheduleData);
    for (Document doc : docs) {
        schedule.addDay(parseIndiwareDay(doc, false));
    }
    schedule.setWebsite(baseurl);
    schedule.setClasses(getAllClasses());
    schedule.setTeachers(getAllTeachers());
    return schedule;
}

From source file:com.johan.vertretungsplan.parser.UntisInfoHeadlessParser.java

/**
 * Logs in, downloads the plan page and parses one {@code VertretungsplanTag} per day heading.
 * <p>
 * Day headings are the {@code <b>} elements matched by
 * {@code #vertretung > p > b, #vertretung > b}. The element following a heading is either the
 * substitution table (class {@code subst}) or a news block, in which case the table is
 * expected two siblings further on. Days whose table states that substitutions are not
 * released are left out.
 *
 * @return the plan holding all parsed days
 * @throws IOException   if login or the page download fails
 * @throws JSONException if the encoding cannot be read from the school data
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    Vertretungsplan plan = new Vertretungsplan();
    List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>();

    Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
    for (Element day : doc.select("#vertretung > p > b, #vertretung > b")) {
        VertretungsplanTag tag = new VertretungsplanTag();
        tag.setStand("");
        tag.setDatum(day.text());

        // Locate the first content element of this day, depending on where the heading sits.
        Element parent = day.parent();
        Element next = parent.tagName().equals("p")
                ? parent.nextElementSibling().nextElementSibling()
                : parent.select("p").first().nextElementSibling();

        if (!next.className().equals("subst")) {
            // News block first, substitution table two siblings later.
            parseNachrichten(next, data, tag);
            next = next.nextElementSibling().nextElementSibling();
            parseVertretungsplanTable(next, data, tag);
        } else if (next.text().contains("Vertretungen sind nicht freigegeben")) {
            // Substitutions for this day are not released: skip the day entirely.
            continue;
        } else {
            // Substitution table
            parseVertretungsplanTable(next, data, tag);
        }
        tage.add(tag);
    }

    plan.setTage(tage);
    return plan;
}

From source file:googleranking.processing.GoogleData.java

/**
 * Collects the domains of the result links found in the Google results page.
 * <p>
 * Every anchor matching {@code .g>.r>a} is treated as a redirect link whose target is the
 * URL-encoded text between the first {@code '='} and the first {@code '&'} of its absolute
 * {@code href}. An anchor without that shape is skipped on its own, so a single unexpected
 * link no longer discards all links that follow it. Decoded targets that do not start with
 * {@code http} are ignored (per the original note: ads, news, etc.).
 * <p>
 * Any other failure (including a {@code null} document) is logged at {@code SEVERE} and ends
 * the collection; the links gathered so far are still returned.
 *
 * @return the {@code getDomain} result for every accepted link, in page order; never
 *         {@code null}
 */
public List<String> getLinksInPage() {
    Document doc = getGoogleHtml();
    List<String> ret = new ArrayList<String>();
    try {
        for (Element link : doc.select(".g>.r>a")) {
            String href = link.absUrl("href");
            int start = href.indexOf('=') + 1;
            int end = href.indexOf('&');
            if (end < start) {
                // No "=<target>&" section: substring(start, end) would throw, so skip this link.
                continue;
            }
            String url = URLDecoder.decode(href.substring(start, end), "UTF-8");
            // "http" is a prefix of "https", so one check covers both schemes.
            if (url.startsWith("http")) {
                ret.add(getDomain(url));
            }
        }
    } catch (Exception e) {
        Logger.getLogger(GoogleData.class.getName()).log(Level.SEVERE, null, e);
    }
    return ret;
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Find out all the snippet in the passed Document and execute them. The Containing embed tag of the passed Document will be exactly
 * mixed in here too. <br>/*from  ww  w .  ja v  a 2s.c  o m*/
 * Recursively contained snippets will be executed from outside to inside, thus the inner snippets will not be executed until all of
 * their outer snippets are finished. Also, the dynamically created snippets and embed tags will comply with this rule too.
 * 
 * @param doc
 *            the Document to apply snippets
 * @throws SnippetNotResovlableException
 * @throws SnippetInvokeException
 * @throws TemplateException
 */
public final static void applySnippets(Document doc) throws SnippetNotResovlableException,
        SnippetInvokeException, TemplateException, TemplateNotFoundException {
    if (doc == null) {
        return;
    }

    applyClearAction(doc, false);

    // retrieve ready snippets
    String selector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR,
            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_READY);
    List<Element> snippetList = new ArrayList<>(doc.select(selector));
    int readySnippetCount = snippetList.size();
    int blockedSnippetCount = 0;
    for (int i = readySnippetCount - 1; i >= 0; i--) {
        // if parent snippet has not been executed, the current snippet will
        // not be executed too.
        if (isBlockedByParentSnippet(doc, snippetList.get(i))) {
            snippetList.remove(i);
            blockedSnippetCount++;
        }
    }
    readySnippetCount = readySnippetCount - blockedSnippetCount;

    String renderDeclaration;
    Renderer renderer;
    Context context = Context.getCurrentThreadContext();
    Configuration conf = Configuration.getConfiguration();
    final SnippetInvoker invoker = conf.getSnippetInvoker();

    String refId;
    String currentTemplatePath;
    Element renderTarget;
    for (Element element : snippetList) {
        if (!conf.isSkipSnippetExecution()) {
            // for a faked snippet node which is created by template
            // analyzing process, the render target element should be its
            // child.
            if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE)
                    .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) {
                renderTarget = element.children().first();
                // the hosting element of this faked snippet has been removed by outer a snippet
                if (renderTarget == null) {
                    element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED);
                    continue;
                }
            } else {
                renderTarget = element;
            }

            // we have to reset the ref of current snippet at every time to make sure the ref is always unique(duplicated snippet ref
            // could be created by list rendering)
            TemplateUtil.resetSnippetRefs(element);

            context.setCurrentRenderingElement(renderTarget);
            renderDeclaration = element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_RENDER);

            refId = element.attr(ExtNodeConstants.ATTR_SNIPPET_REF);
            currentTemplatePath = element.attr(ExtNodeConstants.ATTR_TEMPLATE_PATH);

            context.setCurrentRenderingElement(renderTarget);
            context.setData(TRACE_VAR_TEMPLATE_PATH, currentTemplatePath);

            try {
                if (element.hasAttr(ExtNodeConstants.SNIPPET_NODE_ATTR_PARALLEL)) {
                    ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc);
                    final Context newContext = context.clone();
                    final String declaration = renderDeclaration;
                    crHelper.submitWithContext(newContext, declaration, refId, new Callable<Renderer>() {
                        @Override
                        public Renderer call() throws Exception {
                            return invoker.invoke(declaration);
                        }
                    });
                    element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_WAITING);
                } else {
                    renderer = invoker.invoke(renderDeclaration);
                    applySnippetResultToElement(doc, refId, element, renderTarget, renderer);
                }
            } catch (SnippetNotResovlableException | SnippetInvokeException e) {
                throw e;
            } catch (Exception e) {
                SnippetInvokeException se = new SnippetInvokeException(
                        "Error occured when executing rendering on [" + renderDeclaration + "]:"
                                + e.getMessage(),
                        e);
                throw se;
            }

            context.setData(TRACE_VAR_TEMPLATE_PATH, null);
            context.setCurrentRenderingElement(null);
        } else {// if skip snippet
            element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                    ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED);
        }
    }

    // load embed nodes which blocking parents has finished
    List<Element> embedNodeList = doc.select(ExtNodeConstants.EMBED_NODE_TAG_SELECTOR);
    int embedNodeListCount = embedNodeList.size();
    Iterator<Element> embedNodeIterator = embedNodeList.iterator();
    Element embed;
    Element embedContent;
    while (embedNodeIterator.hasNext()) {
        embed = embedNodeIterator.next();
        if (isBlockedByParentSnippet(doc, embed)) {
            embedNodeListCount--;
            continue;
        }
        embedContent = TemplateUtil.getEmbedNodeContent(embed);
        TemplateUtil.mergeBlock(doc, embedContent);
        embed.before(embedContent);
        embed.remove();
    }

    if ((readySnippetCount + embedNodeListCount) > 0) {
        TemplateUtil.regulateElement(null, doc);
        applySnippets(doc);
    } else {
        ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc);
        String delcaration = null;
        if (crHelper.hasUnCompletedTask()) {
            delcaration = null;
            try {
                FutureRendererHolder holder = crHelper.take();
                delcaration = holder.getRenderDeclaration();
                String ref = holder.getSnippetRefId();
                String reSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR,
                        ExtNodeConstants.ATTR_SNIPPET_REF, ref);
                Element element = doc.select(reSelector).get(0);// must have
                Element target;
                if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE)
                        .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) {
                    target = element.children().first();
                } else {
                    target = element;
                }
                applySnippetResultToElement(doc, ref, element, target, holder.getRenderer());
                applySnippets(doc);
            } catch (InterruptedException | ExecutionException e) {
                throw new SnippetInvokeException("Concurrent snippet invocation failed"
                        + (delcaration == null ? "" : " on [" + delcaration + "]"), e);
            }
        }
    }
}

From source file:amazonechoapi.AmazonEchoApi.java

/**
 * Logs in by submitting the login form found on the page returned by {@code httpGet("")}.
 * <p>
 * The form's {@code action} is used as the POST target. The posted fields are the e-mail, the
 * password, {@code create=0} and every hidden input of the page. The response body is consumed
 * but the response status is not inspected.
 *
 * @return {@code true} when the POST completed without an exception; {@code false} when no
 *         form action was found or any exception occurred
 */
public boolean httpLogin() {
    try {
        final Document loginPage = Jsoup.parse(httpGet(""));

        final String action = loginPage.select("form").attr("action");
        if (action.isEmpty()) {
            return false;
        }

        final List<NameValuePair> nvps = new ArrayList<>();
        nvps.add(new BasicNameValuePair("email", USERNAME));
        nvps.add(new BasicNameValuePair("password", PASSWORd));
        nvps.add(new BasicNameValuePair("create", "0"));
        // Echo back every hidden field of the page along with the credentials.
        for (Element hiddenInput : loginPage.select("input[type=hidden]")) {
            nvps.add(new BasicNameValuePair(hiddenInput.attr("name"), hiddenInput.attr("value")));
        }

        final HttpPost httpPost = new HttpPost(action);
        httpPost.setHeader(HttpHeaders.USER_AGENT,
                "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13");
        httpPost.setHeader(HttpHeaders.REFERER, BASE_URL);
        httpPost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));

        final HttpEntity entity = httpclient.execute(httpPost).getEntity();
        if (entity != null) {
            EntityUtils.consume(entity);
        }

        System.out.println("Login successful");
        return true;
    } catch (Exception e) {
        System.out.println("Login Error:" + e.getMessage());
        return false;
    }
}

From source file:com.liato.bankdroid.banking.banks.MinPension.java

/**
 * Performs the full login sequence and returns the authenticated connection.
 * <p>
 * Posts the data prepared by {@code preLogin()}, checks that the response contains
 * {@code LoggaUt.aspx}, then opens the redirect page, reads the value of the
 * {@code #authenticationResult} element and posts it to the "mina sidor" default page.
 *
 * @return the {@code urlopen} connection used for the login
 * @throws LoginException if the login response does not contain {@code LoggaUt.aspx} or the
 *                        redirect page has no {@code #authenticationResult} element
 * @throws BankException  if {@code preLogin()} fails
 * @throws IOException    if a request fails
 */
@Override
public Urllib login() throws LoginException, BankException, IOException {
    final LoginPackage loginPackage = preLogin();

    final String loginResponse = urlopen.open(loginPackage.getLoginTarget(), loginPackage.getPostData(), true);
    if (!loginResponse.contains("LoggaUt.aspx")) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    final String redirectPage = urlopen
            .open("https://www.minpension.se/mina-sidor/redirect?path=MinPension%2FDefault.aspx&bodyMargin=0");
    final Element authResult = Jsoup.parse(redirectPage).select("#authenticationResult").first();
    if (authResult == null) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    final List<NameValuePair> postData = new ArrayList<>();
    postData.add(new BasicNameValuePair("authenticationResult", authResult.val()));
    urlopen.open("https://minasidor.minpension.se/MinPension/Default.aspx", postData, true);

    return urlopen;
}