Example usage for org.jsoup.nodes.Document.select

List of usage examples for org.jsoup.nodes.Document.select

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:org.brunocvcunha.taskerbox.impl.crawler.CodepadAction.java

/**
 * Walks every {@code .section} element of the given page and triggers the
 * action chain for each entry whose id is accepted by {@code canAct}.
 *
 * @param entry the parsed page to inspect
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element section : entry.select(".section")) {
        // The id is what remains of the link target once the site prefix is removed.
        final String id = section.select("a").attr("href").replace("http://codepad.org/", "");

        // Flatten the snippet onto a single line and keep at most its first 32 characters.
        final String flattened = section.select("pre").text().replaceAll("\r?\n", " ");
        final String preview = flattened.length() > 32 ? flattened.substring(0, 32) : flattened;

        if (!canAct(id)) {
            continue;
        }

        addAct(id);
        spreadAction(id, id + " - " + preview);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}

From source file:org.brunocvcunha.taskerbox.impl.crawler.PastieAction.java

/**
 * Scans the page for {@code .pastePreview} elements and, for each one whose
 * id passes {@code canAct}, records it, spreads the action, persists the
 * acted-on state and then pauses for {@code FETCH_INTERVAL}.
 *
 * @param entry the parsed page to inspect
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    final Elements previews = entry.select(".pastePreview");
    for (Element preview : previews) {
        // Strip the site prefix from the link so only the paste id is left.
        final String link = preview.select("a").attr("href");
        final String id = link.replace("http://pastie.org/pastes/", "");

        // Single-line excerpt of the paste body, capped at 32 characters.
        String excerpt = preview.select("pre").text().replaceAll("\r?\n", " ");
        excerpt = excerpt.substring(0, Math.min(32, excerpt.length()));

        final String title = id + " - " + excerpt;

        if (!canAct(id)) {
            continue;
        }

        addAct(id);
        spreadAction(id, title);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}

From source file:com.liato.bankdroid.banking.banks.Jojo.java

/**
 * Logs in, parses the response page and rebuilds the account list from the
 * first two rows of the balance table.
 *
 * @throws LoginException if the username or password is missing or empty
 * @throws BankException if no account could be read from the page
 * @throws BankChoiceException declared by the overridden method
 */
@Override
public void update() throws BankException, LoginException, BankChoiceException {
    super.update();
    if (username == null || password == null || username.length() == 0 || password.length() == 0) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }
    urlopen = login();
    Document d = Jsoup.parse(response);

    // select() never returns null; an unmatched query yields an empty list.
    Elements rows = d.select(".saldo_ok_wrapper > table > tbody tr");

    // Only the first two rows are read. Bounding the loop by the real row count
    // avoids the IndexOutOfBoundsException the former "size() >= index" check
    // allowed, so a short or empty table now reaches the "no accounts" error below.
    int rowCount = Math.min(2, rows.size());
    for (int i = 0; i < rowCount; i++) {
        Element row = rows.get(i);
        Element name = row.select(".first").first();
        Element amount = row.select(".right").first();
        if (name != null && amount != null) {
            Account a = new Account(name.text().replaceAll(":", "").trim(),
                    Helpers.parseBalance(amount.text()), Integer.toString(i));
            accounts.add(a);
            balance = balance.add(a.getBalance());
        }
    }

    if (accounts.isEmpty()) {
        throw new BankException(res.getText(R.string.no_accounts_found).toString());
    }
    super.updateComplete();
}

From source file:com.johan.vertretungsplan.parser.UntisSubstitutionParser.java

/**
 * Logs in, loads the index page at {@code baseUrl} and then visits every
 * {@code td a} link on it, feeding the {@code table[rules=all]} table of each
 * linked page into {@code parseVertretungsplanTable}.
 *
 * @return a plan holding a single day entry that all linked pages contribute to
 * @throws IOException declared for the HTTP requests made while loading pages
 * @throws JSONException if the {@code "encoding"} entry cannot be read
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    String encoding = schule.getData().getString("encoding");
    Document doc = Jsoup.parse(this.httpGet(baseUrl, encoding));
    Elements classes = doc.select("td a");

    Vertretungsplan v = new Vertretungsplan();
    List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>();
    VertretungsplanTag tag = new VertretungsplanTag();
    tage.add(tag);
    v.setTage(tage);

    String stand = doc.select("td[align=right]:not(:has(b))").text();
    tag.setStand(stand);

    // Day header such as "4.11. / Montag". The dots are escaped so they match a
    // literal '.' only; unescaped they matched any character.
    Pattern dayPattern = Pattern.compile("\\d\\d?\\.\\d\\d?\\. / \\w+");

    // Links on the index page are resolved against the directory of baseUrl.
    String baseDir = baseUrl.substring(0, baseUrl.lastIndexOf("/"));

    for (Element klasse : classes) {
        Document classDoc = Jsoup.parse(httpGet(baseDir + "/" + klasse.attr("href"), encoding));

        // The date is taken from the first linked page whose title matches.
        if (tag.getDatum() == null) {
            String title = classDoc.select("font[size=5]").text();
            Matcher matcher = dayPattern.matcher(title);
            if (matcher.find()) {
                tag.setDatum(matcher.group());
            }
        }

        // first() returns null when the page has no such table; skip those pages
        // rather than handing null to the table parser.
        Element table = classDoc.select("table[rules=all]").first();
        if (table != null) {
            parseVertretungsplanTable(table, data, tag);
        }
    }
    return v;
}

From source file:org.manalith.ircbot.plugin.linuxpkgfinder.DebianPackageFinder.java

/**
 * Fetches the search result page at {@code url} and builds a one-line reply
 * describing the exact-hit package, if there is one.
 *
 * @param url address of the package search result page
 * @param commandName label placed in square brackets at the start of the reply
 * @return the formatted package line; a reply holding only the bracketed label
 *         when the page has no {@code #psearchres} element or no
 *         "Exact hits" heading; or {@code ": "} followed by the exception
 *         message when anything fails
 */
public String find(String url, String commandName) {
    // Reply used whenever the page offers no exact hit.
    final String emptyReply = "[" + commandName + "]  ";

    try {
        Connection conn = Jsoup.connect(url);
        conn.timeout(20000);
        Document doc = conn.get();

        Elements searchResult = doc.select("#psearchres");
        if (searchResult.isEmpty()) {
            return emptyReply;
        }

        boolean exactHitsFound = false;
        for (Element heading : searchResult.select("h2")) {
            if (heading.text().equals("Exact hits")) {
                exactHitsFound = true;
                break;
            }
        }
        if (!exactHitsFound) {
            return emptyReply;
        }

        // Second whitespace-separated token of the first h3 is used as the package name.
        String pkgname = searchResult.select("h3").get(0).text().split("\\s")[1];

        // Last list item of the first list; its markup up to the first <br>,
        // after the first ':', is used as the description.
        Elements exactHits = searchResult.select("ul").get(0).select("li");
        Element latestElement = exactHits.get(exactHits.size() - 1);
        String description = latestElement.toString().split("<br>")[0].split("\\:")[1].trim();

        return "[" + commandName + "] \u0002" + pkgname + "\u0002 - " + description + ", "
                + parseVersionInfo(doc) + ".";
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
        return ": " + e.getMessage();
    }
}

From source file:neembuu.release1.externalImpl.linkhandler.SaveVideoYoutubeLinkHandlerProvider.java

/**
 * Returns the name of the video.
 * <p>
 * Downloads the page at {@code url} and reads the {@code content} attribute
 * selected by {@code meta[name=title]}.
 *
 * @param url The url of the video.
 * @return Returns the title of the video.
 * @throws Exception declared for failures while fetching the page
 */
private String getVideoName(String url) throws Exception {
    final String html = NHttpClientUtils.getData(url, NHttpClient.getNewInstance());
    return Jsoup.parse(html).select("meta[name=title]").attr("content");
}

From source file:com.clonescriptscrapper.crawler.CloneScriptScrapper.java

/**
 * Reads the category links from the site's front page, stores each one through
 * the DAO, then crawls every stored category on a pool of five threads and
 * prints each task's result.
 * <p>
 * The pool is shut down even when crawling fails part-way. If the calling
 * thread is interrupted while waiting for a result, the interrupt flag is
 * restored and waiting stops.
 *
 * @throws Exception declared for failures while fetching the front page or
 *         talking to the DAO
 */
public void crawledCategories() throws URISyntaxException, IOException, InterruptedException, Exception {

    String url = "http://www.clonescriptdirectory.com/";

    String response = new GetRequestHandler().doGetRequest(new URL(url));
    Document doc = Jsoup.parse(response);

    Elements ele = doc.select("table[class=categories] tbody tr td a");
    for (Element ele1 : ele) {

        objCategories = new Categories();

        String title = ele1.text();
        String href = ele1.attr("href");
        System.out.println("Title : " + title);
        System.out.println("Href : " + href);

        objCategories.setCategoryName(title);
        objCategories.setCategoryUrl(href);

        objCloneScriptDirectoryDaoImpl.insertCategoriesData(objCategories);
    }

    List<Categories> listCatogories = objCloneScriptDirectoryDaoImpl.getCategoriesDataList();

    List<Future<String>> list = new ArrayList<Future<String>>();
    ExecutorService executor = Executors.newFixedThreadPool(5);
    try {
        for (Categories listCatogory : listCatogories) {

            try {
                // NOTE(review): this updates objCategories (the last category inserted
                // above), not listCatogory - confirm that this is intended.
                objCloneScriptDirectoryDaoImpl.updateCategoriesData(objCategories);
                Callable worker = new CrawlingEachUrlData(listCatogory, objCloneScriptDirectoryDaoImpl);
                Future<String> future = executor.submit(worker);
                list.add(future);
            } catch (Exception exx) {
                System.out.println(exx);
            }

        }

        for (Future<String> fut : list) {
            try {
                // Future.get() blocks until the task has completed, so output may be delayed.
                System.out.println(new Date() + "::" + fut.get());
            } catch (InterruptedException ep) {
                // Restore the flag so callers can see the interruption, and stop
                // waiting: every further get() would fail the same way.
                Thread.currentThread().interrupt();
                ep.printStackTrace();
                break;
            } catch (ExecutionException ep) {
                ep.printStackTrace();
            }
        }
    } finally {
        // Always release the pool threads, even when something above throws.
        executor.shutdown();
    }
}

From source file:com.jejking.hh.nord.corpus.AllrisHtmlToRawDrucksache.java

/**
 * Extracts the Drucksache id from the first {@code #risname > h1} heading.
 * <p>
 * The heading text is cut by the length of the prefix {@code "Drucksache - "}
 * and the remainder is passed through {@code removeNonBreakingSpacesAndTrim}.
 * NOTE(review): assumes the heading exists and is at least as long as that
 * prefix; neither is checked here.
 *
 * @param htmlDoc the parsed page
 * @return the cleaned-up id text
 */
private String druckSacheId(Document htmlDoc) {
    String headingText = htmlDoc.select("#risname > h1").first().text();
    String withoutPrefix = headingText.substring("Drucksache - ".length());
    return removeNonBreakingSpacesAndTrim(withoutPrefix);
}

From source file:com.halzhang.android.apps.startupnews.snkit.SNApi.java

/**
 * cookie/*  w ww.j  a  v a2s .  c om*/
 * <p>
 * pc?pccookie
 * </p>
 * 
 * @param url
 */
public void verificateCookie(Context context, String url) {
    Connection conn = JsoupFactory.getInstance(context).newJsoupConnection(url);
    if (conn != null) {
        try {
            Document doc = conn.get();
            Elements elements = doc.select("a:matches(logout)");
            if (elements.size() < 1) {
                // cookie
                SessionManager.getInstance(context).clear();
            }
        } catch (IOException e) {
            EasyTracker.getTracker().sendException(e.getMessage(), e, false);
        }

    }

}

From source file:com.subgraph.vega.internal.analysis.urls.HtmlUrlExtractor.java

/**
 * Collects the URIs found in one attribute of every element matching a query.
 *
 * @param document the parsed page to search
 * @param query CSS query selecting the elements of interest
 * @param attribute name of the attribute holding the link
 * @return the URIs for which {@code createURI} returned a non-null value,
 *         in document order
 */
private List<URI> extractURIs(Document document, String query, String attribute) {
    final ArrayList<URI> found = new ArrayList<URI>();
    for (Element element : document.select(query)) {
        final URI candidate = createURI(element.attr(attribute));
        if (candidate == null) {
            // Links that createURI rejects are skipped.
            continue;
        }
        found.add(candidate);
    }
    return found;
}