Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

This page collects example usages of the org.jsoup.nodes.Element.select method, taken from open-source projects.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:controllers.CNBCProxy.java

/**
 * Proxies a keyword search to CNBC and returns the scraped hits as JSON.
 *
 * <p>Each hit is a map with the keys {@code title}, {@code content}, {@code date}
 * and {@code url}, in that order.
 *
 * @param query the search keywords; when empty, a JSON message is returned and
 *              no upstream request is made
 * @return a promise of the JSON result
 */
public static F.Promise<Result> index(String query) {

    if (StringUtils.isEmpty(query)) {
        // The promise must be returned: previously it was created and dropped,
        // so the upstream search ran anyway with an empty keyword.
        return F.Promise.promise(new F.Function0<Result>() {
            @Override
            public Result apply() throws Throwable {
                return ok(Json.toJson("Query parameter (q) not provided "));
            }
        });
    }

    String target = "all";
    String categories = "exclude";
    String partnerId = "2000";
    // ?target=all&categories=exclude&partnerId=2000&keywords=apple
    F.Promise<WSResponse> wsResponsePromise = WS.url("http://search.cnbc.com/main.do")
            .setQueryParameter("target", target).setQueryParameter("categories", categories)
            .setQueryParameter("partnerId", partnerId).setQueryParameter("keywords", query).get();

    return wsResponsePromise.map(new F.Function<WSResponse, Result>() {
        @Override
        public Result apply(WSResponse wsResponse) throws Throwable {

            String body = wsResponse.getBody();

            List<Map<String, String>> results = new ArrayList<Map<String, String>>();

            try {
                // Parse html document
                org.jsoup.nodes.Document doc = Jsoup.parse(body);
                // Choose elements that contain classes "padL" and "padR", but not "clr"
                Elements items = doc.select("div:not(.clr).padL.padR");

                // Iterate through results
                for (Element item : items) {

                    // LinkedHashMap keeps the keys in insertion order in the JSON output.
                    Map<String, String> keyValue = new LinkedHashMap<String, String>();

                    // The date is taken from the digits found inside the item's <script> element.
                    // NOTE(review): an item without any digits there makes parseLong throw
                    // NumberFormatException, which is not caught below - confirm this is acceptable.
                    String timestampDigits = item.getElementsByTag("script").html().replaceAll("[^0-9]", "");

                    // Add the keys and values
                    keyValue.put("title", item.select("a").text());
                    keyValue.put("content", item.select("span.cnbc_bio_content").text());
                    keyValue.put("date", CalculateDateFormat(Long.parseLong(timestampDigits, 10))); // Edit the date format
                    keyValue.put("url", item.select("a").attr("href"));

                    results.add(keyValue);
                }
            } catch (DOMException e) {
                e.printStackTrace();
            }

            return ok(Json.toJson(results));
        }
    });
}

From source file:de.geeksfactory.opacclient.apis.Littera.java

/**
 * Reads the {@code src} of the first image inside a {@code .coverimage} element
 * and strips the first {@code &width=<digits>} parameter from it.
 *
 * @param doc the element to search in
 * @return the image source without the width parameter
 */
private static String getCover(Element doc) {
    Element coverImage = doc.select(".coverimage img").first();
    String source = coverImage.attr("src");
    return source.replaceFirst("&width=\\d+", "");
}

From source file:com.hp.test.framework.htmparse.HtmlParse.java

/**
 * Builds a JavaScript {@code var chartData = [...]} literal holding pass/fail/skip
 * counts per suite, read from the result tables of an HTML report.
 *
 * <p>Only rows whose fourth cell links to the text {@code "Test Method"} are counted.
 * The suite name is the link text of the first cell, and the status is derived from the
 * icon file name ({@code pass.png}, {@code fail.png}, {@code skip.png}) found in the
 * eighth cell.
 *
 * @param path path of the HTML report file
 * @return the chart data literal; an empty array literal when the file cannot be read
 */
public static String getCountsSuiteswise(String path) {
    Document htmlFile;
    try {
        htmlFile = Jsoup.parse(new File(path), "UTF-8");
    } catch (IOException e) {
        System.out.println("Exception in parse Current Run html file" + e.getMessage());
        // Nothing can be counted without a parsed report; previously execution
        // continued and dereferenced the null document.
        return "var chartData = [];";
    }

    Map<String, Map<String, Integer>> suiteCounts = new HashMap<>();
    for (Element table : htmlFile.select("table[id=tableStyle]")) {
        for (Element row : table.select("tr")) {
            Elements cells = row.select("td");
            String suiteName = "";
            String methodType = "";
            for (int i = 0; i < cells.size(); i++) {
                Element cell = cells.get(i);
                Elements links = cell.select("a");

                if (i == 0 && links.size() > 0) {
                    suiteName = links.get(0).text();
                }
                if (i == 3 && links.size() > 0) {
                    methodType = links.get(0).text();
                }
                if (i != 7 || !methodType.equals("Test Method")) {
                    continue;
                }

                // The status is recognised by the icon file name in the cell markup.
                String cellHtml = cell.toString();
                String status;
                if (cellHtml.contains("pass.png")) {
                    status = "pass";
                } else if (cellHtml.contains("fail.png")) {
                    status = "fail";
                } else if (cellHtml.contains("skip.png")) {
                    status = "skip";
                } else {
                    continue;
                }

                Map<String, Integer> counts = suiteCounts.get(suiteName);
                if (counts == null) {
                    // Every suite always reports all three statuses, starting at zero.
                    counts = new HashMap<String, Integer>();
                    counts.put("pass", 0);
                    counts.put("fail", 0);
                    counts.put("skip", 0);
                    suiteCounts.put(suiteName, counts);
                }
                counts.put(status, counts.get(status) + 1);
            }
        }
    }

    StringBuilder chart = new StringBuilder("var chartData = [");
    int remaining = suiteCounts.size();
    for (Map.Entry<String, Map<String, Integer>> suite : suiteCounts.entrySet()) {
        chart.append(" { \n \"feature\":\"").append(suite.getKey()).append("\",\n");
        for (Map.Entry<String, Integer> count : suite.getValue().entrySet()) {
            chart.append("\"").append(count.getKey()).append("\":").append(count.getValue()).append(",\n");
        }
        remaining--;
        // Every entry but the last is followed by a separating comma.
        chart.append(remaining == 0 ? "}\n" : "},\n");
    }
    chart.append("];");
    System.out.println("Getting the Counts Functionality Wise is Completed");
    return chart.toString();
}

From source file:controllers.FRBProxy.java

/**
 * Proxies a search to Forbes and returns the scraped articles as JSON.
 *
 * <p>Each article is a map with the keys {@code title}, {@code content}, {@code date}
 * and {@code url}; items carrying the {@code gallery} class additionally get an
 * {@code image} key first.
 *
 * @param query the search term; when empty, a JSON message is returned and no
 *              upstream request is made
 * @return a promise of the JSON result
 */
public static F.Promise<Result> index(String query) {

    if (StringUtils.isEmpty(query)) {
        // The promise must be returned: previously it was created and dropped,
        // so the upstream search ran anyway with an empty term.
        return F.Promise.promise(new F.Function0<Result>() {
            @Override
            public Result apply() throws Throwable {
                return ok(Json.toJson("Query parameter (q) not provided "));
            }
        });
    }

    F.Promise<WSResponse> wsResponsePromise = WS.url("http://www.forbes.com/search/")
            .setQueryParameter("q", query).get();

    return wsResponsePromise.map(new F.Function<WSResponse, Result>() {
        @Override
        public Result apply(WSResponse wsResponse) throws Throwable {

            String body = wsResponse.getBody();

            List<Map<String, String>> results = new ArrayList<Map<String, String>>();

            try {

                // Insert into map
                org.jsoup.nodes.Document doc = Jsoup.parse(body);
                Elements items = doc.select("li.edittools-contentitem"); // All articles belong to this class

                for (Element item : items) {
                    // LinkedHashMap keeps the keys in insertion order in the JSON output.
                    Map<String, String> keyValue = new LinkedHashMap<String, String>();

                    // Check if specific article belongs to gallery class (therefore it contains an image)
                    if (item.hasClass("gallery")) {
                        // Add image key and value to map
                        keyValue.put("image", item.select("img").attr("src"));
                    }

                    // first() returns null when the item has no <p>; fall back to an
                    // empty summary instead of failing the whole response.
                    Element firstParagraph = item.select("p").first();
                    Elements headline = item.select("h2").select("a");

                    // Add the rest of keys and values
                    keyValue.put("title", headline.text());
                    keyValue.put("content", firstParagraph == null ? "" : firstParagraph.ownText());
                    keyValue.put("date", item.select("time").text());
                    keyValue.put("url", headline.attr("href"));

                    results.add(keyValue);
                }
            } catch (DOMException e) {
                e.printStackTrace();
            }

            return ok(Json.toJson(results));
        }
    });
}

From source file:dsll.pinterest.crawler.Reduce.java

/**
 * Re-crawls a board page and adds any pins not yet stored for that board.
 *
 * <p>The board id is the fifth {@code /}-separated segment of {@code url}. The stored
 * document is looked up by {@code ID}, and its {@code pins} array is extended with the
 * newly found pins whose {@code src} is not already present.
 *
 * @param url              the board URL
 * @param baordsCollection the collection holding the board documents
 * @return a short status message
 * @throws JSONException if the stored or crawled pin data cannot be read as JSON
 * @throws IOException   if the board page cannot be fetched
 */
private static Text updateBoardContent(String url, DBCollection baordsCollection)
        throws JSONException, IOException {
    String id = url.split("/")[4];

    // Close the cursor once the stored document has been read so it is not leaked.
    DBObject oldPin;
    DBCursor c = baordsCollection.find(new BasicDBObject("ID", id));
    try {
        oldPin = c.next();
    } finally {
        c.close();
    }
    JSONArray oldPins = new JSONArray(oldPin.get("pins").toString());

    Elements pinsCont = Jsoup.connect(url).get().select("div[class=pinWrapper]");
    // new pins
    JSONArray pins = new JSONArray();
    for (Element pinCont : pinsCont) {
        Element link = pinCont.select("div[class=pinHolder]>a").first();
        if (link == null) {
            // A wrapper without an anchor carries no pin URL; skip it instead of failing.
            continue;
        }
        JSONObject pin = new JSONObject();
        pin.append("src", link.attr("href"));
        pins.put(pin);
    }

    List<String> oldPinURL = new ArrayList<String>();
    for (int i = 0; i < oldPins.length(); i++) {
        oldPinURL.add(oldPins.getJSONObject(i).getString("src"));
    }

    // Only pins whose source is not stored yet are added.
    for (int i = 0; i < pins.length(); i++) {
        JSONObject candidate = pins.getJSONObject(i);
        if (!oldPinURL.contains(candidate.getString("src"))) {
            oldPins.put(candidate);
        }
    }

    BasicDBObject newAttr = new BasicDBObject();
    newAttr.append("pins", oldPins);
    BasicDBObject update = new BasicDBObject().append("$set", newAttr);

    baordsCollection.update(new BasicDBObject("ID", id), update);
    return new Text("baord " + id + " updated...");
}

From source file:dsll.pinterest.crawler.Reduce.java

/**
 * Crawls a board page, stores the board document in the collection and returns it.
 *
 * <p>The owner id and board id are the fourth and fifth {@code /}-separated segments
 * of {@code url}. Name, description and owner name are read from the page, together
 * with the {@code src} of every pin found on it.
 *
 * @param url              the board URL
 * @param boardsCollection the collection the board document is inserted into
 * @return the board as JSON text, or a failure message when the page cannot be fetched
 * @throws JSONException if the board JSON cannot be assembled
 */
private static Text getBoardContent(String url, DBCollection boardsCollection) throws JSONException {
    // NOTE: only board information is crawled. the pins are left to the expanding process
    Document html;
    try {
        html = Jsoup.connect(url).get();
    } catch (Exception e) {
        return new Text("HTTP connection failed...");
    }

    // board major information
    String[] tmp = url.split("/");
    String boardID = tmp[4];
    String boardOwnrID = tmp[3];
    String boardName = html.select("h1[class=boardName]").text().trim();
    String boardDesp = html.select("p[class=description]").text().trim();
    // Attribute name fixed from "classs": the misspelt selector could never match,
    // which left the owner empty for every board.
    String boardOwnr = html.select("h4[class=fullname]").text().trim();

    // Contained Pins
    JSONArray pins = new JSONArray();
    for (Element pinCont : html.select("div[class=pinWrapper]")) {
        Element link = pinCont.select("div[class=pinHolder]>a").first();
        if (link == null) {
            // A wrapper without an anchor carries no pin URL; skip it instead of failing.
            continue;
        }
        JSONObject pin = new JSONObject();
        pin.append("src", link.attr("href"));
        pins.put(pin);
    }

    JSONObject board = new JSONObject();
    board.append("ID", boardID);
    board.append("owner_id", boardOwnrID);
    board.append("src", url);
    board.append("name", boardName);
    board.append("description", boardDesp);
    board.append("owner", boardOwnr);
    board.append("pins", pins);

    // Optional: push data to database
    BasicDBObject dbObject = (BasicDBObject) JSON.parse(board.toString());
    boardsCollection.insert(dbObject);
    return new Text(board.toString());
}

From source file:controllers.KWProxy.java

/**
 * Proxies a search to Knowledge@Wharton and returns the scraped articles as JSON.
 *
 * <p>Each article is a map with the keys {@code title}, {@code content}, {@code date}
 * and {@code url}; items carrying the {@code has-post-thumbnail} class additionally
 * get an {@code image} key first.
 *
 * @param query the search term; when empty, a JSON message is returned and no
 *              upstream request is made
 * @return a promise of the JSON result
 */
public static F.Promise<Result> index(String query) {

    if (StringUtils.isEmpty(query)) {
        // The promise must be returned: previously it was created and dropped,
        // so the upstream search ran anyway with an empty term.
        return F.Promise.promise(new F.Function0<Result>() {
            @Override
            public Result apply() throws Throwable {
                return ok(Json.toJson("Query parameter (q) not provided "));
            }
        });
    }

    F.Promise<WSResponse> wsResponsePromise = WS.url("http://knowledge.wharton.upenn.edu/")
            .setQueryParameter("s", query).get();

    return wsResponsePromise.map(new F.Function<WSResponse, Result>() {
        @Override
        public Result apply(WSResponse wsResponse) throws Throwable {

            String body = wsResponse.getBody();

            List<Map<String, String>> results = new ArrayList<Map<String, String>>();

            try {

                // Insert into map
                org.jsoup.nodes.Document doc = Jsoup.parse(body);
                Elements items = doc.select("div.article.type-article.status-publish"); // All articles belong to these classes

                for (Element item : items) {
                    // LinkedHashMap keeps the keys in insertion order in the JSON output.
                    Map<String, String> keyValue = new LinkedHashMap<String, String>();

                    // Check if specific article belongs to "has-post-thumbnail" class (therefore it contains an image)
                    if (item.hasClass("has-post-thumbnail")) {
                        // Add image key and value to map
                        keyValue.put("image", item.select("img").attr("src"));
                    }

                    // first() returns null when nothing matches; fall back to empty
                    // text instead of failing the whole response.
                    Element summary = item.select("div.attribute.categorythumbs").first();
                    Element dateStamp = item.select("ul.datestamp").select("li").first();
                    Elements headline = item.select("h2").select("a");

                    // Add the rest of keys and values
                    keyValue.put("title", headline.text());
                    keyValue.put("content", summary == null ? "" : summary.text());
                    keyValue.put("date", dateStamp == null ? "" : dateStamp.text());
                    keyValue.put("url", headline.attr("href"));

                    results.add(keyValue);
                }
            } catch (DOMException e) {
                e.printStackTrace();
            }

            return ok(Json.toJson(results));
        }
    });
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Selects the single element matching {@code query} below {@code root}.
 *
 * @param root  the element the query starts from
 * @param query the CSS query
 * @return the first match as returned by {@code Elements.first()}
 * @throws RuntimeException if more than one element matches; the message names the
 *                          match count, the query and the root, followed by the root's HTML
 */
private static Element getSingleElementByQuery(Element root, String query) {
    Elements matches = root.select(query);
    int matchCount = matches.size();
    if (matchCount <= 1) {
        return matches.first();
    }

    StringBuilder message = new StringBuilder();
    message.append("Found ").append(matchCount);
    message.append(" elements matching query \"").append(query);
    message.append("\" inside of ").append(root.tagName());
    message.append("-").append(root.className());
    message.append(root.html());
    throw new RuntimeException(message.toString());
}

From source file:dsll.pinterest.crawler.Reduce.java

/**
 * Re-crawls a pin page and extends the stored pin with any board and related pins
 * that are not recorded yet.
 *
 * <p>The pin id is the part of {@code url} after {@code /pin/}. The stored document is
 * looked up by {@code ID}; its {@code board} and {@code related_pins} arrays are merged
 * with what is found on the page and written back.
 *
 * @param url            the pin URL
 * @param pinsCollection the collection holding the pin documents
 * @return a short status message
 * @throws JSONException if the stored or crawled data cannot be read as JSON
 * @throws IOException   if the pin page cannot be fetched
 */
private static Text updatePinContent(String url, DBCollection pinsCollection)
        throws JSONException, IOException {
    // add more related pins, include more boards
    String id = url.split("/pin/")[1];

    // Close the cursor once the stored document has been read so it is not leaked.
    DBObject oldPin;
    DBCursor c = pinsCollection.find(new BasicDBObject("ID", id));
    try {
        oldPin = c.next();
    } finally {
        c.close();
    }
    JSONArray oldBoards = new JSONArray(oldPin.get("board").toString());
    JSONArray oldRltPin = new JSONArray(oldPin.get("related_pins").toString());

    Document doc = Jsoup.connect(url).get();

    //pin board
    // Either lookup may find nothing; name and source then stay empty. Explicit
    // null checks replace the former catch-all blocks and also cover a missing sidebar.
    Element sidebar = doc.select("div[class=Module CloseupSidebar]").first();
    Element boardEle = sidebar == null ? null : sidebar.select("div[class=boardHeader]").first();
    String boardName = "";
    String boardSrc = "";
    if (boardEle != null) {
        boardName = boardEle.select("h3[class=title]").text().trim();
        boardSrc = "https://www.pinterest.com" + boardEle.select("a").attr("href").trim();
    }
    JSONObject b = new JSONObject();
    b.append("name", boardName);
    b.append("src", boardSrc);

    //related pins
    Element relatedWrapper = doc
            .select("div[class=closeupBottom] div[class=Module CloseupBottom] div[class=relatedPinsWrapper]")
            .first();

    JSONArray relatedPins = new JSONArray();
    if (relatedWrapper != null) {
        for (Element relatedPinsCont : relatedWrapper.select("div[class=pinWrapper]")) {
            JSONObject relatedPin = new JSONObject();
            relatedPin.append("src",
                    "https://www.pinterest.com" + relatedPinsCont.select("div[class=pinHolder] > a").attr("href"));
            relatedPins.put(relatedPin);
        }
    }

    // process new boards
    List<String> oldBoardNames = new ArrayList<String>();
    for (int i = 0; i < oldBoards.length(); i++) {
        oldBoardNames.add(oldBoards.getJSONObject(i).getString("name"));
    }
    if (!oldBoardNames.contains(b.getString("name"))) {
        oldBoards.put(b);
    }

    // process new related pins
    List<String> oldRelatedPins = new ArrayList<String>();
    for (int i = 0; i < oldRltPin.length(); i++) {
        oldRelatedPins.add(oldRltPin.getJSONObject(i).getString("src"));
    }
    for (int i = 0; i < relatedPins.length(); i++) {
        JSONObject candidate = relatedPins.getJSONObject(i);
        // Compare in the same string form the stored entries were collected in;
        // comparing the raw get("src") object against a list of strings did not.
        if (!oldRelatedPins.contains(candidate.getString("src"))) {
            oldRltPin.put(candidate);
        }
    }

    BasicDBObject newAttr = new BasicDBObject();
    newAttr.append("board", oldBoards);
    newAttr.append("related_pins", oldRltPin);
    BasicDBObject update = new BasicDBObject().append("$set", newAttr);

    pinsCollection.update(new BasicDBObject("ID", id), update);

    return new Text("Pin " + id + " updated.");
}

From source file:io.jari.geenstijl.API.API.java

/**
 * Get article and comments (note that getArticles doesn't get the comments)
 *
 * <p>Comments whose footer does not split into author, date and time on {@code |},
 * or whose date cannot be parsed, are skipped.
 *
 * @param url     The direct url to the geenstijl article
 * @param context Used to read the configured domain from the "geenstijl" preferences
 *                and passed on to the article parser
 * @return The fetched article, with its comments attached
 * @throws IOException    if the article page cannot be fetched
 * @throws ParseException declared for callers; date parse failures of individual
 *                        comments are handled inside by skipping the comment
 */
public static Artikel getArticle(String url, Context context) throws IOException, ParseException {
    ensureCookies();
    domain = context.getSharedPreferences("geenstijl", 0).getString("gsdomain", "www.geenstijl.nl");
    Log.i(TAG, "GETARTICLE STEP 1/2: Getting/parsing article page & images... " + url);
    Document document = Jsoup.connect(url).get();
    Element artikel_el = document.select("#content>article").first();
    Artikel artikel = parseArtikel(artikel_el, context);

    Log.i(TAG, "GETARTICLE STEP 2/2: Parsing comments...");
    // Built once for all comments instead of once per comment.
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MM-yyHH:mm", Locale.US);
    ArrayList<Comment> comments = new ArrayList<Comment>();
    int i = 0;
    Elements comments_el = document.select("#comments article");
    for (Element comment_el : comments_el) {
        i++;
        Comment comment = new Comment();
        // The element id is one prefix character followed by the numeric comment id.
        comment.id = Integer.parseInt(comment_el.attr("id").substring(1));
        Element footer = comment_el.select("footer").first();
        StringTokenizer footer_items = new StringTokenizer(footer.text(), "|");
        if (footer_items.countTokens() < 3) {
            // Author, date and time are all required; a shorter footer used to
            // abort the whole article with a NoSuchElementException.
            continue;
        }
        comment.auteur = footer_items.nextToken().trim();

        try {
            comment.datum = simpleDateFormat
                    .parse(footer_items.nextToken().trim() + footer_items.nextToken().trim());
        } catch (ParseException parseEx) {
            // Users with pipe characters in their name shift the tokens so the
            // date no longer parses; those comments are skipped.
            continue;
        }

        comment.inhoud = comment_el.select("p").first().html();

        Log.d(TAG + ".perf", "CommentParser: Parsed " + comment.id + ": " + i + "/" + comments_el.size());

        comments.add(comment);
    }

    Comment[] comm = new Comment[comments.size()];
    comments.toArray(comm);
    artikel.comments = comm;

    Log.i(TAG, "GETARTICLE: DONE");

    return artikel;
}