List of usage examples for org.jsoup.nodes.Element.select
public Elements select(String cssQuery)
From source file:controllers.CNBCProxy.java
public static F.Promise<Result> index(String query) { if (StringUtils.isEmpty(query)) { F.Promise.promise(new F.Function0<Object>() { @Override// w w w . j a va 2 s . c om public Object apply() throws Throwable { return ok(Json.toJson("Query parameter (q) not provided ")); } }); } String target = "all"; String categories = "exclude"; String partnerId = "2000"; // ?target=all&categories=exclude&partnerId=2000&keywords=apple F.Promise<WSResponse> wsResponsePromise = WS.url("http://search.cnbc.com/main.do") .setQueryParameter("target", target).setQueryParameter("categories", categories) .setQueryParameter("partnerId", partnerId).setQueryParameter("keywords", query).get(); return wsResponsePromise.map(new F.Function<WSResponse, Result>() { @Override public Result apply(WSResponse wsResponse) throws Throwable { String body = wsResponse.getBody(); List<Map<String, String>> results = new ArrayList<Map<String, String>>(); try { // Parse html document org.jsoup.nodes.Document doc = Jsoup.parse(body); Elements items = doc.select("div:not(.clr).padL.padR"); // Choose elements that contain classes "padL" and "padR", but not "clr" // Iterate through results for (Element item : items) { Map<String, String> keyValue = new LinkedHashMap<String, String>(); // Add the keys and values keyValue.put("title", item.select("a").text()); keyValue.put("content", item.select("span.cnbc_bio_content").text()); keyValue.put("date", CalculateDateFormat(Long .parseLong(item.getElementsByTag("script").html().replaceAll("[^0-9]", ""), 10))); // Edit the date format keyValue.put("url", item.select("a").attr("href")); results.add(keyValue); } } catch (DOMException e) { e.printStackTrace(); } return ok(Json.toJson(results)); } }); }
From source file:de.geeksfactory.opacclient.apis.Littera.java
/**
 * Reads the {@code src} attribute of the first {@code .coverimage img} element below
 * {@code doc} and strips the first {@code &width=<digits>} fragment from it.
 *
 * <p>NOTE(review): there is no guard for the case where nothing matches the selector;
 * presumably {@code first()} yields {@code null} then and this method fails with an NPE.
 *
 * @param doc the element to search in
 * @return the cover image URL without its width parameter
 */
private static String getCover(Element doc) {
    Element coverImage = doc.select(".coverimage img").first();
    String source = coverImage.attr("src");
    return source.replaceFirst("&width=\\d+", "");
}
From source file:com.hp.test.framework.htmparse.HtmlParse.java
public static String getCountsSuiteswise(String path) { Document htmlFile = null;//from w w w .ja va 2s . com try { htmlFile = Jsoup.parse(new File(path), "UTF-8"); } catch (IOException e) { System.out.println("Exception in parse Current Run html file" + e.getMessage()); } Map<String, Map<String, Integer>> Suites_list = new HashMap<>(); for (Element table : htmlFile.select("table[id=tableStyle]")) { Elements row1 = table.select("tr"); for (int j = 0; j < row1.size(); j++) { Element tds1 = row1.get(j); Elements tds = tds1.select("td"); String SuiteName = ""; String Method_type = ""; String TestCaseStatus = ""; Map<String, Integer> test_status_list = new HashMap<String, Integer>(); for (int i = 0; i < tds.size(); i++) { Element link = tds.get(i); String link_temp = link.toString(); Elements href = link.select("a"); if (i == 0) { if (href.size() > 0) { SuiteName = href.get(0).text(); } } if (i == 3) { if (href.size() > 0) { Method_type = href.get(0).text(); } } if (i == 7 && Method_type.equals("Test Method")) { if (link_temp.contains("pass.png") || link_temp.contains("fail.png") || link_temp.contains("skip.png")) { // img style=\"border: none;width: 25px // ing str="img style=\"border: none;width: 25px"; if (link_temp.contains("pass.png")) { TestCaseStatus = "pass"; } else if (link_temp.contains("fail.png")) { TestCaseStatus = "fail"; } else { TestCaseStatus = "skip"; } // System.out.println("SuiteName::" + SuiteName); // System.out.println("Method_type::" + Method_type); // System.out.println("TestCaseStatus::" + TestCaseStatus); // System.out.println("*****************************"); if (Suites_list.get(SuiteName) == null) { if (TestCaseStatus.equals("pass")) { test_status_list.put("pass", 1); test_status_list.put("fail", 0); test_status_list.put("skip", 0); } if (TestCaseStatus.equals("fail")) { test_status_list.put("pass", 0); test_status_list.put("fail", 1); test_status_list.put("skip", 0); } if (TestCaseStatus.equals("skip")) { test_status_list.put("pass", 0); 
test_status_list.put("fail", 0); test_status_list.put("skip", 1); } Suites_list.put(SuiteName, test_status_list); } else { Map<String, Integer> temp_list = Suites_list.get(SuiteName); for (String status : temp_list.keySet()) { if (status.equals(TestCaseStatus)) { int count = temp_list.get(status); count = count + 1; temp_list.put(status, count); } } Suites_list.put(SuiteName, temp_list); } } } } } } String variable = "var chartData = ["; int NoofSuites = Suites_list.size(); int i = 1; for (String FeatureName : Suites_list.keySet()) { String feature_data = " { \n \"feature\":\"" + FeatureName + "\",\n"; Map<String, Integer> temp_list = Suites_list.get(FeatureName); for (String status : temp_list.keySet()) { feature_data = feature_data + "\"" + status + "\":" + temp_list.get(status) + ",\n"; } if (!(NoofSuites == i)) { feature_data = feature_data + "},\n"; } else { feature_data = feature_data + "}\n"; } variable = variable + feature_data; i = i + 1; } variable = variable + "];"; System.out.println("Getting the Counts Functionality Wise is Completed"); return variable; }
From source file:controllers.FRBProxy.java
public static F.Promise<Result> index(String query) { if (StringUtils.isEmpty(query)) { F.Promise.promise(new F.Function0<Object>() { @Override/* w ww . j a v a2 s . co m*/ public Object apply() throws Throwable { return ok(Json.toJson("Query parameter (q) not provided ")); } }); } F.Promise<WSResponse> wsResponsePromise = WS.url("http://www.forbes.com/search/") .setQueryParameter("q", query).get(); return wsResponsePromise.map(new F.Function<WSResponse, Result>() { @Override public Result apply(WSResponse wsResponse) throws Throwable { String body = wsResponse.getBody(); List<Map<String, String>> results = new ArrayList<Map<String, String>>(); try { // Insert into map org.jsoup.nodes.Document doc = Jsoup.parse(body); Elements items = doc.select("li.edittools-contentitem"); // All articles belong to this class for (Element item : items) { Map<String, String> keyValue = new LinkedHashMap<String, String>(); // Check if specific article belongs to gallery class (therefore it contains an image) if (item.hasClass("gallery")) { // Add image key and value to map keyValue.put("image", item.select("img").attr("src")); } // Add the rest of keys and values keyValue.put("title", item.select("h2").select("a").text()); keyValue.put("content", item.select("p").first().ownText()); keyValue.put("date", item.select("time").text()); keyValue.put("url", item.select("h2").select("a").attr("href")); results.add(keyValue); } } catch (DOMException e) { e.printStackTrace(); } return ok(Json.toJson(results)); } }); }
From source file:dsll.pinterest.crawler.Reduce.java
private static Text updateBoardContent(String url, DBCollection baordsCollection) throws JSONException, IOException { String id = url.split("/")[4]; DBCursor c = baordsCollection.find(new BasicDBObject("ID", id)); DBObject oldPin = c.next();//w w w . java 2 s. c o m JSONArray oldPins = new JSONArray(oldPin.get("pins").toString()); Elements pinsCont = Jsoup.connect(url).get().select("div[class=pinWrapper]"); // new pins JSONArray pins = new JSONArray(); for (Element pinCont : pinsCont) { JSONObject pin = new JSONObject(); pin.append("src", pinCont.select("div[class=pinHolder]>a").first().attr("href")); pins.put(pin); } List<String> oldPinURL = new ArrayList<String>(); for (int i = 0; i < oldPins.length(); i++) { oldPinURL.add(oldPins.getJSONObject(i).getString("src")); } for (int i = 0; i < pins.length(); i++) { if (oldPinURL.contains(pins.getJSONObject(i).getString("src"))) { continue; } oldPins.put(pins.getJSONObject(i)); } BasicDBObject newAttr = new BasicDBObject(); newAttr.append("pins", oldPins); BasicDBObject update = new BasicDBObject().append("$set", newAttr); baordsCollection.update(new BasicDBObject("ID", id), update); return new Text("baord " + id + " updated..."); }
From source file:dsll.pinterest.crawler.Reduce.java
private static Text getBoardContent(String url, DBCollection boardsCollection) throws JSONException { // NOTE: only board information is crawled. the pins are left to the expanding process Document html = null;/*from w w w. ja v a2 s. c o m*/ JSONObject board = new JSONObject(); try { html = Jsoup.connect(url).get(); } catch (Exception e) { return new Text("HTTP connection failed..."); } // board major information String[] tmp = url.split("/"); String boardID = tmp[4]; String boardOwnrID = tmp[3]; String boardName = html.select("h1[class=boardName]").text().trim(); String boardDesp = html.select("p[class=description]").text().trim(); String boardOwnr = html.select("h4[classs=fullname]").text().trim(); // Contained Pins Elements pinsCont = html.select("div[class=pinWrapper]"); JSONArray pins = new JSONArray(); for (Element pinCont : pinsCont) { JSONObject pin = new JSONObject(); pin.append("src", pinCont.select("div[class=pinHolder]>a").first().attr("href")); pins.put(pin); } board.append("ID", boardID); board.append("owner_id", boardOwnrID); board.append("src", url); board.append("name", boardName); board.append("description", boardDesp); board.append("owner", boardOwnr); board.append("pins", pins); // Optional: push data to database BasicDBObject dbObject = (BasicDBObject) JSON.parse(board.toString()); boardsCollection.insert(dbObject); return new Text(board.toString()); }
From source file:controllers.KWProxy.java
public static F.Promise<Result> index(String query) { if (StringUtils.isEmpty(query)) { F.Promise.promise(new F.Function0<Object>() { @Override//from w w w . jav a2s .co m public Object apply() throws Throwable { return ok(Json.toJson("Query parameter (q) not provided ")); } }); } F.Promise<WSResponse> wsResponsePromise = WS.url("http://knowledge.wharton.upenn.edu/") .setQueryParameter("s", query).get(); return wsResponsePromise.map(new F.Function<WSResponse, Result>() { @Override public Result apply(WSResponse wsResponse) throws Throwable { String body = wsResponse.getBody(); List<Map<String, String>> results = new ArrayList<Map<String, String>>(); try { // Insert into map org.jsoup.nodes.Document doc = Jsoup.parse(body); Elements items = doc.select("div.article.type-article.status-publish"); // All articles belong to this classes for (Element item : items) { Map<String, String> keyValue = new LinkedHashMap<String, String>(); // Check if specific article belongs to "has-post-thumbnail" class (therefore it contains an image) if (item.hasClass("has-post-thumbnail")) { // Add image key and value to map keyValue.put("image", item.select("img").attr("src")); } // Add the rest of keys and values keyValue.put("title", item.select("h2").select("a").text()); keyValue.put("content", item.select("div.attribute.categorythumbs").first().text()); keyValue.put("date", item.select("ul.datestamp").select("li").first().text()); keyValue.put("url", item.select("h2").select("a").attr("href")); results.add(keyValue); } } catch (DOMException e) { e.printStackTrace(); } return ok(Json.toJson(results)); } }); }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
/**
 * Runs {@code query} against {@code root} and returns the result of {@code first()} on the
 * matches, provided that at most one element matched.
 *
 * @param root the element to search in
 * @param query the CSS query
 * @return whatever {@code first()} yields for a result set of size zero or one
 * @throws RuntimeException if more than one element matches; the message contains the match
 *         count, the query, the root's tag and class names and the root's HTML
 */
private static Element getSingleElementByQuery(Element root, String query) {
    Elements matches = root.select(query);
    int matchCount = matches.size();
    if (matchCount <= 1) {
        return matches.first();
    }
    String error = "Found " + matchCount + " elements matching query \"" + query
            + "\" inside of " + root.tagName() + "-" + root.className();
    throw new RuntimeException(error + root.html());
}
From source file:dsll.pinterest.crawler.Reduce.java
private static Text updatePinContent(String url, DBCollection pinsCollection) throws JSONException, IOException { // add more related pins, include more boards String id = url.split("/pin/")[1]; DBCursor c = pinsCollection.find(new BasicDBObject("ID", id)); DBObject oldPin = c.next();/*from w ww .j a va2 s . c o m*/ JSONArray oldBoards = new JSONArray(oldPin.get("board").toString()); JSONArray oldRltPin = new JSONArray(oldPin.get("related_pins").toString()); Document doc = Jsoup.connect(url).get(); Element bottomDoc = doc.select("div[class=Module CloseupSidebar]").first(); //pin board Element boardEle = bottomDoc.select("div[class=boardHeader]").first(); JSONArray board = new JSONArray(); JSONObject b = new JSONObject(); String boardName = ""; try { boardName = boardEle.select("h3[class=title]").text().trim(); } catch (Exception ee) { } String boardSrc = ""; try { boardSrc = "https://www.pinterest.com" + boardEle.select("a").attr("href").trim(); } catch (Exception ee) { } b.append("name", boardName); b.append("src", boardSrc); board.put(b); //related pins bottomDoc = doc .select("div[class=closeupBottom] div[class=Module CloseupBottom] div[class=relatedPinsWrapper]") .first(); JSONArray relatedPins = new JSONArray(); Elements relatedPinsConts = bottomDoc.select("div[class=pinWrapper]"); for (Element relatedPinsCont : relatedPinsConts) { JSONObject relatedPin = new JSONObject(); relatedPin.append("src", "https://www.pinterest.com" + relatedPinsCont.select("div[class=pinHolder] > a").attr("href")); relatedPins.put(relatedPin); } // process new boards List<String> oldBoardNames = new ArrayList<String>(); for (int i = 0; i < oldBoards.length(); i++) { oldBoardNames.add(oldBoards.getJSONObject(i).getString("name")); } for (int i = 0; i < board.length(); i++) { JSONObject tmp = board.getJSONObject(i); if (oldBoardNames.contains(tmp.getString("name"))) { continue; } oldBoards.put(board.get(i)); } // process new related pins List<String> oldRelatedPins = new 
ArrayList<String>(); for (int i = 0; i < oldRltPin.length(); i++) { oldRelatedPins.add(oldRltPin.getJSONObject(i).getString("src")); } for (int i = 0; i < relatedPins.length(); i++) { if (oldRelatedPins.contains(relatedPins.getJSONObject(i).get("src"))) { continue; } oldRltPin.put(relatedPins.getJSONObject(i)); } BasicDBObject newAttr = new BasicDBObject(); newAttr.append("board", oldBoards); newAttr.append("related_pins", oldRltPin); BasicDBObject update = new BasicDBObject().append("$set", newAttr); pinsCollection.update(new BasicDBObject("ID", id), update); return new Text("Pin " + id + " updated."); }
From source file:io.jari.geenstijl.API.API.java
/** * Get article and comments (note that getArticles doesn't get the comments) * * @param url The direct url to the geenstijl article * @return Artikel The fetched article//from w w w .j av a 2 s.c o m * @throws IOException * @throws ParseException */ public static Artikel getArticle(String url, Context context) throws IOException, ParseException { ensureCookies(); domain = context.getSharedPreferences("geenstijl", 0).getString("gsdomain", "www.geenstijl.nl"); Artikel artikel; Log.i(TAG, "GETARTICLE STEP 1/2: Getting/parsing article page & images... " + url); Document document = Jsoup.connect(url).get(); Element artikel_el = document.select("#content>article").first(); artikel = parseArtikel(artikel_el, context); Log.i(TAG, "GETARTICLE STEP 2/2: Parsing comments..."); ArrayList<Comment> comments = new ArrayList<Comment>(); int i = 0; Elements comments_el = document.select("#comments article"); for (Element comment_el : comments_el) { i++; Comment comment = new Comment(); comment.id = Integer.parseInt(comment_el.attr("id").substring(1)); Element footer = comment_el.select("footer").first(); StringTokenizer footer_items = new StringTokenizer(footer.text(), "|"); comment.auteur = footer_items.nextToken().trim(); try { SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MM-yyHH:mm", Locale.US); comment.datum = simpleDateFormat .parse(footer_items.nextToken().trim() + footer_items.nextToken().trim()); } catch (ParseException parseEx) { //fuck gebruikers met pipe chars in hun naam, pech, gehad. continue; } comment.inhoud = comment_el.select("p").first().html(); Log.d(TAG + ".perf", "CommentParser: Parsed " + comment.id + ": " + i + "/" + comments_el.size()); comments.add(comment); } Comment[] comm = new Comment[comments.size()]; comments.toArray(comm); artikel.comments = comm; Log.i(TAG, "GETARTICLE: DONE"); return artikel; }