List of usage examples for org.jsoup.nodes Element getElementsByTag
public Elements getElementsByTag(String tagName)
From source file:app.sunstreak.yourpisd.net.Parser.java
/** * returns value of pageUniqueId from html * String must contain the following block: * <input type="hidden" name="PageUniqueId" id="PageUniqueId" value="8bdc977f-ccaf-4a18-b4dd-20d1406fad6a" /> */// ww w . j a v a2s .co m public static String pageUniqueId(Element doc) { Elements inputElements = doc.getElementsByTag("input"); for (Element e : inputElements) if (e.attr("name").equals("PageUniqueId")) return e.attr("value"); return null; }
From source file:app.sunstreak.yourpisd.net.Parser.java
/**
 * Decides whether access was granted by comparing the page title against
 * {@code LOGIN_FAILURE_TITLE}.
 *
 * @param html HTML source of the page to inspect
 * @return {@code false} when the combined text of the page's {@code title} elements equals
 *         {@code LOGIN_FAILURE_TITLE}; {@code true} otherwise
 */
public static boolean accessGrantedEditure(String html) {
    Element page = Jsoup.parse(html);
    Elements titleTags = page.getElementsByTag("title");
    String pageTitle = titleTags.text();
    return !pageTitle.equals(LOGIN_FAILURE_TITLE);
}
From source file:app.sunstreak.yourpisd.net.Parser.java
/** * /*w w w. java 2s . co m*/ * @param html html source code of https://sso.portal.mypisd.net/cas/login?service=http%3A%2F%2Fportal.mypisd.net%2Fc%2Fportal%2Flogin * @return the value embedded in the <input type="hidden" name="lt" value=""> block */ public static String portalLt(String html) { Element doc = Jsoup.parse(html); Elements inputTags = doc.getElementsByTag("input"); //Shortcut // if (inputTags.get(4).attr("name").equals("lt")) // return inputTags.get(4).attr("value"); // else { for (Element tag : inputTags) { if (tag.attr("name").equals("lt")) return tag.attr("value"); } // } return null; }
From source file:app.sunstreak.yourpisd.net.Parser.java
public static Object[] termCategoryGrades(String html) throws JSONException { JSONArray termCategoryGrades = new JSONArray(); Element doc = Jsoup.parse(html); Element categoryTable = doc.getElementById("Category"); Elements rows = categoryTable.getElementsByTag("tbody").get(0).getElementsByTag("tr"); for (Element row : rows) { JSONObject category = new JSONObject(); Elements columns = row.getElementsByTag("td"); for (int i = 0; i < columns.size(); i++) { String value = columns.get(i).text(); // do not store empty values! if (value.equals("")) continue; // first try to cast as int. try { category.putOpt(categoryTableHeader(i), Integer.parseInt(value)); // if not int, try double } catch (NumberFormatException e) { try { category.putOpt(categoryTableHeader(i), Double.parseDouble(value)); // if not double, use string } catch (NumberFormatException f) { category.putOpt(categoryTableHeader(i), value); }//from w w w . j av a 2 s . co m } } termCategoryGrades.put(category); } // The average for the six weeks is int average = -1; try { Element finalGrade = doc.getElementById("finalGrade"); average = Integer.parseInt(finalGrade.getElementsByTag("td").get(3).text()); } catch (NullPointerException e) { // Let average be -1 } return new Object[] { termCategoryGrades, average }; }
From source file:app.sunstreak.yourpisd.net.Parser.java
public static JSONArray detailedReport(String html) throws JSONException { Element doc = Jsoup.parse(html); // System.out.println(html); Element assignments = doc.getElementsByAttributeValue("id", "Assignments").get(0); Elements tableRows = assignments.getElementsByTag("tbody").get(0).getElementsByTag("tr"); JSONArray grades = new JSONArray(); for (Element tr : tableRows) { JSONObject assignment = new JSONObject(); Elements columns = tr.getElementsByTag("td"); for (int i = 0; i < columns.size(); i++) { String value = columns.get(i).text(); // do not store empty values! if (value.equals("")) continue; // first try to cast as int. try { assignment.putOpt(assignmentTableHeader(i), Integer.parseInt(value)); // if not int, try double } catch (NumberFormatException e) { try { assignment.putOpt(assignmentTableHeader(i), Double.parseDouble(value)); // if not double, use string } catch (NumberFormatException f) { assignment.putOpt(assignmentTableHeader(i), value); }/* w w w .j ava 2 s .co m*/ } } String assignmentDetailLink = tr.getElementsByTag("a").get(0).attr("href"); Matcher matcher = Pattern.compile(".+" + "assignmentId=(\\d+)" + "&H=S" + "&GradebookId=(\\d+)" + "&TermId=\\d+" + "&StudentId=\\d+&").matcher(assignmentDetailLink); matcher.find(); int assignmentId = Integer.parseInt(matcher.group(1)); int gradebookId = Integer.parseInt(matcher.group(2)); assignment.put("assignmentId", assignmentId); assignment.put("gradebookId", gradebookId); grades.put(assignment); } // System.out.println((grades)); return grades; }
From source file:app.sunstreak.yourpisd.net.Parser.java
/** * Reads assignment view page and returns teacher name. * /*from w ww .jav a2s.c o m*/ * Parses from this table: * * <table id='classStandardInfo'> <tbody> <tr> * <td> <div class='classInfoHeader'>Kapur, Sidharth (226344)</div>2013-08-29 <td> * <table> * <tr> <th style='width:1%'>Course:</th> <td><a href='javascript:ClassDetails.getClassDetails(2976981);' id='ClassTitle'>CHEM AP(00)</a></td></tr> * <tr> <th>Term:</th> <td>1st Six Weeks</td> </tr> * <tr> <th>Teacher:</th> <td><a href="mailto:Nicole.Lyssy@pisd.edu" title="Nicole.Lyssy@pisd.edu">Lyssy, Carol</a></td> </tr> * </table> * <td> </tr> </tbody></table> */ public static String[] teacher(String html) { Element doc = Jsoup.parse(html); Element classStandardInfo = doc.getElementById("classStandardInfo"); // teacher is the third row in this table Element teacher = classStandardInfo.getElementsByTag("table").get(0).getElementsByTag("tr").get(3) .getElementsByTag("td").get(0); // System.out.println(teacher); String email = ""; try { email = teacher.getElementsByTag("a").get(0).attr("title"); } catch (IndexOutOfBoundsException e) { // Senior release teacher have NO email. The <a> tag does not exist. } String teacherName = teacher.text(); return new String[] { teacherName, email }; }
From source file:app.sunstreak.yourpisd.net.Parser.java
/** Parses average of each term from GradeSummary.aspx. * NOTICE: Does not work for second semester classes in which the second semester schedule * is different from the first semester schedule. * /*from w ww.j a v a 2 s . com*/ * @param doc the Jsoup element of GradeSummary.aspx * @param classList classList as returned by Init.aspx * @throws org.json.JSONException * @return [ * [classId, avg0, avg1, ...], * [classId, avg0, avg1, ...], * ] */ public static int[][] gradeSummary(Element doc, JSONArray classList) { List<int[]> gradeSummary = new ArrayList<int[]>(); Element reportTable = doc.getElementsByClass("reportTable").get(0).getElementsByTag("tbody").get(0); Elements rows = reportTable.getElementsByTag("tr"); int rowIndex = 0; while (rowIndex < rows.size()) { int[] classAverages = new int[11]; Arrays.fill(classAverages, -3); Element row = rows.get(rowIndex); Elements columns = row.getElementsByTag("td"); classAverages[0] = getClassId(row); for (int col = 0; col < 10; col++) { Element column = columns.get(col); String text = column.text(); // -2 for disabled class if (column.attr("class").equals("disabledCell")) text = "-2"; classAverages[col + 1] = text.equals("") ? -1 : Integer.parseInt(text); } gradeSummary.add(classAverages); rowIndex++; } /* * [ * [classId, avg0, avg1, ...], * [classId, avg0, avg1, ...], * ] */ int[][] result = new int[gradeSummary.size()][]; for (int i = 0; i < result.length; i++) { result[i] = new int[gradeSummary.get(i).length]; for (int j = 0; j < result[i].length; j++) result[i][j] = gradeSummary.get(i)[j]; } return result; }
From source file:controllers.CNBCProxy.java
public static F.Promise<Result> index(String query) { if (StringUtils.isEmpty(query)) { F.Promise.promise(new F.Function0<Object>() { @Override//from w w w . j av a 2 s . co m public Object apply() throws Throwable { return ok(Json.toJson("Query parameter (q) not provided ")); } }); } String target = "all"; String categories = "exclude"; String partnerId = "2000"; // ?target=all&categories=exclude&partnerId=2000&keywords=apple F.Promise<WSResponse> wsResponsePromise = WS.url("http://search.cnbc.com/main.do") .setQueryParameter("target", target).setQueryParameter("categories", categories) .setQueryParameter("partnerId", partnerId).setQueryParameter("keywords", query).get(); return wsResponsePromise.map(new F.Function<WSResponse, Result>() { @Override public Result apply(WSResponse wsResponse) throws Throwable { String body = wsResponse.getBody(); List<Map<String, String>> results = new ArrayList<Map<String, String>>(); try { // Parse html document org.jsoup.nodes.Document doc = Jsoup.parse(body); Elements items = doc.select("div:not(.clr).padL.padR"); // Choose elements that contain classes "padL" and "padR", but not "clr" // Iterate through results for (Element item : items) { Map<String, String> keyValue = new LinkedHashMap<String, String>(); // Add the keys and values keyValue.put("title", item.select("a").text()); keyValue.put("content", item.select("span.cnbc_bio_content").text()); keyValue.put("date", CalculateDateFormat(Long .parseLong(item.getElementsByTag("script").html().replaceAll("[^0-9]", ""), 10))); // Edit the date format keyValue.put("url", item.select("a").attr("href")); results.add(keyValue); } } catch (DOMException e) { e.printStackTrace(); } return ok(Json.toJson(results)); } }); }
From source file:app.sunstreak.yourpisd.net.Parser.java
/** * /*from w w w.j a v a 2 s .c o m*/ * @param html the source code for ANY page in Gradebook (usually Default.aspx) * @return */ public static List<String[]> parseStudents(String html) { List<String[]> list = new ArrayList<String[]>(); Element doc = Jsoup.parse(html); Element studentList = doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxStudentlist"); // Only one student if (studentList.text().isEmpty()) { // {studentId, studentName} list.add(new String[] { doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxStudentId").attr("value"), doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxMultiple").text() }); return list; } // Multiple students else { for (Element a : studentList.getElementsByTag("a")) { String name = a.text(); String onClick = a.attr("onClick"); String studentId = onClick.substring(onClick.indexOf('\'') + 1, onClick.lastIndexOf('\'')); list.add(new String[] { studentId, name }); } return list; } }
From source file:com.jimplush.goose.ContentExtractor.java
/** * checks the density of links within a node, is there not much text and most of it contains linky shit? * if so it's no good/*from w ww .ja va 2 s . c o m*/ * * @param e * @return */ private static boolean isHighLinkDensity(Element e) { Elements links = e.getElementsByTag("a"); if (links.size() == 0) { return false; } String text = e.text().trim(); String[] words = SPACE_SPLITTER.split(text); float numberOfWords = words.length; // let's loop through all the links and calculate the number of words that make up the links StringBuilder sb = new StringBuilder(); for (Element link : links) { sb.append(link.text()); } String linkText = sb.toString(); String[] linkWords = SPACE_SPLITTER.split(linkText); float numberOfLinkWords = linkWords.length; float numberOfLinks = links.size(); float linkDivisor = numberOfLinkWords / numberOfWords; float score = linkDivisor * numberOfLinks; if (logger.isDebugEnabled()) { String logText; if (e.text().length() >= 51) { logText = e.text().substring(0, 50); } else { logText = e.text(); } logger.debug("Calulated link density score as: " + score + " for node: " + logText); } if (score > 1) { return true; } return false; }