Example usage for org.jsoup.nodes Element getElementsByTag

List of usage examples for org.jsoup.nodes Element getElementsByTag

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.getElementsByTag.

Prototype

public Elements getElementsByTag(String tagName) 

Source Link

Document

Finds elements, including and recursively under this element, with the specified tag name.

Usage

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * Extracts the {@code PageUniqueId} token from a parsed page.
 *
 * <p>The page is expected to contain a hidden field of the form
 * {@code <input type="hidden" name="PageUniqueId" id="PageUniqueId" value="..." />}.
 *
 * @param doc parsed page (or subtree) to search
 * @return the {@code value} attribute of the first {@code input} element whose {@code name}
 *         attribute is {@code PageUniqueId}, or {@code null} when no such element exists
 */
public static String pageUniqueId(Element doc) {
    Elements candidates = doc.getElementsByTag("input");

    for (int index = 0; index < candidates.size(); index++) {
        Element input = candidates.get(index);
        // Skip every input that is not the field we are looking for.
        if (!"PageUniqueId".equals(input.attr("name"))) {
            continue;
        }
        return input.attr("value");
    }

    return null;
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * Reports whether a response page indicates that access was granted.
 *
 * <p>The decision is based solely on the page title: access counts as granted whenever the
 * combined text of the {@code title} element(s) differs from {@code LOGIN_FAILURE_TITLE}.
 *
 * @param html raw HTML of the response page
 * @return {@code false} if the page title equals {@code LOGIN_FAILURE_TITLE}, {@code true}
 *         otherwise
 */
public static boolean accessGrantedEditure(String html) {
    Element doc = Jsoup.parse(html);
    String title = doc.getElementsByTag("title").text();
    // The comparison is already a boolean; wrapping it in "? true : false" adds nothing.
    return !title.equals(LOGIN_FAILURE_TITLE);
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * /*w w w.  java  2s .  co  m*/
 * @param html html source code of https://sso.portal.mypisd.net/cas/login?service=http%3A%2F%2Fportal.mypisd.net%2Fc%2Fportal%2Flogin
 * @return the value embedded in the <input type="hidden" name="lt" value=""> block
 */
public static String portalLt(String html) {
    Element doc = Jsoup.parse(html);
    Elements inputTags = doc.getElementsByTag("input");
    //Shortcut
    //      if (inputTags.get(4).attr("name").equals("lt"))
    //         return inputTags.get(4).attr("value");
    //      else {
    for (Element tag : inputTags) {
        if (tag.attr("name").equals("lt"))
            return tag.attr("value");
    }
    //      }
    return null;
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * Parses the category table and the term average out of a grade page.
 *
 * <p>Every {@code tr} in the first {@code tbody} of the element with id {@code Category}
 * becomes one {@link JSONObject}. Each non-empty cell is stored under the key returned by
 * {@code categoryTableHeader(columnIndex)}, as an {@code int} if it parses as one, otherwise
 * as a {@code double}, otherwise as the raw text.
 *
 * <p>The average is read from the fourth {@code td} of the element with id
 * {@code finalGrade}. If that element is missing, has fewer than four cells, or the cell is
 * not an integer, the average is reported as {@code -1}.
 *
 * @param html raw HTML of the grade page
 * @return a two-element array: index 0 holds the {@link JSONArray} of category objects,
 *         index 1 holds the average as an {@link Integer} ({@code -1} when unavailable)
 * @throws JSONException if a value cannot be stored in a category object
 * @throws NullPointerException if the page has no element with id {@code Category}
 */
public static Object[] termCategoryGrades(String html) throws JSONException {
    JSONArray termCategoryGrades = new JSONArray();

    Element doc = Jsoup.parse(html);
    Element categoryTable = doc.getElementById("Category");
    Elements rows = categoryTable.getElementsByTag("tbody").get(0).getElementsByTag("tr");

    for (Element row : rows) {
        JSONObject category = new JSONObject();
        Elements columns = row.getElementsByTag("td");
        for (int i = 0; i < columns.size(); i++) {
            String value = columns.get(i).text();
            // Do not store empty values.
            if (value.equals("")) {
                continue;
            }
            // Store the value with the narrowest type that fits: int, then double, then text.
            try {
                category.putOpt(categoryTableHeader(i), Integer.parseInt(value));
            } catch (NumberFormatException notAnInt) {
                try {
                    category.putOpt(categoryTableHeader(i), Double.parseDouble(value));
                } catch (NumberFormatException notADouble) {
                    category.putOpt(categoryTableHeader(i), value);
                }
            }
        }
        termCategoryGrades.put(category);
    }

    // The average stays -1 unless the page provides a usable value. Check for the missing
    // element explicitly instead of relying on a NullPointerException.
    int average = -1;
    Element finalGrade = doc.getElementById("finalGrade");
    if (finalGrade != null) {
        Elements cells = finalGrade.getElementsByTag("td");
        if (cells.size() > 3) {
            try {
                average = Integer.parseInt(cells.get(3).text());
            } catch (NumberFormatException ignored) {
                // Blank or non-numeric average cell: keep -1 rather than failing the
                // whole parse after the categories were read successfully.
            }
        }
    }

    return new Object[] { termCategoryGrades, average };
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * Parses the assignments table of a detailed grade report.
 *
 * <p>Every {@code tr} in the first {@code tbody} of the element with id {@code Assignments}
 * becomes one {@link JSONObject}. Each non-empty cell is stored under the key returned by
 * {@code assignmentTableHeader(columnIndex)}, as an {@code int} if it parses as one,
 * otherwise as a {@code double}, otherwise as the raw text. In addition, {@code assignmentId}
 * and {@code gradebookId} are extracted from the {@code href} of the row's first link.
 *
 * @param html raw HTML of the detailed report page
 * @return one JSON object per table row, in page order
 * @throws JSONException if a value cannot be stored in an assignment object
 * @throws IndexOutOfBoundsException if the page has no {@code Assignments} element, the table
 *         has no {@code tbody}, or a row contains no link
 * @throws IllegalStateException if a row's link does not have the expected query string
 */
public static JSONArray detailedReport(String html) throws JSONException {
    Element doc = Jsoup.parse(html);
    Element assignments = doc.getElementsByAttributeValue("id", "Assignments").get(0);
    Elements tableRows = assignments.getElementsByTag("tbody").get(0).getElementsByTag("tr");

    // The pattern is the same for every row, so compile it once instead of once per row.
    Pattern detailLinkPattern = Pattern.compile(".+" + "assignmentId=(\\d+)" + "&H=S" + "&GradebookId=(\\d+)"
            + "&TermId=\\d+" + "&StudentId=\\d+&");

    JSONArray grades = new JSONArray();

    for (Element tr : tableRows) {
        JSONObject assignment = new JSONObject();

        Elements columns = tr.getElementsByTag("td");

        for (int i = 0; i < columns.size(); i++) {
            String value = columns.get(i).text();
            // Do not store empty values.
            if (value.equals("")) {
                continue;
            }
            // Store the value with the narrowest type that fits: int, then double, then text.
            try {
                assignment.putOpt(assignmentTableHeader(i), Integer.parseInt(value));
            } catch (NumberFormatException notAnInt) {
                try {
                    assignment.putOpt(assignmentTableHeader(i), Double.parseDouble(value));
                } catch (NumberFormatException notADouble) {
                    assignment.putOpt(assignmentTableHeader(i), value);
                }
            }
        }

        String assignmentDetailLink = tr.getElementsByTag("a").get(0).attr("href");
        Matcher matcher = detailLinkPattern.matcher(assignmentDetailLink);
        // Matcher.group() throws a bare "No match found" IllegalStateException after a failed
        // find(); fail with the same exception type but name the offending link.
        if (!matcher.find()) {
            throw new IllegalStateException("Unrecognized assignment detail link: " + assignmentDetailLink);
        }
        assignment.put("assignmentId", Integer.parseInt(matcher.group(1)));
        assignment.put("gradebookId", Integer.parseInt(matcher.group(2)));
        grades.put(assignment);
    }
    return grades;
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * Reads assignment view page and returns teacher name.
 * /*from w ww  .jav  a2s.c  o m*/
 * Parses from this table:
 * 
 * <table id='classStandardInfo'> <tbody> <tr>  
 * <td>       <div class='classInfoHeader'>Kapur, Sidharth (226344)</div>2013-08-29   <td>    
 * <table>    
 *       <tr>      <th style='width:1%'>Course:</th>      <td><a href='javascript:ClassDetails.getClassDetails(2976981);' id='ClassTitle'>CHEM  AP(00)</a></td></tr>    
 *       <tr>      <th>Term:</th>      <td>1st Six Weeks</td>     </tr>
 *       <tr>      <th>Teacher:</th>      <td><a href="mailto:Nicole.Lyssy@pisd.edu" title="Nicole.Lyssy@pisd.edu">Lyssy, Carol</a></td>     </tr>
 * </table>
 * <td>  </tr> </tbody></table>
 */
public static String[] teacher(String html) {
    Element doc = Jsoup.parse(html);
    Element classStandardInfo = doc.getElementById("classStandardInfo");
    // teacher is the third row in this table
    Element teacher = classStandardInfo.getElementsByTag("table").get(0).getElementsByTag("tr").get(3)
            .getElementsByTag("td").get(0);
    //      System.out.println(teacher);
    String email = "";
    try {
        email = teacher.getElementsByTag("a").get(0).attr("title");
    } catch (IndexOutOfBoundsException e) {
        // Senior release teacher have NO email. The <a> tag does not exist.
    }
    String teacherName = teacher.text();
    return new String[] { teacherName, email };
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/** Parses average of each term from GradeSummary.aspx.
 * NOTICE: Does not work for second semester classes in which the second semester schedule
 *  is different from the first semester schedule.
 * /*from w  ww.j  a v  a  2 s  .  com*/
 * @param doc the Jsoup element of GradeSummary.aspx
 * @param classList classList as returned by Init.aspx
 * @throws org.json.JSONException
 * @return     [
 *       [classId, avg0, avg1, ...],
 *       [classId, avg0, avg1, ...],
 * ]
 */
public static int[][] gradeSummary(Element doc, JSONArray classList) {

    List<int[]> gradeSummary = new ArrayList<int[]>();

    Element reportTable = doc.getElementsByClass("reportTable").get(0).getElementsByTag("tbody").get(0);
    Elements rows = reportTable.getElementsByTag("tr");
    int rowIndex = 0;

    while (rowIndex < rows.size()) {

        int[] classAverages = new int[11];
        Arrays.fill(classAverages, -3);

        Element row = rows.get(rowIndex);
        Elements columns = row.getElementsByTag("td");

        classAverages[0] = getClassId(row);

        for (int col = 0; col < 10; col++) {
            Element column = columns.get(col);
            String text = column.text();

            // -2 for disabled class
            if (column.attr("class").equals("disabledCell"))
                text = "-2";
            classAverages[col + 1] = text.equals("") ? -1 : Integer.parseInt(text);
        }
        gradeSummary.add(classAverages);
        rowIndex++;
    }

    /*
     * [
     *       [classId, avg0, avg1, ...],
     *       [classId, avg0, avg1, ...],
     * ]
     */
    int[][] result = new int[gradeSummary.size()][];
    for (int i = 0; i < result.length; i++) {
        result[i] = new int[gradeSummary.get(i).length];
        for (int j = 0; j < result[i].length; j++)
            result[i][j] = gradeSummary.get(i)[j];
    }
    return result;
}

From source file:controllers.CNBCProxy.java

/**
 * Proxies a keyword search to CNBC and returns the hits as JSON.
 *
 * <p>The CNBC result page is scraped: every {@code div} with classes {@code padL} and
 * {@code padR} (but not {@code clr}) becomes one object with the keys {@code title},
 * {@code content}, {@code date} and {@code url}, in that order.
 *
 * @param query search keywords; when empty, a JSON message is returned and CNBC is not
 *              contacted
 * @return a promise of the JSON response
 */
public static F.Promise<Result> index(String query) {

    if (StringUtils.isEmpty(query)) {
        // Return the short-circuit response. Previously this promise was created and then
        // discarded, so the remote search below still ran with an empty keyword.
        return F.Promise.promise(new F.Function0<Result>() {
            @Override
            public Result apply() throws Throwable {
                return ok(Json.toJson("Query parameter (q) not provided "));
            }

        });
    }

    String target = "all";
    String categories = "exclude";
    String partnerId = "2000";
    // ?target=all&categories=exclude&partnerId=2000&keywords=apple
    F.Promise<WSResponse> wsResponsePromise = WS.url("http://search.cnbc.com/main.do")
            .setQueryParameter("target", target).setQueryParameter("categories", categories)
            .setQueryParameter("partnerId", partnerId).setQueryParameter("keywords", query).get();

    return wsResponsePromise.map(new F.Function<WSResponse, Result>() {
        @Override
        public Result apply(WSResponse wsResponse) throws Throwable {

            String body = wsResponse.getBody();

            List<Map<String, String>> results = new ArrayList<Map<String, String>>();

            try {
                // Parse html document
                org.jsoup.nodes.Document doc = Jsoup.parse(body);
                Elements items = doc.select("div:not(.clr).padL.padR"); // Choose elements that contain classes "padL" and "padR", but not "clr"

                // Iterate through results
                for (Element item : items) {

                    Map<String, String> keyValue = new LinkedHashMap<String, String>();

                    // Add the keys and values
                    keyValue.put("title", item.select("a").text());
                    keyValue.put("content", item.select("span.cnbc_bio_content").text());
                    // The date is taken from the digits found in the item's <script> content.
                    // NOTE(review): Long.parseLong throws NumberFormatException when the item
                    // has no such digits; that is not caught below - confirm this is intended.
                    keyValue.put("date", CalculateDateFormat(Long
                            .parseLong(item.getElementsByTag("script").html().replaceAll("[^0-9]", ""), 10))); // Edit the date format
                    keyValue.put("url", item.select("a").attr("href"));

                    results.add(keyValue);
                }
            } catch (DOMException e) {
                e.printStackTrace();
            }

            return ok(Json.toJson(results));
        }
    });
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * /*from   w w w.j a v a  2  s .c o m*/
 * @param html the source code for ANY page in Gradebook (usually Default.aspx)
 * @return
 */
public static List<String[]> parseStudents(String html) {
    List<String[]> list = new ArrayList<String[]>();

    Element doc = Jsoup.parse(html);
    Element studentList = doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxStudentlist");

    // Only one student
    if (studentList.text().isEmpty()) {
        // {studentId, studentName}
        list.add(new String[] { doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxStudentId").attr("value"),
                doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxMultiple").text() });
        return list;
    }
    // Multiple students
    else {
        for (Element a : studentList.getElementsByTag("a")) {
            String name = a.text();
            String onClick = a.attr("onClick");
            String studentId = onClick.substring(onClick.indexOf('\'') + 1, onClick.lastIndexOf('\''));
            list.add(new String[] { studentId, name });
        }
        return list;
    }
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * checks the density of links within a node, is there not much text and most of it contains linky shit?
 * if so it's no good/*from w  ww  .ja  va  2  s .  c  o  m*/
 *
 * @param e
 * @return
 */
private static boolean isHighLinkDensity(Element e) {

    Elements links = e.getElementsByTag("a");

    if (links.size() == 0) {
        return false;
    }

    String text = e.text().trim();
    String[] words = SPACE_SPLITTER.split(text);
    float numberOfWords = words.length;

    // let's loop through all the links and calculate the number of words that make up the links
    StringBuilder sb = new StringBuilder();
    for (Element link : links) {
        sb.append(link.text());
    }
    String linkText = sb.toString();
    String[] linkWords = SPACE_SPLITTER.split(linkText);
    float numberOfLinkWords = linkWords.length;

    float numberOfLinks = links.size();

    float linkDivisor = numberOfLinkWords / numberOfWords;
    float score = linkDivisor * numberOfLinks;

    if (logger.isDebugEnabled()) {
        String logText;
        if (e.text().length() >= 51) {
            logText = e.text().substring(0, 50);
        } else {
            logText = e.text();
        }
        logger.debug("Calulated link density score as: " + score + " for node: " + logText);
    }
    if (score > 1) {
        return true;
    }

    return false;
}