Example usage for org.jsoup.nodes.Element text()

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.text().

Prototype

public String text()

Source Link

Document

Gets the combined text of this element and all its children.

Usage

From source file:org.shareok.data.plosdata.PlosUtil.java

/**
 * Extracts the acknowledgement text from an HTML page.
 *
 * <p>Finds the first {@code a[id=ack]} anchor, takes its parent element and
 * concatenates the text of every {@code <p>} found beneath that parent, in
 * document order and without any separator.
 *
 * @param html the HTML source of the page
 * @return the concatenated paragraph text, or an empty string when the anchor,
 *         its parent, or any paragraph with text is missing
 */
public static String getPlosAck(String html) {
    Document doc = Jsoup.parse(html);

    // Elements.first() yields null for an empty selection.
    Element ackAnchor = doc.select("a[id=ack]").first();
    if (ackAnchor == null) {
        return "";
    }

    Element ackDiv = ackAnchor.parent();
    if (ackDiv == null) {
        return "";
    }

    // StringBuilder avoids re-copying the accumulated text on every paragraph.
    StringBuilder ack = new StringBuilder();
    for (Element paragraph : ackDiv.select("p")) {
        if (paragraph.hasText()) {
            ack.append(paragraph.text());
        }
    }
    return ack.toString();
}

From source file:org.shareok.data.plosdata.PlosUtil.java

/**
 * Extracts the citation text from an HTML page.
 *
 * <p>Takes the first {@code div[class=articleinfo]} element, reads the text of
 * its first child element and removes every occurrence of the literal
 * {@code "Citation:"} from it.
 *
 * @param html the HTML source of the page
 * @return the citation text, or an empty string when the div is missing or has
 *         no child elements
 */
public static String getPlosCitation(String html) {
    Document doc = Jsoup.parse(html);

    Element articleInfo = doc.select("div[class=articleinfo]").first();
    if (articleInfo == null) {
        return "";
    }

    // Element.child(0) throws on a childless element instead of returning null,
    // so the emptiness check has to happen on the children list.
    Elements children = articleInfo.children();
    if (children.isEmpty()) {
        return "";
    }

    return children.first().text().replace("Citation:", "");
}

From source file:org.shareok.data.plosdata.PlosUtil.java

/**
 * Extracts the author contribution statement from an HTML page.
 *
 * <p>Takes the first {@code div[class=contributions]} element and returns the
 * text of its third child element.
 *
 * @param html the string of the web page source
 * @return the author contribution statement, or an empty string when the div
 *         is missing or has fewer than three child elements
 */
public static String getAuthorContributions(String html) {
    Document doc = Jsoup.parse(html);

    Element contributionsDiv = doc.select("div[class=contributions]").first();
    if (contributionsDiv == null) {
        return "";
    }

    // Element.child(2) throws when there are fewer than three children rather
    // than returning null, so check the size up front.
    Elements children = contributionsDiv.children();
    if (children.size() <= 2) {
        return "";
    }

    return children.get(2).text();
}

From source file:com.kantenkugel.discordbot.jdocparser.JDoc.java

/**
 * Downloads the class index at {@code JDocUtil.JAVA_JDOCS_CLASS_INDEX} and
 * records every linked class in {@code javaJavaDocs}.
 *
 * <p>For each child of the first child of the first {@code indexContainer}
 * element, the first nested element is inspected; when it is an {@code <a>}
 * whose {@code href} starts with {@code "java/"}, the lower-cased link text is
 * mapped to that {@code href}.
 *
 * <p>Any failure (HTTP error, I/O error, unexpected page structure) is logged
 * and otherwise ignored.
 */
private static void fetchJavaClassIndexes() {
    // try-with-resources releases the response (and its body) on every path,
    // including the early return for an unsuccessful status code.
    try (Response res = Bot.httpClient
            .newCall(new Request.Builder().url(JDocUtil.JAVA_JDOCS_CLASS_INDEX).get().build()).execute()) {
        if (!res.isSuccessful()) {
            JDocUtil.LOG.warn("OkHttp returned failure for java8 index: " + res.code());
            return;
        }
        ResponseBody body = res.body();
        Document docBody = Jsoup.parse(body.byteStream(), "UTF-8", JDocUtil.JAVA_JDOCS_PREFIX);
        docBody.getElementsByClass("indexContainer").first().child(0).children().forEach(child -> {
            Element link = child.child(0);
            if (link.tagName().equals("a") && link.attr("href").startsWith("java/")) {
                javaJavaDocs.put(link.text().toLowerCase(), link.attr("href"));
            }
        });
    } catch (Exception e) {
        JDocUtil.LOG.error("Failed fetching the j8 class index", e);
    }
}

From source file:me.vertretungsplan.parser.UntisInfoParser.java

/**
 * Copies the content of one timetable cell into the given substitution.
 *
 * <p>A leading {@code *} is dropped from the cell text, and the placeholders
 * {@code "---."} and {@code "---"} are stored as {@code null}. When the text of
 * the cell's {@code <strike>} elements equals the whole cell text, the value is
 * written to the "previous" property; otherwise to the current one.
 *
 * @param s    the substitution to update
 * @param type which property the cell holds: {@code "subject"},
 *             {@code "teacher"} or {@code "room"}; other values are ignored
 * @param td   the table cell to read
 */
private static void parseTimetableCellContent(Substitution s, String type, Element td) {
    final String cellText = td.text();

    String value = cellText.startsWith("*") ? cellText.substring(1) : cellText;
    if ("---.".equals(value) || "---".equals(value)) {
        value = null;
    }

    final boolean struckOut = cellText.equals(td.select("strike").text());

    if (struckOut) {
        switch (type) {
        case "subject":
            s.setPreviousSubject(value);
            break;
        case "teacher":
            s.setPreviousTeacher(value);
            break;
        case "room":
            s.setPreviousRoom(value);
            break;
        }
    } else {
        switch (type) {
        case "subject":
            s.setSubject(value);
            break;
        case "teacher":
            s.setTeacher(value);
            break;
        case "room":
            s.setRoom(value);
            break;
        }
    }
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * Reads the student name shown on a Gradebook page.
 *
 * @param doc The Jsoup element from any Gradebook page.
 * @return the text of the {@code ctl00_ctl00_ContentPlaceHolder_uxMultiple}
 *         element: the full name (Last, First) of the student and ID number.
 */
public static String studentName(Element doc) {
    return doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxMultiple").text();
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/** Parses average of each term from GradeSummary.aspx.
 * NOTICE: Does not work for second semester classes in which the second semester schedule
 *  is different from the first semester schedule.
 * /*  ww  w .  j a va  2s  . c  om*/
 * @param doc the Jsoup element of GradeSummary.aspx
 * @param classList classList as returned by Init.aspx
 * @throws org.json.JSONException
 * @return     [
 *       [classId, avg0, avg1, ...],
 *       [classId, avg0, avg1, ...],
 * ]
 */
public static int[][] gradeSummary(Element doc, JSONArray classList) {

    List<int[]> gradeSummary = new ArrayList<int[]>();

    Element reportTable = doc.getElementsByClass("reportTable").get(0).getElementsByTag("tbody").get(0);
    Elements rows = reportTable.getElementsByTag("tr");
    int rowIndex = 0;

    while (rowIndex < rows.size()) {

        int[] classAverages = new int[11];
        Arrays.fill(classAverages, -3);

        Element row = rows.get(rowIndex);
        Elements columns = row.getElementsByTag("td");

        classAverages[0] = getClassId(row);

        for (int col = 0; col < 10; col++) {
            Element column = columns.get(col);
            String text = column.text();

            // -2 for disabled class
            if (column.attr("class").equals("disabledCell"))
                text = "-2";
            classAverages[col + 1] = text.equals("") ? -1 : Integer.parseInt(text);
        }
        gradeSummary.add(classAverages);
        rowIndex++;
    }

    /*
     * [
     *       [classId, avg0, avg1, ...],
     *       [classId, avg0, avg1, ...],
     * ]
     */
    int[][] result = new int[gradeSummary.size()][];
    for (int i = 0; i < result.length; i++) {
        result[i] = new int[gradeSummary.get(i).length];
        for (int j = 0; j < result[i].length; j++)
            result[i][j] = gradeSummary.get(i)[j];
    }
    return result;
}

From source file:com.ferasinfotech.gwreader.ScreenSlidePageFragment.java

/**
 * Alternate factory method for this fragment class. Constructs a new fragment for the
 * given page number and HTML story element.
 *
 * <p>Page 0 is a built-in help page and does not read {@code story}. For any other
 * page the title, summary, headline and cover photo are taken from the first element
 * of the classes {@code feature__tag}, {@code story__summary}, {@code feature__text}
 * and {@code feature__image} inside {@code story}, and the story id from its
 * {@code data-story-id} attribute.
 *
 * @param pageNumber zero-based page index; 0 selects the help page
 * @param numPages   total page count, shown in the title of story pages
 * @param story      the story element; only read when {@code pageNumber != 0}
 * @return a fragment whose arguments bundle carries the extracted values
 * @throws NumberFormatException     if {@code data-story-id} is not an integer
 * @throws IndexOutOfBoundsException if one of the expected classes is absent
 */
public static ScreenSlidePageFragment create(int pageNumber, int numPages, org.jsoup.nodes.Element story) {
    final int storyId;
    final String name;
    final String summary;
    final String headline;
    final String coverPhotoUrl;
    final String storyString;

    if (pageNumber == 0) {
        storyId = 0;
        name = "Grasswire Help";
        headline = "Usage Instructions";
        coverPhotoUrl = "android.resource://com.ferasinfotech.gwreader/" + R.drawable.gw_logo;
        summary = "Swipe right and left to read each story.\n\n"
                + "Scroll down to read facts and associated news items (tweets and links) for each story.\n\n"
                + "Tap on a news items within a story and you'll be able to follow web links, view tweets via the Twitter app, or watch videos.\n\n"
                + "A long press on a story's cover photo will launch the device browser to view or edit the story on the Grasswire mobile site.\n\n"
                + "A long press on the image above will launch the Grasswire main page.\n\n" + "App Version: "
                + BuildConfig.VERSION_NAME + "\n\n";
        storyString = "";
    } else {
        // Story page: 'story' holds the story data.
        storyId = Integer.parseInt(story.attr("data-story-id"));
        name = story.getElementsByClass("feature__tag").get(0).text()
                + " (" + pageNumber + "/" + numPages + ")";
        // NOTE(review): this only has an effect if html() renders breaks as "<br />" - confirm.
        summary = story.getElementsByClass("story__summary").get(0).html().replace("<br />", "\r");
        headline = story.getElementsByClass("feature__text").get(0).text();
        coverPhotoUrl = story.getElementsByClass("feature__image").get(0).attr("src");
        storyString = story.toString();
    }

    Bundle args = new Bundle();
    args.putInt(ARG_PAGE, pageNumber);
    args.putInt(ARG_STORY_ID, storyId);
    args.putString(ARG_TITLE, name);
    args.putString(ARG_SUMMARY, summary);
    args.putString(ARG_HEADLINE, headline);
    args.putString(ARG_COVER_PHOTO, coverPhotoUrl);
    args.putString(ARG_STORY_STRING, "<html><head></head><body>" + storyString + "</body></html>");

    ScreenSlidePageFragment fragment = new ScreenSlidePageFragment();
    fragment.setArguments(args);
    return fragment;
}

From source file:com.nuance.expertassistant.ContentExtractor.java

/**
 * Emits the document as nested {@code <section>} / {@code <para>} markup via
 * {@code print}.
 *
 * <p>An outer section titled with the document title wraps everything. Elements
 * are visited in the order returned by {@code doc.select("*")}:
 * <ul>
 *   <li>elements whose own text is empty or whitespace-only are skipped;</li>
 *   <li>elements whose tag name contains the letter {@code a} are skipped;</li>
 *   <li>{@code h1}..{@code h4} elements close any open section of the same or a
 *       deeper level and open a new section titled with the heading text;</li>
 *   <li>every other element has its own text emitted as a {@code <para>}.</li>
 * </ul>
 * Sections still open at the end are closed before the outer section.
 *
 * @param doc the parsed document to convert
 */
public static void extract(Document doc) {

    final String title = doc.title();
    print("<section id =\"{}\" title =\"" + stripNonValidXMLCharacters(title) + "\">");

    final ArrayList<String> openHeaderList = new ArrayList<String>();

    for (final Element element : doc.select("*")) {
        final String ownText = element.ownText();
        // Compare by content: '== ""' tests identity and does not reliably
        // detect whitespace-only text.
        if (ownText == null || ownText.trim().isEmpty()) {
            continue;
        }

        final String tagName = element.tagName();
        // NOTE(review): contains("a") also matches span, table, label, ... -
        // presumably only anchors were meant; kept as-is to preserve output.
        if (tagName.contains("a")) {
            continue;
        }

        // Find the first heading level (1..4) this element qualifies for.
        final String text = element.text();
        int headingLevel = 0;
        for (int level = 1; level <= 4 && headingLevel == 0; level++) {
            if (tagName.contains("h" + level) && text != null && !text.isEmpty()) {
                headingLevel = level;
            }
        }

        if (headingLevel > 0) {
            closeOpenSections(openHeaderList, headingLevel);
            print("<section id =\"{}\" title =\"" + stripNonValidXMLCharacters(text) + "\">");
            openHeaderList.add("h" + headingLevel);
        } else {
            print("<para>");
            print(stripNonValidXMLCharacters(ownText));
            print("</para>");
        }
    }

    closeOpenSections(openHeaderList, 1);

    print("</section>");

}

/** Closes every open heading section from {@code fromLevel} through h4, one closing tag each. */
private static void closeOpenSections(ArrayList<String> openHeaderList, int fromLevel) {
    for (int level = fromLevel; level <= 4; level++) {
        // remove(Object) reports whether the level was actually open.
        if (openHeaderList.remove("h" + level)) {
            print("</section>");
        }
    }
}

From source file:app.sunstreak.yourpisd.net.Parser.java

/**
 * /*from w  w w  .j av a2 s.co m*/
 * @param html the source code for ANY page in Gradebook (usually Default.aspx)
 * @return
 */
public static List<String[]> parseStudents(String html) {
    List<String[]> list = new ArrayList<String[]>();

    Element doc = Jsoup.parse(html);
    Element studentList = doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxStudentlist");

    // Only one student
    if (studentList.text().isEmpty()) {
        // {studentId, studentName}
        list.add(new String[] { doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxStudentId").attr("value"),
                doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxMultiple").text() });
        return list;
    }
    // Multiple students
    else {
        for (Element a : studentList.getElementsByTag("a")) {
            String name = a.text();
            String onClick = a.attr("onClick");
            String studentId = onClick.substring(onClick.indexOf('\'') + 1, onClick.lastIndexOf('\''));
            list.add(new String[] { studentId, name });
        }
        return list;
    }
}