List of usage examples for org.jsoup.nodes.Element.text()
public String text()
From source file:org.shareok.data.plosdata.PlosUtil.java
public static String getPlosAck(String html) { String ack = ""; Document doc = Jsoup.parse(html.toString()); Elements ackLinks = doc.select("a[id=ack]"); if (!ackLinks.isEmpty()) { Element ackDiv = ackLinks.first().parent(); if (null != ackDiv) { Elements ackParagraphs = ackDiv.select("p"); if (!ackParagraphs.isEmpty()) { for (Element element : ackParagraphs) { if (element.hasText()) ack += element.text(); }//from ww w. j a v a2s . c om } //System.out.println("the ack = "+ack+"\n\n"); } } return ack; }
From source file:org.shareok.data.plosdata.PlosUtil.java
public static String getPlosCitation(String html) { String citation = ""; Document doc = Jsoup.parse(html.toString()); Elements articleInfoDiv = doc.select("div[class=articleinfo]"); if (!articleInfoDiv.isEmpty()) { Element citationParagraph = articleInfoDiv.first().child(0); if (null != citationParagraph) { citation = citationParagraph.text().replace("Citation:", ""); //System.out.println("the citation = "+citation+"\n\n"); }//from w ww.j a v a 2 s.c om } return citation; }
From source file:org.shareok.data.plosdata.PlosUtil.java
/** * //from w ww. j a v a 2 s .c o m * @param html : The string of the web page source * @return author contribution statement */ public static String getAuthorContributions(String html) { String contributions = ""; Document doc = Jsoup.parse(html.toString()); Elements articleInfoDiv = doc.select("div[class=contributions]"); if (!articleInfoDiv.isEmpty()) { Element contributionsParagraph = articleInfoDiv.first().child(2); if (null != contributionsParagraph) { contributions = contributionsParagraph.text(); //System.out.println("the contributions = "+contributions+"\n\n");System.exit(0); } } return contributions; }
From source file:com.kantenkugel.discordbot.jdocparser.JDoc.java
/**
 * Downloads the class index page at {@code JDocUtil.JAVA_JDOCS_CLASS_INDEX} and records
 * its links in {@code javaJavaDocs}.
 *
 * <p>For every child of the first child of the first {@code indexContainer} element,
 * the child's first element is inspected; when it is an {@code <a>} whose {@code href}
 * starts with {@code "java/"}, the lower-cased link text is mapped to that href.
 *
 * <p>A non-successful HTTP status is logged as a warning and nothing is recorded. Any
 * exception raised while fetching or parsing is logged as an error and swallowed.
 */
private static void fetchJavaClassIndexes() {
    // try-with-resources closes the response on every exit path (early return, normal
    // completion, exception); previously it was never closed.
    try (Response res = Bot.httpClient
            .newCall(new Request.Builder().url(JDocUtil.JAVA_JDOCS_CLASS_INDEX).get().build())
            .execute()) {
        if (!res.isSuccessful()) {
            JDocUtil.LOG.warn("OkHttp returned failure for java8 index: " + res.code());
            return;
        }
        ResponseBody body = res.body();
        Document docBody = Jsoup.parse(body.byteStream(), "UTF-8", JDocUtil.JAVA_JDOCS_PREFIX);
        docBody.getElementsByClass("indexContainer").first().child(0).children().forEach(child -> {
            Element link = child.child(0);
            if (link.tagName().equals("a") && link.attr("href").startsWith("java/")) {
                javaJavaDocs.put(link.text().toLowerCase(), link.attr("href"));
            }
        });
    } catch (Exception e) {
        JDocUtil.LOG.error("Failed fetching the j8 class index", e);
    }
}
From source file:me.vertretungsplan.parser.UntisInfoParser.java
private static void parseTimetableCellContent(Substitution s, String type, Element td) { String value = td.text(); if (value.startsWith("*")) { value = value.substring(1);// www .j a v a 2 s. c o m } if (value.equals("---.") || value.equals("---")) { value = null; } boolean striked = td.select("strike").text().equals(td.text()); switch (type) { case "subject": if (striked) { s.setPreviousSubject(value); } else { s.setSubject(value); } break; case "teacher": if (striked) { s.setPreviousTeacher(value); } else { s.setTeacher(value); } break; case "room": if (striked) { s.setPreviousRoom(value); } else { s.setRoom(value); } break; } }
From source file:app.sunstreak.yourpisd.net.Parser.java
/** * @param doc The Jsoup element from any Gradebook page. * @return the full name (Last, First) of the student and ID number. *///from ww w. j a v a 2s . co m public static String studentName(Element doc) { Element studentName = doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxMultiple"); return studentName.text(); }
From source file:app.sunstreak.yourpisd.net.Parser.java
/** Parses average of each term from GradeSummary.aspx. * NOTICE: Does not work for second semester classes in which the second semester schedule * is different from the first semester schedule. * /* ww w . j a va 2s . c om*/ * @param doc the Jsoup element of GradeSummary.aspx * @param classList classList as returned by Init.aspx * @throws org.json.JSONException * @return [ * [classId, avg0, avg1, ...], * [classId, avg0, avg1, ...], * ] */ public static int[][] gradeSummary(Element doc, JSONArray classList) { List<int[]> gradeSummary = new ArrayList<int[]>(); Element reportTable = doc.getElementsByClass("reportTable").get(0).getElementsByTag("tbody").get(0); Elements rows = reportTable.getElementsByTag("tr"); int rowIndex = 0; while (rowIndex < rows.size()) { int[] classAverages = new int[11]; Arrays.fill(classAverages, -3); Element row = rows.get(rowIndex); Elements columns = row.getElementsByTag("td"); classAverages[0] = getClassId(row); for (int col = 0; col < 10; col++) { Element column = columns.get(col); String text = column.text(); // -2 for disabled class if (column.attr("class").equals("disabledCell")) text = "-2"; classAverages[col + 1] = text.equals("") ? -1 : Integer.parseInt(text); } gradeSummary.add(classAverages); rowIndex++; } /* * [ * [classId, avg0, avg1, ...], * [classId, avg0, avg1, ...], * ] */ int[][] result = new int[gradeSummary.size()][]; for (int i = 0; i < result.length; i++) { result[i] = new int[gradeSummary.get(i).length]; for (int j = 0; j < result[i].length; j++) result[i][j] = gradeSummary.get(i)[j]; } return result; }
From source file:com.ferasinfotech.gwreader.ScreenSlidePageFragment.java
/** * Alternate Factory method for this fragment class. Constructs a new fragment for the given page number, * and HTML story element.// w w w . j a v a2 s .c om */ public static ScreenSlidePageFragment create(int pageNumber, int numPages, org.jsoup.nodes.Element story) { int story_id = -1; String name = ""; String summary = ""; String headline = ""; String cover_photo_url = ""; String story_string = ""; long createdAt; ScreenSlidePageFragment fragment = new ScreenSlidePageFragment(); Bundle args = new Bundle(); if (pageNumber == 0) { story_id = 0; name = "Grasswire Help"; headline = "Usage Instructions"; cover_photo_url = "android.resource://com.ferasinfotech.gwreader/" + R.drawable.gw_logo; summary = "Swipe right and left to read each story.\n\n" + "Scroll down to read facts and associated news items (tweets and links) for each story.\n\n" + "Tap on a news items within a story and you'll be able to follow web links, view tweets via the Twitter app, or watch videos.\n\n" + "A long press on a story's cover photo will launch the device browser to view or edit the story on the Grasswire mobile site.\n\n" + "A long press on the image above will launch the Grasswire main page.\n\n" + "App Version: " + BuildConfig.VERSION_NAME + "\n\n"; } else { // doing a story page, Element 'story' is the story data Elements e_list; org.jsoup.nodes.Element tag; story_id = Integer.valueOf(story.attr("data-story-id")); e_list = story.getElementsByClass("feature__tag"); tag = e_list.get(0); name = tag.text() + " (" + pageNumber + "/" + numPages + ")"; e_list = story.getElementsByClass("story__summary"); tag = e_list.get(0); summary = tag.html().replace("<br />", "\r"); e_list = story.getElementsByClass("feature__text"); tag = e_list.get(0); headline = tag.text(); e_list = story.getElementsByClass("feature__image"); tag = e_list.get(0); cover_photo_url = tag.attr("src"); story_string = story.toString(); } args.putInt(ARG_PAGE, pageNumber); args.putInt(ARG_STORY_ID, story_id); 
args.putString(ARG_TITLE, name); args.putString(ARG_SUMMARY, summary); args.putString(ARG_HEADLINE, headline); args.putString(ARG_COVER_PHOTO, cover_photo_url); args.putString(ARG_STORY_STRING, "<html><head></head><body>" + story_string + "</body></html>"); fragment.setArguments(args); return fragment; }
From source file:com.nuance.expertassistant.ContentExtractor.java
/**
 * Prints the textual content of a document as nested {@code <section>} / {@code <para>}
 * markup.
 *
 * <p>An outer section titled with the document title wraps everything. Elements are
 * visited in the order returned by {@code doc.select("*")}:
 * <ul>
 *   <li>elements whose own text is blank, and {@code <a>} elements, are skipped;</li>
 *   <li>an {@code h1}–{@code h4} element with text closes every open section of the
 *       same or a deeper heading level, then opens a new section titled with the
 *       heading text;</li>
 *   <li>any other element has its own text printed inside {@code <para>}.</li>
 * </ul>
 * Sections still open at the end are closed before the outer section is closed.
 *
 * @param doc the parsed document to serialise
 */
public static void extract(Document doc) {
    print("<section id =\"{}\" title =\"" + stripNonValidXMLCharacters(doc.title()) + "\">");

    // openSections[k] is true while a section opened by an h<k> heading is still open.
    final boolean[] openSections = new boolean[5];

    for (final Element element : doc.select("*")) {
        final String ownText = element.ownText();
        // isEmpty() compares content; the original `trim() == ""` compared references.
        if (ownText == null || ownText.trim().isEmpty()) {
            continue;
        }

        final String tagName = element.tagName();
        // Skip anchors only; the original contains("a") also dropped span, table, label, ...
        if (tagName.equals("a")) {
            continue;
        }

        final int level = headingLevel(tagName);
        if (level > 0 && element.text() != null && !element.text().isEmpty()) {
            closeSectionsFrom(openSections, level);
            print("<section id =\"{}\" title =\"" + stripNonValidXMLCharacters(element.text()) + "\">");
            openSections[level] = true;
        } else {
            print("<para>");
            print(stripNonValidXMLCharacters(ownText));
            print("</para>");
        }
    }

    closeSectionsFrom(openSections, 1);
    print("</section>");
}

/** Returns 1–4 for the tag names {@code h1}–{@code h4}, and 0 for any other tag name. */
private static int headingLevel(String tagName) {
    if (tagName.length() == 2 && tagName.charAt(0) == 'h') {
        final int level = tagName.charAt(1) - '0';
        if (level >= 1 && level <= 4) {
            return level;
        }
    }
    return 0;
}

/** Prints a closing tag for, and marks closed, every open section at {@code level} or deeper. */
private static void closeSectionsFrom(boolean[] openSections, int level) {
    for (int current = level; current < openSections.length; current++) {
        if (openSections[current]) {
            openSections[current] = false;
            print("</section>");
        }
    }
}
From source file:app.sunstreak.yourpisd.net.Parser.java
/** * /*from w w w .j av a2 s.co m*/ * @param html the source code for ANY page in Gradebook (usually Default.aspx) * @return */ public static List<String[]> parseStudents(String html) { List<String[]> list = new ArrayList<String[]>(); Element doc = Jsoup.parse(html); Element studentList = doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxStudentlist"); // Only one student if (studentList.text().isEmpty()) { // {studentId, studentName} list.add(new String[] { doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxStudentId").attr("value"), doc.getElementById("ctl00_ctl00_ContentPlaceHolder_uxMultiple").text() }); return list; } // Multiple students else { for (Element a : studentList.getElementsByTag("a")) { String name = a.text(); String onClick = a.attr("onClick"); String studentId = onClick.substring(onClick.indexOf('\'') + 1, onClick.lastIndexOf('\'')); list.add(new String[] { studentId, name }); } return list; } }