List of usage examples for org.jsoup.nodes.Element#text()
public String text()
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
/**
 * Replaces the content of every element matched by {@code selector} with its
 * own text wrapped in a single {@code <strong>} element.
 *
 * <p>The text is inserted as a text node instead of being formatted into an
 * HTML string and re-parsed, so characters such as {@code <} or {@code &} that
 * appear in a heading stay literal text rather than being interpreted as markup.
 *
 * @param document the document whose matching elements are rewritten in place
 * @param selector CSS selector identifying the elements to reformat
 */
private static void reformatXHtmlHeadings(final Document document, final String selector) {
    final Elements elements = document.select(selector);

    for (final Element element : elements) {
        final String text = element.text();
        // empty() drops the previous children; text(...) escapes the value on output.
        element.empty().appendElement("strong").text(text);
    }
}
From source file:com.screenslicer.common.CommonUtil.java
public static String getNextSiblingTextByOwnText(Document doc, String text) { Elements elements = doc.getElementsMatchingOwnText(text); if (elements == null) { return null; }//ww w .j a v a 2 s . c o m if (elements.isEmpty()) { return null; } Element sibling = elements.get(0).nextElementSibling(); if (sibling != null) { return sibling.text(); } return null; }
From source file:com.megatome.j2d.support.JavadocSupport.java
private static List<SearchIndexValue> indexFile(File f) throws BuilderException { final List<SearchIndexValue> values = new ArrayList<>(); final Elements elements = loadAndFindLinks(f); for (final Element e : elements) { Element parent = e.parent(); if (!parent.child(0).equals(e)) { continue; }//from w ww. j ava 2 s .com final String parentTagName = parent.tagName(); if (parentPattern.matcher(parentTagName).matches()) { parent = parent.parent(); if (!parent.child(0).equals(e.parent())) { continue; } } if (!containsIgnoreCase(parentTagName, "dt")) { continue; } final String text = parent.text(); final String name = e.text(); final String className = parent.className(); final MatchType type = getMatchingType(text, className); if (null == type) { System.err.println(String.format( "Unknown type found. Please submit a bug report. (Text: %s, Name: %s, className: %s)", text, name, className)); continue; } try { final String linkPath = URLDecoder.decode(e.attr("href"), "UTF-8"); values.add(new SearchIndexValue(name, type, linkPath)); } catch (UnsupportedEncodingException ex) { throw new BuilderException("Error decoding a link", ex); } } return values; }
From source file:com.screenslicer.common.CommonUtil.java
/**
 * Returns the text of the first direct child of {@code elements.get(0)} whose
 * tag name equals {@code tagName}, ignoring case.
 *
 * @param elements candidate elements; only the first one is inspected
 * @param tagName  tag name to look for among the children
 * @return the matching child's text, or {@code null} when {@code elements} is
 *         {@code null} or empty, or no child has that tag name
 */
public static String getFirstChildTextByTagName(Elements elements, String tagName) {
    if (elements == null || elements.isEmpty()) {
        return null;
    }

    Elements kids = elements.get(0).children();
    for (int i = 0; i < kids.size(); i++) {
        Element kid = kids.get(i);
        if (kid.tagName().equalsIgnoreCase(tagName)) {
            return kid.text();
        }
    }
    return null;
}
From source file:io.jari.geenstijl.API.API.java
private static Artikel parseArtikel(Element artikel_el, Context context) throws ParseException { Artikel artikel = new Artikel(); //id/*from w w w . j a v a 2 s . c o m*/ artikel.id = Integer.parseInt(artikel_el.attr("id").substring(1)); //summary artikel.summary = artikel_el.select("a.more").first() != null; //titel artikel.titel = artikel_el.select("h1").text(); //plaatje if (PreferenceManager.getDefaultSharedPreferences(context).getBoolean("show_images", true)) { Element plaatje = artikel_el.select("img").first(); if (plaatje != null) { try { String url = plaatje.attr("src"); Log.d(TAG, "Downloading " + url); // artikel.plaatje = Drawable.createFromStream(((java.io.InputStream)new URL(plaatje.attr("src")).getContent()), null); artikel.plaatje = readBytes((InputStream) new URL(plaatje.attr("src")).getContent()); artikel.groot_plaatje = plaatje.hasClass("groot"); if (plaatje.hasAttr("width") && plaatje.hasAttr("height")) if (!plaatje.attr("width").equals("100") || !plaatje.attr("height").equals("100")) artikel.groot_plaatje = true; if (artikel.groot_plaatje) Log.i(TAG, " Done. Big image."); else Log.i(TAG, " Done."); } catch (Exception ex) { Log.w(TAG, "Unable to download image, Falling back... 
Reason: " + ex.getMessage()); artikel.plaatje = null; } } } //embed if (artikel_el.select("div.embed").first() != null) { //atm alleen support voor iframes Element frame = artikel_el.select("div.embed>iframe").first(); if (frame != null) artikel.embed = frame.attr("src"); } //embed (geenstijl.tv) if (!domain.equals("www.geenstijl.nl")) { //extract url from script Element scriptEl = artikel_el.select("script").first(); if (scriptEl != null) { String script = scriptEl.html(); Pattern pattern = Pattern.compile("'(.*)', fall"); Matcher matcher = pattern.matcher(script); if (matcher.find() && matcher.groupCount() == 1) { artikel.embed = matcher.group(1); } } } //footer shit Element footer = artikel_el.select("footer").first(); SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm", Locale.US); artikel.datum = simpleDateFormat.parse(footer.select("time").first().attr("datetime")); StringTokenizer footer_items = new StringTokenizer(footer.text(), "|"); artikel.auteur = footer_items.nextToken().trim(); artikel.reacties = Integer.parseInt(footer.select("a.comments").text().replace(" reacties", "")); artikel.link = footer.select("a").first().attr("href"); //clean up artikel_el.select("h1").remove(); artikel_el.select(".embed").remove(); artikel_el.select("img").remove(); artikel_el.select("footer").remove(); artikel_el.select("a.more").remove(); artikel_el.select("script").remove(); //inhoud artikel.inhoud = artikel_el.html(); return artikel; }
From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java
public static void replaceDetailsTable(String path) throws IOException { File source = new File(path); Document report = null;// w w w . j av a 2 s .c o m try { report = Jsoup.parse(source, "UTF-8"); } catch (IOException e) { System.out.println("Unable to open [" + source.getAbsolutePath() + "] for parsing!"); } Elements dom = report.children(); Elements tds = report.select("table[id=tableStyle] td"); // select the tds from your table String temp_key = ""; for (Element td : tds) { // loop through them String[] temp_ar = td.toString().split("\""); String Key = temp_ar[1]; String Status = ""; if (td.toString().contains("pass.png")) { Status = "pass"; } if (td.toString().contains("fail.png")) { Status = "fail"; } if (td.toString().contains("skip.png")) { Status = "skip"; } if (TestCaseDesMap.containsKey(temp_key) && Status.length() > 1) { TestcaseStatusMap.put(temp_key, Status); temp_key = ""; } if (td.text().contains("Test Method")) { // found the one you want String TestcaseDes; if (!TestCaseDesMap.containsKey(Key)) { TestcaseDes = " --------- "; TestCaseDesMap.put(Key, TestcaseDes); temp_key = Key; } else { TestcaseDes = TestCaseDesMap.get(Key); temp_key = Key; // TestcaseStatusMap.put(Key, Status); } td.text(TestcaseDes); // Replace with your text } } Elements ths = report.select("table[id=tableStyle] th"); // select the tds from your table for (Element th : ths) { // loop through them if (th.text().contains("Method Type")) { // found the one you want th.text("TestCase Description"); } if (th.text().contains("Test Case Name")) { // found the one you want th.text("Testng Method"); } } if (!source.canWrite()) { System.out.println("Can't write this file!");//Just check if the file is writable or not } BufferedWriter bw = new BufferedWriter(new FileWriter(source)); bw.write(dom.toString()); //toString will give all the elements as a big string bw.close(); //Close to apply the changes // genarateFailureReport(new 
File("C:\\Users\\yanamalp\\Desktop\\Gen_jelly\\HTML_Design_Files\\CSS\\HtmlReport.html"), "c:\\"); }
From source file:com.nineash.hutsync.client.NetworkUtilities.java
/** * Perform 2-way sync with the server-side contacts. We send a request that * includes all the locally-dirty contacts so that the server can process * those changes, and we receive (and return) a list of contacts that were * updated on the server-side that need to be updated locally. * * @param account The account being synced * @param authtoken The authtoken stored in the AccountManager for this * account//from w w w . j a v a 2s .c om * @param serverSyncState A token returned from the server on the last sync * @param dirtyContacts A list of the contacts to send to the server * @return A list of contacts that we need to update locally */ public static void syncCalendar(Context context, Account account, String authtoken, long serverSyncState) throws JSONException, ParseException, IOException, AuthenticationException { ArrayList<SerializableCookie> myCookies; CookieStore cookieStore = new BasicCookieStore(); DefaultHttpClient hClient = getHttpClient(context); mContentResolver = context.getContentResolver(); final String[] weeknames = { "rota_this_week", "rota_next_week" }; long calendar_id = getCalendar(account); if (calendar_id == -1) { Log.e("CalendarSyncAdapter", "Unable to create HutSync event calendar"); return; } try { myCookies = (ArrayList<SerializableCookie>) fromString(authtoken); } catch (final IOException e) { Log.e(TAG, "IOException when expanding authtoken", e); return; } catch (final ClassNotFoundException e) { Log.e(TAG, "ClassNotFoundException when expanding authtoken", e); return; } for (SerializableCookie cur_cookie : myCookies) { cookieStore.addCookie(cur_cookie.getCookie()); } hClient.setCookieStore(cookieStore); Log.i(TAG, "Syncing to: " + SYNC_CONTACTS_URI); HttpGet httpget = new HttpGet(SYNC_CONTACTS_URI); final HttpResponse resp = hClient.execute(httpget); final String response = EntityUtils.toString(resp.getEntity()); HashMap<Long, SyncEntry> localEvents = new HashMap<Long, SyncEntry>(); ArrayList<Event> events = new ArrayList<Event>(); 
Pattern p = Pattern.compile("background-color:(#[[a-f][A-F][0-9]]{6})"); Pattern ps = Pattern .compile(".calendar-key span.(\\S+) \\{ background-color:(#[[a-f][A-F][0-9]]{6}); color:#fff; \\}"); if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { //check we are still logged in //if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_UNAUTHORIZED) { // Log.e(TAG, "Authentication exception in sending dirty contacts"); // throw new AuthenticationException(); //} //if we are logged in Map<String, String> shift_types = new HashMap<String, String>(); int length = weeknames.length; Document doc = Jsoup.parse(response); String full_name = doc.select("a[href*=" + account.name + "/profile]").first().text(); AccountManager mAccountManager = AccountManager.get(context); Account[] the_accounts = mAccountManager.getAccountsByType(Constants.ACCOUNT_TYPE); boolean multiple_accounts = (the_accounts.length > 1); Elements the_styles = doc.select("style"); for (Element the_style : the_styles) { String st_txt = the_style.html(); Matcher ms = ps.matcher(st_txt); while (ms.find()) { // Find each match in turn; String can't do this. String cname = ms.group(1); // Access a submatch group; String can't do this. String ccol = ms.group(2); String rname = doc.select("span." + cname).first().text(); Log.i(TAG, "LOOK: " + cname + ", " + ccol + ", " + rname); shift_types.put(ccol, rname); } } for (int w = 0; w < weeknames.length; w++) { Elements the_dates = doc.select("div.homepage div.accord-content table[id=" + weeknames[w] + "] tr.heading th:not(.skipStyles)"); //for (Element hidden : the_dates) { //0 is Mon, 6 is Sun Element the_date = the_dates.first(); //figure out the year for the Monday. 
String str_v = the_date.text(); String[] str_sub = str_v.split(" "); str_sub[1] = str_sub[1].trim(); String[] date_split = str_sub[1].split("/"); Calendar c = Calendar.getInstance(); int this_month = c.get(Calendar.MONTH) + 1; int monday_month = Integer.parseInt(date_split[1]); int this_year = c.get(Calendar.YEAR); int monday_year = this_year; if (this_month > monday_month) { monday_year++; } else if (this_month < monday_month) { monday_year--; } SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy"); Date date = new Date(); if (str_v != null && !str_v.isEmpty()) { String this_date = str_sub[1] + "/" + monday_year; //we need to figure out the year - sometimes its next year try { date = format.parse(this_date); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } Log.i(TAG, "Dates: " + this_date + " - " + date); } //} for (int i = 1; i < 8; ++i) { //1 is monday, 7 is sunday Elements hiddens = doc.select("div.homepage div.accord-content table[id=" + weeknames[w] + "] td:eq(" + Integer.toString(i) + "):not(.skipStyles) div.timeElem"); int add_days = i - 1; for (Element hidden : hiddens) { String str = hidden.text(); if (str != null && !str.isEmpty()) { String style = hidden.attr("style"); String bg_col = ""; Matcher m = p.matcher(style); if (m.find()) { bg_col = m.group(1); // Access a submatch group; String can't do this. } Log.i(TAG, "Time: " + str + "(" + bg_col + ")"); String ev_description = ""; //Location too? 
if (multiple_accounts) ev_description += full_name + "\n\n"; String[] times = str.split(" - "); String[] start_time = times[0].split(":"); String[] end_time = times[1].split(":"); int add_start_hours = Integer.parseInt(start_time[0]); int add_start_minutes = Integer.parseInt(start_time[1]); int add_finish_hours = Integer.parseInt(end_time[0]); int add_finish_minutes = Integer.parseInt(end_time[1]); String ev_shiftType = ""; if (bg_col != null && !bg_col.isEmpty()) { ev_shiftType = (String) shift_types.get(bg_col); } else { ev_shiftType = "Other"; } String ev_title = ev_shiftType + " Shift"; c.setTime(date); c.add(Calendar.DATE, add_days); c.add(Calendar.HOUR_OF_DAY, add_start_hours); c.add(Calendar.MINUTE, add_start_minutes); Date startDate = c.getTime(); long ev_id = startDate.getTime(); c.setTime(date); c.add(Calendar.DATE, add_days); if (add_finish_hours < add_start_hours) { //shift rolls to next day c.add(Calendar.HOUR_OF_DAY, 24); ev_description += "Shift finishes at " + times[1] + " on the next day\n\n"; } else { c.add(Calendar.HOUR_OF_DAY, add_finish_hours); c.add(Calendar.MINUTE, add_finish_minutes); } Date endDate = c.getTime(); Event ev = new Event(ev_id, ev_title, startDate, endDate, ev_description, ev_shiftType); events.add(ev); Log.i(TAG, "Event: " + ev); } } } } //next merge adjacent shifts SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm"); Event prev_event = null; for (Iterator<Event> it = events.iterator(); it.hasNext();) { Event cur_event = it.next(); if (prev_event != null) { if (prev_event.getEndDate().compareTo(cur_event.getStartDate()) == 0) { prev_event.setDescription(prev_event.getDescription() + "Merged consecutive shifts:\n" + timeFormat.format(prev_event.getStartDate()) + " to " + timeFormat.format(prev_event.getEndDate()) + " (" + prev_event.getShiftType() + ")\n" + timeFormat.format(cur_event.getStartDate()) + " to " + timeFormat.format(cur_event.getEndDate()) + " (" + cur_event.getShiftType() + ")\n\n"); 
prev_event.setEndDate(cur_event.getEndDate()); //TODO: only merge if other + FOH/BOH, note times in new description it.remove(); } } prev_event = cur_event; } //next, load local events Cursor c1 = mContentResolver.query( Events.CONTENT_URI.buildUpon().appendQueryParameter(Events.ACCOUNT_NAME, account.name) .appendQueryParameter(Events.ACCOUNT_TYPE, account.type).build(), new String[] { Events._ID, Events._SYNC_ID }, Events.CALENDAR_ID + "=?", new String[] { String.valueOf(calendar_id) }, null); while (c1 != null && c1.moveToNext()) { //if(is_full_sync) { // deleteEvent(context, account, c1.getLong(0)); //} else { SyncEntry entry = new SyncEntry(); entry.raw_id = c1.getLong(0); localEvents.put(c1.getLong(1), entry); //} } c1.close(); try { ArrayList<ContentProviderOperation> operationList = new ArrayList<ContentProviderOperation>(); for (Event event : events) { if (localEvents.containsKey(Long.valueOf(event.getId()))) { SyncEntry entry = localEvents.get(Long.valueOf(event.getId())); operationList.add(updateEvent(calendar_id, account, event, entry.raw_id)); } else { operationList.add(updateEvent(calendar_id, account, event, -1)); } if (operationList.size() >= 50) { try { mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList); } catch (Exception e) { e.printStackTrace(); } operationList.clear(); } } if (operationList.size() > 0) { try { mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList); } catch (Exception e) { e.printStackTrace(); } } } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); return; } } else { Log.e(TAG, "Server error in sending dirty contacts: " + resp.getStatusLine()); throw new IOException(); } }
From source file:app.sunstreak.yourpisd.net.Parser.java
/** * Reads assignment view page and returns teacher name. * //from w ww.j a v a 2s . c o m * Parses from this table: * * <table id='classStandardInfo'> <tbody> <tr> * <td> <div class='classInfoHeader'>Kapur, Sidharth (226344)</div>2013-08-29 <td> * <table> * <tr> <th style='width:1%'>Course:</th> <td><a href='javascript:ClassDetails.getClassDetails(2976981);' id='ClassTitle'>CHEM AP(00)</a></td></tr> * <tr> <th>Term:</th> <td>1st Six Weeks</td> </tr> * <tr> <th>Teacher:</th> <td><a href="mailto:Nicole.Lyssy@pisd.edu" title="Nicole.Lyssy@pisd.edu">Lyssy, Carol</a></td> </tr> * </table> * <td> </tr> </tbody></table> */ public static String[] teacher(String html) { Element doc = Jsoup.parse(html); Element classStandardInfo = doc.getElementById("classStandardInfo"); // teacher is the third row in this table Element teacher = classStandardInfo.getElementsByTag("table").get(0).getElementsByTag("tr").get(3) .getElementsByTag("td").get(0); // System.out.println(teacher); String email = ""; try { email = teacher.getElementsByTag("a").get(0).attr("title"); } catch (IndexOutOfBoundsException e) { // Senior release teacher have NO email. The <a> tag does not exist. } String teacherName = teacher.text(); return new String[] { teacherName, email }; }
From source file:io.andyc.papercut.api.PrintApi.java
/** * Get the different printers that we can print to and return an array of * the different printer types/* w ww. j a v a 2 s. co m*/ * * @return {PrinterOption[]} - An array of print options */ public static ArrayList<PrinterOption> getPrinterOptions(SessionFactory.Session session) throws IOException, ExpiredSessionException, PrintingException { Elements inputValues = PrintApi.buildConnection(session, "?service=action/1/UserWebPrint/0/%24ActionLink") .execute().parse().select("form").select("div.wizard-body").select("table.results").select("label"); ArrayList<PrinterOption> result = new ArrayList<>(); for (Element element : inputValues) { String name = element.select("input").attr("name"); String value = element.select("input").attr("value"); if (name.isEmpty() || value.isEmpty()) { throw new PrintingException("Cannot parse name and/or value of printing options"); } result.add(new PrinterOption(name, value, element.text())); } if (result.size() == 0) { throw new PrintingException("Cannot parse printer options"); } return result; }
From source file:com.mycompany.crawlertest.GrabPage.java
/**
 * Records the text of each given element in {@code Uttils.HEADERS}, suffixed
 * with {@code "___"} and the current {@code depth}.
 *
 * <p>Elements whose text is blank or starts with {@code #} are skipped. The
 * check runs on the element text itself: once the suffix is appended the
 * string can never be blank, so testing the suffixed value let blank headers
 * through.
 *
 * @param select the elements whose text should be recorded
 */
private void processHeaders(Elements select) {
    for (Element link : select) {
        String text = link.text();
        if (StringUtils.isBlank(text) || text.startsWith("#")) {
            continue;
        }
        Uttils.HEADERS.add(text + "___" + depth);
    }
}