List of usage examples for org.jsoup.select Elements first
public Element first()
From source file:com.nineash.hutsync.client.NetworkUtilities.java
/** * Perform 2-way sync with the server-side contacts. We send a request that * includes all the locally-dirty contacts so that the server can process * those changes, and we receive (and return) a list of contacts that were * updated on the server-side that need to be updated locally. * * @param account The account being synced * @param authtoken The authtoken stored in the AccountManager for this * account//from ww w. j ava 2s . c o m * @param serverSyncState A token returned from the server on the last sync * @param dirtyContacts A list of the contacts to send to the server * @return A list of contacts that we need to update locally */ public static void syncCalendar(Context context, Account account, String authtoken, long serverSyncState) throws JSONException, ParseException, IOException, AuthenticationException { ArrayList<SerializableCookie> myCookies; CookieStore cookieStore = new BasicCookieStore(); DefaultHttpClient hClient = getHttpClient(context); mContentResolver = context.getContentResolver(); final String[] weeknames = { "rota_this_week", "rota_next_week" }; long calendar_id = getCalendar(account); if (calendar_id == -1) { Log.e("CalendarSyncAdapter", "Unable to create HutSync event calendar"); return; } try { myCookies = (ArrayList<SerializableCookie>) fromString(authtoken); } catch (final IOException e) { Log.e(TAG, "IOException when expanding authtoken", e); return; } catch (final ClassNotFoundException e) { Log.e(TAG, "ClassNotFoundException when expanding authtoken", e); return; } for (SerializableCookie cur_cookie : myCookies) { cookieStore.addCookie(cur_cookie.getCookie()); } hClient.setCookieStore(cookieStore); Log.i(TAG, "Syncing to: " + SYNC_CONTACTS_URI); HttpGet httpget = new HttpGet(SYNC_CONTACTS_URI); final HttpResponse resp = hClient.execute(httpget); final String response = EntityUtils.toString(resp.getEntity()); HashMap<Long, SyncEntry> localEvents = new HashMap<Long, SyncEntry>(); ArrayList<Event> events = new ArrayList<Event>(); 
Pattern p = Pattern.compile("background-color:(#[[a-f][A-F][0-9]]{6})"); Pattern ps = Pattern .compile(".calendar-key span.(\\S+) \\{ background-color:(#[[a-f][A-F][0-9]]{6}); color:#fff; \\}"); if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { //check we are still logged in //if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_UNAUTHORIZED) { // Log.e(TAG, "Authentication exception in sending dirty contacts"); // throw new AuthenticationException(); //} //if we are logged in Map<String, String> shift_types = new HashMap<String, String>(); int length = weeknames.length; Document doc = Jsoup.parse(response); String full_name = doc.select("a[href*=" + account.name + "/profile]").first().text(); AccountManager mAccountManager = AccountManager.get(context); Account[] the_accounts = mAccountManager.getAccountsByType(Constants.ACCOUNT_TYPE); boolean multiple_accounts = (the_accounts.length > 1); Elements the_styles = doc.select("style"); for (Element the_style : the_styles) { String st_txt = the_style.html(); Matcher ms = ps.matcher(st_txt); while (ms.find()) { // Find each match in turn; String can't do this. String cname = ms.group(1); // Access a submatch group; String can't do this. String ccol = ms.group(2); String rname = doc.select("span." + cname).first().text(); Log.i(TAG, "LOOK: " + cname + ", " + ccol + ", " + rname); shift_types.put(ccol, rname); } } for (int w = 0; w < weeknames.length; w++) { Elements the_dates = doc.select("div.homepage div.accord-content table[id=" + weeknames[w] + "] tr.heading th:not(.skipStyles)"); //for (Element hidden : the_dates) { //0 is Mon, 6 is Sun Element the_date = the_dates.first(); //figure out the year for the Monday. 
String str_v = the_date.text(); String[] str_sub = str_v.split(" "); str_sub[1] = str_sub[1].trim(); String[] date_split = str_sub[1].split("/"); Calendar c = Calendar.getInstance(); int this_month = c.get(Calendar.MONTH) + 1; int monday_month = Integer.parseInt(date_split[1]); int this_year = c.get(Calendar.YEAR); int monday_year = this_year; if (this_month > monday_month) { monday_year++; } else if (this_month < monday_month) { monday_year--; } SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy"); Date date = new Date(); if (str_v != null && !str_v.isEmpty()) { String this_date = str_sub[1] + "/" + monday_year; //we need to figure out the year - sometimes its next year try { date = format.parse(this_date); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } Log.i(TAG, "Dates: " + this_date + " - " + date); } //} for (int i = 1; i < 8; ++i) { //1 is monday, 7 is sunday Elements hiddens = doc.select("div.homepage div.accord-content table[id=" + weeknames[w] + "] td:eq(" + Integer.toString(i) + "):not(.skipStyles) div.timeElem"); int add_days = i - 1; for (Element hidden : hiddens) { String str = hidden.text(); if (str != null && !str.isEmpty()) { String style = hidden.attr("style"); String bg_col = ""; Matcher m = p.matcher(style); if (m.find()) { bg_col = m.group(1); // Access a submatch group; String can't do this. } Log.i(TAG, "Time: " + str + "(" + bg_col + ")"); String ev_description = ""; //Location too? 
if (multiple_accounts) ev_description += full_name + "\n\n"; String[] times = str.split(" - "); String[] start_time = times[0].split(":"); String[] end_time = times[1].split(":"); int add_start_hours = Integer.parseInt(start_time[0]); int add_start_minutes = Integer.parseInt(start_time[1]); int add_finish_hours = Integer.parseInt(end_time[0]); int add_finish_minutes = Integer.parseInt(end_time[1]); String ev_shiftType = ""; if (bg_col != null && !bg_col.isEmpty()) { ev_shiftType = (String) shift_types.get(bg_col); } else { ev_shiftType = "Other"; } String ev_title = ev_shiftType + " Shift"; c.setTime(date); c.add(Calendar.DATE, add_days); c.add(Calendar.HOUR_OF_DAY, add_start_hours); c.add(Calendar.MINUTE, add_start_minutes); Date startDate = c.getTime(); long ev_id = startDate.getTime(); c.setTime(date); c.add(Calendar.DATE, add_days); if (add_finish_hours < add_start_hours) { //shift rolls to next day c.add(Calendar.HOUR_OF_DAY, 24); ev_description += "Shift finishes at " + times[1] + " on the next day\n\n"; } else { c.add(Calendar.HOUR_OF_DAY, add_finish_hours); c.add(Calendar.MINUTE, add_finish_minutes); } Date endDate = c.getTime(); Event ev = new Event(ev_id, ev_title, startDate, endDate, ev_description, ev_shiftType); events.add(ev); Log.i(TAG, "Event: " + ev); } } } } //next merge adjacent shifts SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm"); Event prev_event = null; for (Iterator<Event> it = events.iterator(); it.hasNext();) { Event cur_event = it.next(); if (prev_event != null) { if (prev_event.getEndDate().compareTo(cur_event.getStartDate()) == 0) { prev_event.setDescription(prev_event.getDescription() + "Merged consecutive shifts:\n" + timeFormat.format(prev_event.getStartDate()) + " to " + timeFormat.format(prev_event.getEndDate()) + " (" + prev_event.getShiftType() + ")\n" + timeFormat.format(cur_event.getStartDate()) + " to " + timeFormat.format(cur_event.getEndDate()) + " (" + cur_event.getShiftType() + ")\n\n"); 
prev_event.setEndDate(cur_event.getEndDate()); //TODO: only merge if other + FOH/BOH, note times in new description it.remove(); } } prev_event = cur_event; } //next, load local events Cursor c1 = mContentResolver.query( Events.CONTENT_URI.buildUpon().appendQueryParameter(Events.ACCOUNT_NAME, account.name) .appendQueryParameter(Events.ACCOUNT_TYPE, account.type).build(), new String[] { Events._ID, Events._SYNC_ID }, Events.CALENDAR_ID + "=?", new String[] { String.valueOf(calendar_id) }, null); while (c1 != null && c1.moveToNext()) { //if(is_full_sync) { // deleteEvent(context, account, c1.getLong(0)); //} else { SyncEntry entry = new SyncEntry(); entry.raw_id = c1.getLong(0); localEvents.put(c1.getLong(1), entry); //} } c1.close(); try { ArrayList<ContentProviderOperation> operationList = new ArrayList<ContentProviderOperation>(); for (Event event : events) { if (localEvents.containsKey(Long.valueOf(event.getId()))) { SyncEntry entry = localEvents.get(Long.valueOf(event.getId())); operationList.add(updateEvent(calendar_id, account, event, entry.raw_id)); } else { operationList.add(updateEvent(calendar_id, account, event, -1)); } if (operationList.size() >= 50) { try { mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList); } catch (Exception e) { e.printStackTrace(); } operationList.clear(); } } if (operationList.size() > 0) { try { mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList); } catch (Exception e) { e.printStackTrace(); } } } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); return; } } else { Log.e(TAG, "Server error in sending dirty contacts: " + resp.getStatusLine()); throw new IOException(); } }
From source file:com.manisha.allmybooksarepacked.service.BookParser.java
/**
 * Returns the trimmed text of the first element matching {@code PathMapping.AUTHOR}.
 *
 * @return the author text, or an empty string when the document has no matching element
 */
private String findAuthor() {
    Elements author = doc.select(PathMapping.AUTHOR);
    // Elements.first() yields null for an empty selection, so guard before dereferencing.
    if (author.isEmpty()) {
        return "";
    }
    return author.first().text().trim();
}
From source file:com.kasabi.data.movies.dbpedia.DBPediaBaseLinker.java
/**
 * Looks up {@code string} through the DBpedia keyword-search endpoint and returns the text
 * of the first {@code result > uri} element of the response.
 *
 * @param httpclient client used to execute the GET request
 * @param type optional value for the {@code QueryClass} parameter; omitted when {@code null}
 * @param string keyword sent as {@code QueryString}
 * @return the first URI found, or {@code null} when there is no result or the request failed
 */
protected String getURI(HttpClient httpclient, String type, String string) {
    try {
        final String classParam = (type == null) ? "" : "&QueryClass=" + URLEncoder.encode(type, "UTF-8");
        final String keywordParam = "?QueryString=" + URLEncoder.encode(string, "UTF-8");
        final HttpGet request = new HttpGet(
                "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch" + keywordParam + classParam);

        final ResponseHandler<String> handler = new BasicResponseHandler();
        final String body = httpclient.execute(request, handler);

        final Elements matches = Jsoup.parse(body).select("result > uri");
        if (matches.isEmpty()) {
            return null;
        }
        return matches.first().text();
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Reached only after a failed request.
    return null;
}
From source file:com.jejking.hh.nord.corpus.AllrisHtmlToRawDrucksache.java
/**
 * Extracts the Drucksache id from the {@code #risname > h1} heading by dropping the leading
 * {@code "Drucksache - "} prefix and cleaning up the remainder.
 *
 * @param htmlDoc parsed page
 * @return the heading text after the prefix, passed through
 *         {@code removeNonBreakingSpacesAndTrim}
 */
private String druckSacheId(Document htmlDoc) {
    final Element heading = htmlDoc.select("#risname > h1").first();
    final int prefixLength = "Drucksache - ".length();
    final String afterPrefix = heading.text().substring(prefixLength);
    return removeNonBreakingSpacesAndTrim(afterPrefix);
}
From source file:lolth.autohome.buy.AutohomeBuyInfoListTaskFetch.java
@Override protected void parsePage(Document doc, FetchTask task) throws Exception { Elements lis = doc.select("li.price-item"); for (Element li : lis) { AutohomeBuyInfoBean bean = new AutohomeBuyInfoBean(); bean.setUrl(task.getUrl());//from w w w .j av a 2s. c o m bean.setForumId(task.getExtra()); // post id Elements id = li.select("div.price-share a.share"); if (!id.isEmpty()) { String idStr = id.first().attr("data-target"); idStr = StringUtils.substringAfterLast(idStr, "_"); if (StringUtils.isBlank(idStr)) { continue; } bean.setId(idStr); } // Elements user = li.select("div.user-name a"); if (!user.isEmpty()) { String userUrl = user.first().absUrl("href"); String userId = StringUtils.substringAfterLast(userUrl, "/"); String userName = user.first().text(); bean.setUserId(userId); bean.setUserUrl(userUrl); bean.setUserName(userName); } // ? Elements postTime = li.select("div.user-name span"); if (!postTime.isEmpty()) { bean.setPostTime(StringUtils.trim(StringUtils.substringBefore(postTime.first().text(), "?"))); } Elements dataLis = li.select("div.price-item-bd li"); for (Element dataLi : dataLis) { String data = dataLi.text(); if (StringUtils.startsWith(data, "")) { bean.setCar(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setPrice(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setGuidePrice(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setTotalPrice(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setPurchaseTax(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setCommercialInsurance(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setVehicleUseTax(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if 
(StringUtils.startsWith(data, "")) { bean.setCompulsoryInsurance(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setLicenseFee(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "?")) { bean.setPromotion(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { bean.setBuyTime(StringUtils.trim(StringUtils.substringAfter(data, ""))); } if (StringUtils.startsWith(data, "")) { String area = StringUtils.trim(StringUtils.substringAfter(data, "")); String[] pAndC = StringUtils.splitByWholeSeparator(area, ",", 2); if (pAndC.length == 1) { bean.setBuyProvince(pAndC[0]); bean.setBuyCity(pAndC[0]); } if (pAndC.length == 2) { bean.setBuyProvince(pAndC[0]); bean.setBuyCity(pAndC[1]); } } if (StringUtils.startsWith(data, "")) { Elements level = dataLi.select("span.level"); // if (!level.isEmpty()) { bean.setSellerComment(level.first().text()); } // ? Elements seller = dataLi.select("a.title"); if (!seller.isEmpty()) { String sellerUrl = seller.first().absUrl("href"); String sellerName = seller.first().text(); String sellerId = StringUtils.substringAfterLast(sellerUrl, "/"); bean.setSellerId(sellerId); bean.setSellerName(sellerName); bean.setSellerUrl(sellerUrl); } // ? Elements sellerPhone = dataLi.select("em.phone-num"); if (!sellerPhone.isEmpty()) { bean.setSellerPhone(sellerPhone.first().text()); } // ? // Elements sellerAddress = dataLi.select("em.phone-num"); } if (StringUtils.startsWith(data, "?")) { bean.setBuyFeeling(StringUtils.trim(StringUtils.substringAfter(data, ""))); } } log.debug("Bean : {}", bean); bean.persistOnNotExist(); } }
From source file:am.roadpolice.roadpolice.downloaders.Submitter.java
/** * This function process URL and collect needed information about * violation and add it to Violation List (mViolationInfoList). * * @param url URL from which data will be processed. * @return null if no error occurs; otherwise ERROR1/ERROR2 if something * was changed in server behaviour, ERROR3 if error occurs while trying * to get JSOUP document, or server error text. *//*from ww w. ja v a 2s.c o m*/ private String processUrl(final String url) { Logger.debugLine(); Logger.debug(TAG, "Processing URL: " + url); HttpGet httpGet = new HttpGet(url); HttpParams httpParameters = new BasicHttpParams(); // Set the timeout in milliseconds until a connection is established. // The default value is zero, that means the timeout is not used. HttpConnectionParams.setConnectionTimeout(httpParameters, CONNECT_TIMEOUT); // Set the default socket timeout (SO_TIMEOUT) // in milliseconds which is the timeout for waiting for data. HttpConnectionParams.setSoTimeout(httpParameters, SOCKET_TIMEOUT); DefaultHttpClient httpClient = new DefaultHttpClient(httpParameters); try { HttpResponse response = httpClient.execute(httpGet); java.util.Scanner s = new java.util.Scanner(response.getEntity().getContent()).useDelimiter("\\A"); Document document = Jsoup.parse(s.hasNext() ? s.next() : null); // In the case if some data was provided not correct in // the url server generates page with red text, we handle // this situation and return text in the red block. 
Elements errorElement = document.getElementsByClass("red"); if (errorElement != null && errorElement.first() != null) { final String errorText = errorElement.first().text(); Logger.debug(TAG, "Found Error Element (RED): " + errorText); return errorText; } Elements tableElements = document.getElementsByClass("dahk_yes"); tableElements.addAll(document.getElementsByClass("dahk_no")); for (Element element : tableElements) { Elements tdElements = element.getElementsByTag("td"); int tdElementsCount = ViolationInfo.COL_OWNER_FULL_NAME; ViolationInfo violationInfo = null; for (Element tdElement : tdElements) { final String text = tdElement.text().trim(); // We found vehicle registration number. if (text.equalsIgnoreCase(mRegNum)) { // Create new class object to store data. violationInfo = new ViolationInfo(mRegNum); violationInfo.setCertificateNumber(mCerNum); continue; } // Violation Info object was not created, reason can be // that something is changed on the server side. if (violationInfo == null) { return ERROR_ON_SERVER_SIDE; } switch (tdElementsCount) { case ViolationInfo.COL_OWNER_FULL_NAME: violationInfo.setOwnerFullName(text); break; case ViolationInfo.COL_OWNER_ADDRESS: violationInfo.setOwnerAddress(text); break; case ViolationInfo.COL_TO_PAY: violationInfo.setToPay(text); break; case ViolationInfo.COL_PAYED: violationInfo.setPayed(text); break; case ViolationInfo.COL_CAR_MODEL: violationInfo.setCarModel(text); break; case ViolationInfo.COL_THE_DECISION: // Do Nothing ... break; case ViolationInfo.COL_DATE: violationInfo.setDate(text); break; case ViolationInfo.COL_PIN: violationInfo.setPin(text); break; default: return ERROR_WHILE_PARSING_DATA; } tdElementsCount++; } // Add items to the list. mViolationInfoList.add(violationInfo); } } catch (IOException e) { Logger.error(TAG, "----> Exception occurs while trying to get JSOUP document."); Logger.error(TAG, "----> Message: " + e.getMessage()); return ERROR_WHILE_CREATING_JSOUP; } return null; }
From source file:com.msds.km.service.Impl.DrivingLicenseRecognitionServcieiImpl.java
/** * html???//from w w w. j a v a 2 s .co m * * @param html * ??xml?java * @return * @throws Exception */ private DrivingLicense parseDrivingLicense(String html) throws Exception { if (html.isEmpty()) { logger.info(""); return null; } Document document = Jsoup.parse(html); if (document == null) { logger.info("html"); return null; } Elements fieldsets = document.select("div[class=left result] fieldset"); if (fieldsets.size() != 1) { logger.info("?"); return null; } Element regResult = fieldsets.first(); String result = regResult.html().trim(); // String removedStr = "<legend></legend>"; if (result.startsWith(removedStr)) { result = result.substring(removedStr.length()); } // ??xml result = StringEscapeUtils.unescapeXml(result); // result = "<drivingLicense>" + result + "</drivingLicense>"; return XMLUtils.toObject(result, DrivingLicense.class); }
From source file:org.javiermoreno.torrentscratcher.Runner.java
/**
 * Searches IMDb for the movie's title and, when the first hit is a title page, stores its
 * IMDb id, genre and rating on {@code movie}.
 *
 * <p>The original title is preferred over the display title for the search. Network failures
 * are logged and leave the movie as it was at that point.
 *
 * @param movie movie to enrich; modified in place
 * @return the same {@code movie} instance
 */
public Movie enrichMovieWithImdbSearch(Movie movie) {
    try {
        String title = movie.getOriginalTitle() != null ? movie.getOriginalTitle() : movie.getTitle();
        String url = "http://www.imdb.com/find?q={title}&s=all".replace("{title}",
                java.net.URLEncoder.encode(title, "UTF-8"));

        Document doc = Jsoup.connect(url).get();
        Elements results = doc.select(".result_text a");
        if (results.size() == 0) {
            log.warn("IMDB search 404: " + movie.getTitle());
            return movie;
        }

        String link = results.first().attr("href");
        String imdbId = extractImdbId(link);
        if (imdbId == null) {
            // First hit is not a /title/ link (e.g. a person or company page).
            log.warn("IMDB search returned a non-title link for " + movie.getTitle() + ": " + link);
            return movie;
        }
        movie.setImdbId(imdbId);

        doc = Jsoup.connect("http://www.imdb.com" + link).get();
        movie.setGenre(doc.select("[itemprop=genre]").eq(0).text());

        String rating = doc.select("[itemprop=aggregateRating] [itemprop=ratingValue]").text();
        if (!rating.isEmpty()) {
            try {
                movie.setRating(Double.valueOf(rating.replace(',', '.')));
            } catch (NumberFormatException ex) {
                log.warn("Unparsable IMDB rating '" + rating + "' for " + movie.getTitle());
            }
        }
    } catch (IOException ex) {
        log.warn(ex.getMessage());
    }
    return movie;
}

/**
 * Returns the id segment of an IMDb title link such as {@code /title/tt0111161/?ref_=x},
 * or {@code null} when {@code link} is not a title link.
 */
private static String extractImdbId(String link) {
    final String prefix = "/title/";
    if (link == null || !link.startsWith(prefix)) {
        return null;
    }
    // The id ends at the next '/' or '?', whichever comes first, or at the end of the link.
    int end = link.length();
    int slash = link.indexOf('/', prefix.length());
    if (slash >= 0) {
        end = Math.min(end, slash);
    }
    int query = link.indexOf('?', prefix.length());
    if (query >= 0) {
        end = Math.min(end, query);
    }
    String id = link.substring(prefix.length(), end);
    return id.isEmpty() ? null : id;
}
From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java
/**
 * Returns the text of the first {@code header} element inside the first slide.
 *
 * @param slidesDocument document holding the slides
 * @return the header text, or an empty string when there is no slide or the first slide has
 *         no header
 */
protected String getFirstSlideTitle(Document slidesDocument) {
    final Elements slides = getSlideCollection(slidesDocument);
    if (slides == null || slides.isEmpty()) {
        return "";
    }

    final Elements headers = slides.first().getElementsByTag("header");
    if (headers == null || headers.isEmpty()) {
        return "";
    }

    return headers.first().text();
}
From source file:com.msds.km.service.Impl.YunmaiAPIDrivingLicenseRecognitionServcieiImpl.java
/** * html???/*from w ww. j a v a2 s . c o m*/ * @param html ??xml?java * @return */ protected DrivingLicense parseDrivingLicense(String html) { if (html.isEmpty()) { throw new RecognitionException("the html content is empty"); } Document document = Jsoup.parse(html); if (document == null) { throw new RecognitionException( "the document prased from html content is null, please check the website"); } Elements fieldsets = document.select("div[class=left result] fieldset"); if (fieldsets.size() != 1) { throw new RecognitionException( "the document should has result filedset, the content of the web page may be changed."); } Element regResult = fieldsets.first(); String result = regResult.html().trim(); // String removedStr = "<legend></legend>"; if (result.startsWith(removedStr)) { result = result.substring(removedStr.length()); } //??xml result = StringEscapeUtils.unescapeXml(result); // result = "<drivingLicense>" + result + "</drivingLicense>"; return (DrivingLicense) stream.fromXML(result); }