List of usage examples for org.jsoup.nodes Element html
public String html()
From source file:net.pixomania.crawler.W3C.parser.rules.principalAuthors.PrincipalAuthorsRule1.java
/**
 * Extracts the people listed under a "Principal Author" heading of a spec page.
 *
 * <p>Every {@code dd} that follows a {@code dt} containing "Principal Author" is split on
 * commas; the text of each piece is handed to {@code NameParser.parse}. Links found inside a
 * piece are attached to the parsed person: an href containing "@" becomes the e-mail address
 * (with any "mailto:" removed), any other non-empty href is added as a website.
 *
 * @param url URL of the spec; only used in the warning that is logged for boilerplate entries
 * @param doc parsed spec document
 * @return the parsed people, or {@code null} when no matching entry exists or none could be
 *         parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();
    Elements editors = doc.select("dt:contains(Principal Author) ~ dd");
    if (editors.isEmpty()) {
        return null;
    }

    boolean skip = false;
    for (Element editor : editors) {
        // A dd directly after a dt that is NOT the "Principal Author" heading starts a
        // different definition group: ignore entries until the heading shows up again.
        Element prev = editor.previousElementSibling();
        if (prev != null && prev.tagName().equals("dt")
                && !prev.text().trim().toLowerCase().startsWith("principal author")) {
            skip = true;
        }
        if (skip) {
            Element next = editor.nextElementSibling();
            if (next != null && next.text().trim().toLowerCase().startsWith("principal author")) {
                skip = false;
            }
            continue;
        }

        for (String split : editor.html().split(",")) {
            if (split.isEmpty()) {
                continue;
            }

            // Pieces after a comma start with whitespace, so the marker checks are done on a
            // trimmed copy; the untrimmed piece is still what gets parsed below.
            String trimmed = split.trim();
            String marker = trimmed.toLowerCase();
            if (marker.startsWith("(in alphabetic") || marker.startsWith("see acknowl")
                    || marker.startsWith("the w3") || marker.startsWith("(see ac")
                    || marker.startsWith("see participants") || marker.contains("note:")) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (trimmed.equals("WHATWG:") || trimmed.equals("W3C:")) {
                continue;
            }

            Document newdoc = Jsoup.parse(split.replaceAll("\n", ""));
            Person result = NameParser.parse(newdoc.text());
            if (result == null) {
                continue;
            }

            // Query the links once instead of re-running the selector for every access.
            for (Element link : newdoc.select("a")) {
                String href = link.attr("href");
                if (href.isEmpty()) {
                    continue;
                }
                if (href.contains("@")) {
                    result.setEmail(href.replace("mailto:", ""));
                } else {
                    result.addWebsite(href);
                }
            }
            editorList.add(result);
        }
    }

    return editorList.isEmpty() ? null : editorList;
}
From source file:me.vertretungsplan.parser.UntisCommonParser.java
/** * Parses a "Nachrichten zum Tag" ("daily news") table from an Untis schedule * * @param table the <code>table</code>-Element to be parsed * @param day the {@link SubstitutionScheduleDay} where the messages should be stored */// w w w . j ava 2s. co m private void parseMessages(Element table, SubstitutionScheduleDay day) { Elements zeilen = table.select("tr:not(:contains(Nachrichten zum Tag))"); for (Element i : zeilen) { Elements spalten = i.select("td"); String info = ""; for (Element b : spalten) { info += "\n" + TextNode.createFromEncoded(b.html(), null).getWholeText(); } info = info.substring(1); // remove first \n day.addMessage(info); } }
From source file:biz.shadowservices.DegreesToolbox.DataFetcher.java
public FetchResult updateData(Context context, boolean force) { //Open database DBOpenHelper dbhelper = new DBOpenHelper(context); SQLiteDatabase db = dbhelper.getWritableDatabase(); // check for internet connectivity try {// w ww. j a va 2s .c om if (!isOnline(context)) { Log.d(TAG, "We do not seem to be online. Skipping Update."); return FetchResult.NOTONLINE; } } catch (Exception e) { exceptionReporter.reportException(Thread.currentThread(), e, "Exception during isOnline()"); } SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(context); if (!force) { try { if (sp.getBoolean("loginFailed", false) == true) { Log.d(TAG, "Previous login failed. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Previous login failed. Skipping Update."); return FetchResult.LOGINFAILED; } if (sp.getBoolean("autoupdates", true) == false) { Log.d(TAG, "Automatic updates not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (!isBackgroundDataEnabled(context) && sp.getBoolean("obeyBackgroundData", true)) { Log.d(TAG, "Background data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Background data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (!isAutoSyncEnabled() && sp.getBoolean("obeyAutoSync", true) && sp.getBoolean("obeyBackgroundData", true)) { Log.d(TAG, "Auto sync not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Auto sync not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (isWifi(context) && !sp.getBoolean("wifiUpdates", true)) { Log.d(TAG, "On wifi, and wifi auto updates not allowed. Skipping Update"); DBLog.insertMessage(context, "i", TAG, "On wifi, and wifi auto updates not allowed. 
Skipping Update"); return FetchResult.NOTALLOWED; } else if (!isWifi(context)) { Log.d(TAG, "We are not on wifi."); if (!isRoaming(context) && !sp.getBoolean("2DData", true)) { Log.d(TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } else if (isRoaming(context) && !sp.getBoolean("roamingData", false)) { Log.d(TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } } } catch (Exception e) { exceptionReporter.reportException(Thread.currentThread(), e, "Exception while finding if to update."); } } else { Log.d(TAG, "Update Forced"); } try { String username = sp.getString("username", null); String password = sp.getString("password", null); if (username == null || password == null) { DBLog.insertMessage(context, "i", TAG, "Username or password not set."); return FetchResult.USERNAMEPASSWORDNOTSET; } // Find the URL of the page to send login data to. Log.d(TAG, "Finding Action. 
"); HttpGetter loginPageGet = new HttpGetter("https://secure.2degreesmobile.co.nz/web/ip/login"); String loginPageString = loginPageGet.execute(); if (loginPageString != null) { Document loginPage = Jsoup.parse(loginPageString, "https://secure.2degreesmobile.co.nz/web/ip/login"); Element loginForm = loginPage.getElementsByAttributeValue("name", "loginFrm").first(); String loginAction = loginForm.attr("action"); // Send login form List<NameValuePair> loginValues = new ArrayList<NameValuePair>(); loginValues.add(new BasicNameValuePair("externalURLRedirect", "")); loginValues.add(new BasicNameValuePair("hdnAction", "login_userlogin")); loginValues.add(new BasicNameValuePair("hdnAuthenticationType", "M")); loginValues.add(new BasicNameValuePair("hdnlocale", "")); loginValues.add(new BasicNameValuePair("userid", username)); loginValues.add(new BasicNameValuePair("password", password)); Log.d(TAG, "Sending Login "); HttpPoster sendLoginPoster = new HttpPoster(loginAction, loginValues); // Parse result String loginResponse = sendLoginPoster.execute(); Document loginResponseParsed = Jsoup.parse(loginResponse); // Determine if this is a pre-pay or post-paid account. 
boolean postPaid; if (loginResponseParsed .getElementById("p_CustomerPortalPostPaidHomePage_WAR_customerportalhomepage") == null) { Log.d(TAG, "Pre-pay account or no account."); postPaid = false; } else { Log.d(TAG, "Post-paid account."); postPaid = true; } String homepageUrl = "https://secure.2degreesmobile.co.nz/group/ip/home"; if (postPaid) { homepageUrl = "https://secure.2degreesmobile.co.nz/group/ip/postpaid"; } HttpGetter homepageGetter = new HttpGetter(homepageUrl); String homepageHTML = homepageGetter.execute(); Document homePage = Jsoup.parse(homepageHTML); Element accountSummary = homePage.getElementById("accountSummary"); if (accountSummary == null) { Log.d(TAG, "Login failed."); return FetchResult.LOGINFAILED; } db.delete("cache", "", null); /* This code fetched some extra details for postpaid users, but on reflection they aren't that useful. * Might reconsider this. * if (postPaid) { Element accountBalanceSummaryTable = accountSummary.getElementsByClass("tableBillSummary").first(); Elements rows = accountBalanceSummaryTable.getElementsByTag("tr"); int rowno = 0; for (Element row : rows) { if (rowno > 1) { break; } //Log.d(TAG, "Starting row"); //Log.d(TAG, row.html()); Double value; try { Element amount = row.getElementsByClass("tableBillamount").first(); String amountHTML = amount.html(); Log.d(TAG, amountHTML.substring(1)); value = Double.parseDouble(amountHTML.substring(1)); } catch (Exception e) { Log.d(TAG, "Failed to parse amount from row."); value = null; } String expiresDetails = ""; String expiresDate = null; String name = null; try { Element details = row.getElementsByClass("tableBilldetail").first(); name = details.ownText(); Element expires = details.getElementsByTag("em").first(); if (expires != null) { expiresDetails = expires.text(); } Log.d(TAG, expiresDetails); Pattern pattern; pattern = Pattern.compile("\\(payment is due (.*)\\)"); Matcher matcher = pattern.matcher(expiresDetails); if (matcher.find()) { /*Log.d(TAG, "matched 
expires"); Log.d(TAG, "group 0:" + matcher.group(0)); Log.d(TAG, "group 1:" + matcher.group(1)); Log.d(TAG, "group 2:" + matcher.group(2)); * String expiresDateString = matcher.group(1); Date expiresDateObj; if (expiresDateString != null) { if (expiresDateString.length() > 0) { try { expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString); expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj); } catch (java.text.ParseException e) { Log.d(TAG, "Could not parse date: " + expiresDateString); } } } } } catch (Exception e) { Log.d(TAG, "Failed to parse details from row."); } String expirev = null; ContentValues values = new ContentValues(); values.put("name", name); values.put("value", value); values.put("units", "$NZ"); values.put("expires_value", expirev ); values.put("expires_date", expiresDate); db.insert("cache", "value", values ); rowno++; } } */ Element accountSummaryTable = accountSummary.getElementsByClass("tableAccountSummary").first(); Elements rows = accountSummaryTable.getElementsByTag("tr"); for (Element row : rows) { // We are now looking at each of the rows in the data table. 
//Log.d(TAG, "Starting row"); //Log.d(TAG, row.html()); Double value; String units; try { Element amount = row.getElementsByClass("tableBillamount").first(); String amountHTML = amount.html(); //Log.d(TAG, amountHTML); String[] amountParts = amountHTML.split(" ", 2); //Log.d(TAG, amountParts[0]); //Log.d(TAG, amountParts[1]); if (amountParts[0].contains("Included") || amountParts[0].equals("All You Need") || amountParts[0].equals("Unlimited Text*")) { value = Values.INCLUDED; } else { try { value = Double.parseDouble(amountParts[0]); } catch (NumberFormatException e) { exceptionReporter.reportException(Thread.currentThread(), e, "Decoding value."); value = 0.0; } } units = amountParts[1]; } catch (NullPointerException e) { //Log.d(TAG, "Failed to parse amount from row."); value = null; units = null; } Element details = row.getElementsByClass("tableBilldetail").first(); String name = details.getElementsByTag("strong").first().text(); Element expires = details.getElementsByTag("em").first(); String expiresDetails = ""; if (expires != null) { expiresDetails = expires.text(); } Log.d(TAG, expiresDetails); Pattern pattern; if (postPaid == false) { pattern = Pattern.compile("\\(([\\d\\.]*) ?\\w*? ?expiring on (.*)\\)"); } else { pattern = Pattern.compile("\\(([\\d\\.]*) ?\\w*? 
?will expire on (.*)\\)"); } Matcher matcher = pattern.matcher(expiresDetails); Double expiresValue = null; String expiresDate = null; if (matcher.find()) { /*Log.d(TAG, "matched expires"); Log.d(TAG, "group 0:" + matcher.group(0)); Log.d(TAG, "group 1:" + matcher.group(1)); Log.d(TAG, "group 2:" + matcher.group(2)); */ try { expiresValue = Double.parseDouble(matcher.group(1)); } catch (NumberFormatException e) { expiresValue = null; } String expiresDateString = matcher.group(2); Date expiresDateObj; if (expiresDateString != null) { if (expiresDateString.length() > 0) { try { expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString); expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj); } catch (java.text.ParseException e) { Log.d(TAG, "Could not parse date: " + expiresDateString); } } } } ContentValues values = new ContentValues(); values.put("name", name); values.put("value", value); values.put("units", units); values.put("expires_value", expiresValue); values.put("expires_date", expiresDate); db.insert("cache", "value", values); } if (postPaid == false) { Log.d(TAG, "Getting Value packs..."); // Find value packs HttpGetter valuePacksPageGet = new HttpGetter( "https://secure.2degreesmobile.co.nz/group/ip/prevaluepack"); String valuePacksPageString = valuePacksPageGet.execute(); //DBLog.insertMessage(context, "d", "", valuePacksPageString); if (valuePacksPageString != null) { Document valuePacksPage = Jsoup.parse(valuePacksPageString); Elements enabledPacks = valuePacksPage.getElementsByClass("yellow"); for (Element enabledPack : enabledPacks) { Element offerNameElemt = enabledPack .getElementsByAttributeValueStarting("name", "offername").first(); if (offerNameElemt != null) { String offerName = offerNameElemt.val(); DBLog.insertMessage(context, "d", "", "Got element: " + offerName); ValuePack[] packs = Values.valuePacks.get(offerName); if (packs == null) { DBLog.insertMessage(context, "d", "", "Offer name: " + offerName + " not 
matched."); } else { for (ValuePack pack : packs) { ContentValues values = new ContentValues(); values.put("plan_startamount", pack.value); values.put("plan_name", offerName); DBLog.insertMessage(context, "d", "", "Pack " + pack.type.id + " start value set to " + pack.value); db.update("cache", values, "name = '" + pack.type.id + "'", null); } } } } } } SharedPreferences.Editor prefedit = sp.edit(); Date now = new Date(); prefedit.putString("updateDate", DateFormatters.ISO8601FORMAT.format(now)); prefedit.putBoolean("loginFailed", false); prefedit.putBoolean("networkError", false); prefedit.commit(); DBLog.insertMessage(context, "i", TAG, "Update Successful"); return FetchResult.SUCCESS; } } catch (ClientProtocolException e) { DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage()); return FetchResult.NETWORKERROR; } catch (IOException e) { DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage()); return FetchResult.NETWORKERROR; } finally { db.close(); } return null; }
From source file:net.vexelon.mobileops.GLBClient.java
public String getCurrentBalance() throws HttpClientException { StringBuilder builder = new StringBuilder(100); HttpResponse resp;/*from ww w . j a v a2s .c om*/ long bytesCount = 0; try { String url = HTTP_MYTELENOR + GLBRequestType.GET_BALANCE.getPath(); url += '?'; url += new Date().getTime(); HttpGet httpGet = new HttpGet(url); // httpGet.setHeader("X-Requested-With", "XMLHttpRequest"); resp = httpClient.execute(httpGet, httpContext); } catch (Exception e) { throw new HttpClientException("Client protocol error!" + e.getMessage(), e); } StatusLine status = resp.getStatusLine(); if (status.getStatusCode() != HttpStatus.SC_OK) throw new HttpClientException(status.getReasonPhrase(), status.getStatusCode()); try { HttpEntity entity = resp.getEntity(); // bytes downloaded bytesCount = entity.getContentLength() > 0 ? entity.getContentLength() : 0; Document doc = Jsoup.parse(entity.getContent(), RESPONSE_ENCODING, ""); Elements elements; // period bill elements = doc.select("#outstanding-amount"); if (elements.size() > 0) { Elements divs = elements.get(0).select("div"); for (Element el : divs) { String elClass = el.className(); if (elClass.contains("custme-select") || elClass.equalsIgnoreCase("history")) { builder.insert(0, el.html()); } } } // current bill elements = doc.select("#bars-wrapper .p-price"); if (elements.size() > 0) { Element el = elements.get(0); builder.insert(0, el.html()); } return builder.toString(); } catch (ClientProtocolException e) { throw new HttpClientException("Client protocol error!" + e.getMessage(), e); } catch (IOException e) { throw new HttpClientException("Client error!" + e.getMessage(), e); } finally { addDownloadedBytesCount(bytesCount); } }
From source file:cn.edu.hfut.dmic.contentextractor.ContentExtractor.java
/** * ??:/*from w w w. j a v a 2 s . com*/ * 1. ??? * 2. ??????? * 3. ?? * 4. ?? ?? * 5. ? * * @return * @throws XpathSyntaxErrorException */ private String getAuthor() throws XpathSyntaxErrorException { String author = ""; if (StringUtils.isBlank(srcTime)) { author = getAuthor(doc.body().html()); return author; } Element cur = doc.body().select("*:containsOwn(" + srcTime + ")").first(); if (cur == null) { LOG.warn("?srcTime=" + srcTime); author = getAuthor(doc.body().html()); return author; } if (!noText(cur)) { String arr[] = cur.html().split(srcTime); for (String text : arr) { author = getShortText(text); if (!StringUtils.isBlank(author)) return author; } } Element parent = cur.parent(); while (parent != null && noText(parent)) { cur = parent; parent = parent.parent(); } author = getAuthor(parent.html()); if (!StringUtils.isBlank(author)) return author; Element pre = cur.previousElementSibling(); while (pre != null && noText(pre)) { pre = pre.previousElementSibling(); } if (pre != null) { author = getShortText(pre.text()); } if (!StringUtils.isBlank(author)) return author; Element next = cur.nextElementSibling(); while (next != null && noText(next)) { next = next.nextElementSibling(); } if (next != null) { author = getShortText(next.text()); } if (!StringUtils.isBlank(author)) return author; author = getShortText(parent.html().replace(srcTime, " ")); if (!StringUtils.isBlank(author)) return author; author = getAuthor(doc.body().html()); if (StringUtils.isBlank(author)) { return author_bak; } return author; }
From source file:com.weavers.duqhan.business.impl.ProductServiceImpl.java
/**
 * Scrapes the product page behind each given link record and stores the result.
 *
 * <p>For every {@code StatusBean} the matching {@code Temtproductlinklist} row is loaded and
 * processed only when it exists with status 0. The text written to
 * {@code statusBean.setStatus(...)} is one of:
 * <ul>
 * <li>"Link duplicate" - the row is missing or its status is not 0; nothing is fetched;</li>
 * <li>"Product exsist" - a product with the same external link is already stored (row status 3);</li>
 * <li>"Page not found" - the fetched page has no {@code #j-detail-page} element;</li>
 * <li>"criteria mismatch" - the page fails votes &gt; 10, stars &gt; 4, feedback &gt; 4 (row status 2);</li>
 * <li>"Success" - the final product save returned non-null (row status 1);</li>
 * <li>"Failure" - any exception while processing that link (row status 4).</li>
 * </ul>
 *
 * <p>For a page that passes the criteria the method, in this order: reads name and packaging
 * size/weight, walks the breadcrumb and creates categories that do not exist yet, collects
 * the specifications, saves the product a first time, stores property names and values
 * (uploading SKU images), reads the {@code skuProducts} JSON from the page scripts, stores
 * one price/quantity map per SKU, uploads the thumbnail images and saves the product again.
 * Prices go through {@code CurrencyConverter.usdTOinr} and get a markup of 15% plus 100.
 * Each page fetch is preceded by a random sleep of 30 to 59 seconds.
 *
 * <p>NOTE(review): this text was whitespace-collapsed, so every {@code //} comment appears to
 * swallow the code that follows it on the same line. In particular it cannot be told from
 * here whether the {@code temtproductlinklistDao.save(temtproductlinklist)} call after each
 * {@code setStatus(n);//} is live or commented out - confirm against the original file
 * before relying on the row status being persisted.
 *
 * @param statusBeans link records to process; each bean's status text is updated in place
 */
@Override public void loadTempProducts(List<StatusBean> statusBeans) { boolean isSuccess = true; String startDate = new Date().toString(); Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, "(==I==)DATE: " + startDate + "Store product details in temp product table start....."); try { String status = ""; for (StatusBean statusBean : statusBeans) { status = "Link duplicate"; Temtproductlinklist temtproductlinklist = temtproductlinklistDao.loadById(statusBean.getId()); if (temtproductlinklist != null && temtproductlinklist.getStatus() == 0) { Product testProduct = productDao.getProductByExternelLink(temtproductlinklist.getLink()); if (testProduct == null) { String value = ""; Elements detailMain; Elements detailSub; Elements specifics; double votes = 0.0; double stars = 0.0; double feedback = 0.0; String url = temtproductlinklist.getLink(); try { testProduct = new Product(); Product savedTestProduct; //=================== Random sleep START ===================// // TimeUnit.SECONDS.sleep(30 + (int) (Math.random() * 100)); Random randomObj = new Random(); TimeUnit.SECONDS.sleep(randomObj.ints(30, 60).findFirst().getAsInt()); //=================== Random sleep END =====================// Document doc = Jsoup.connect(url).get(); detailMain = doc.select("#j-detail-page"); if (!detailMain.isEmpty()) { //=================== Criteria Block START==================// detailMain = doc.select(".rantings-num"); if (!detailMain.isEmpty()) { votes = Double.valueOf(detailMain.text().split(" votes")[0].split("\\(")[1]); } detailMain = doc.select(".percent-num"); if (!detailMain.isEmpty()) { stars = Double.valueOf(detailMain.text()); } detailMain = doc.select("ul.ui-tab-nav li[data-trigger='feedback'] a"); if (!detailMain.isEmpty()) { feedback = Double.valueOf(detailMain.text().split("\\(")[1].split("\\)")[0]); } //=================== Criteria Block END==================// if (votes > 10.0 && stars > 4.0 && feedback > 4.0) { detailMain = 
doc.select(".detail-wrap .product-name"); testProduct.setName(detailMain .text());/*.substring(0, Math.min(detailMain.text().length(), 50))*/ detailMain = doc.select(".detail-wrap .product-name"); testProduct.setDescription(detailMain.text()); testProduct.setExternalLink(url); testProduct.setVendorId(1l);//?????????????????????? //=================== Packaging block START==================// Double weight = 1.0; Double width = 1.0; Double height = 1.0; Double length = 1.0; detailMain = doc.select( "div#j-product-desc div.pnl-packaging-main ul li.packaging-item"); for (Element element : detailMain) { String packagingTitle = element.select("span.packaging-title").text(); String packagingDesc = element.select("span.packaging-des").text(); if (packagingTitle.trim().equals("Package Weight:")) { String str = packagingDesc; str = str.replaceAll("[^.?0-9]+", " "); if (Arrays.asList(str.trim().split(" ")) != null) { if (!Arrays.asList(str.trim().split(" ")).isEmpty()) { try { weight = Double.parseDouble( Arrays.asList(str.trim().split(" ")).get(0)); } catch (Exception e) { weight = 1.0; } } } System.out.println("weight == " + weight); } else if (packagingTitle.trim().equals("Package Size:")) { String str = packagingDesc; str = str.replaceAll("[^.?0-9]+", " "); if (Arrays.asList(str.trim().split(" ")) != null) { if (!Arrays.asList(str.trim().split(" ")).isEmpty()) { try { width = Double.parseDouble( Arrays.asList(str.trim().split(" ")).get(0)); height = Double.parseDouble( Arrays.asList(str.trim().split(" ")).get(1)); length = Double.parseDouble( Arrays.asList(str.trim().split(" ")).get(2)); } catch (Exception e) { width = 1.0; height = 1.0; length = 1.0; } } } System.out.println("width == " + width); System.out.println("height == " + height); System.out.println("length == " + length); } } //=================== Packaging block END==================// //=================== Category block START==================// detailMain = doc.select("div.ui-breadcrumb div.container a"); 
Long productCategoryId = 0L; String parentPath = ""; String thisCategory = detailMain.last().text().trim(); System.out.println("thisCategory == " + thisCategory); Category parentCategory = new Category(); parentCategory.setId(0L); parentCategory.setParentPath(""); for (Element element : detailMain) { String newCategory; newCategory = element.text().trim(); System.out.println("newCategory======" + newCategory); if (newCategory.equals("Home") || newCategory.equals("All Categories")) { } else { Category category = categoryDao.getCategoryByName(newCategory); if (category != null) { if (category.getName().equals(thisCategory)) { productCategoryId = category.getId(); parentPath = category.getParentPath(); } parentCategory = category; } else { category = new Category(); category.setId(null); category.setName(newCategory); category.setParentId(parentCategory.getId()); category.setParentPath(parentCategory.getParentPath() + parentCategory.getId() + "="); category.setQuantity(0); category.setImgUrl("-"); category.setDisplayText(newCategory); Category category2 = categoryDao.save(category); if (category.getName().equals(thisCategory)) { productCategoryId = category2.getId(); parentPath = category2.getParentPath(); } parentCategory = category2; } } } //=================== Category block END==================// //=============== Specifications block START==============// detailMain = doc.select(".product-property-list .property-item"); String specifications = ""; for (Element element : detailMain) { specifications = specifications + element.select(".propery-title").get(0).text().replace(",", "/") .replace(":", "-") + ":" + element.select(".propery-des").get(0).text() .replace(",", "/").replace(":", "-") + ",";//TODO:, check } //=============== Specifications Block END==============// //=============== Shipping Time Block START==============// String shippingTime = ""; detailMain = doc.select(".shipping-days[data-role='delivery-days']"); System.out.println("value detailMain" + 
detailMain.toString()); shippingTime = detailMain.text(); //=============== Shipping Time Block END==============// //=============== Shipping Cost Block START==============// detailMain = doc.select(".logistics-cost"); value = detailMain.text(); if (!value.equalsIgnoreCase("Free Shipping")) { // f = 0.00; } else { // f = Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1")); } //=============== Shipping Cost Block END==============// //=================Product save 1st START==============// testProduct.setCategoryId(productCategoryId); testProduct.setLastUpdate(new Date()); testProduct.setParentPath(parentPath); testProduct.setImgurl("-"); testProduct.setProperties("-"); testProduct.setProductWidth(width); testProduct.setProductLength(length); testProduct.setProductWeight(weight); testProduct.setProductHeight(height); testProduct.setShippingRate(0.0); testProduct.setShippingTime("45"); testProduct.setSpecifications(specifications); savedTestProduct = productDao.save(testProduct); //====================Product save 1st END==============// //========= Property, Property Value, Property Product Map Block START ========// double discountPrice = 0.0; double actualPrice = 0.0; double markupPrice = 0.0; String id = ""; String allProperties = ""; //------------------------Read Color css START---------------------// specifics = doc.select("#j-product-info-sku dl.p-property-item"); Elements cssdetailMain = doc.select("link[href]"); Document cssdoc = new Document(""); System.out.println( "====================================================cssdetailMain" + cssdetailMain.size()); for (Element element : cssdetailMain) { String cssurl = element.attr("abs:href"); if (cssurl.contains("??main-detail")) { try { cssdoc = Jsoup.connect(cssurl).get(); } catch (IOException ex) { } break; } } //-----------------------Read Color css END--------------------------// //-----------Product Property, Property Value START--------// Map<String, ProductPropertyvalues> propertyValuesMap = 
new HashMap<>(); if (!specifics.isEmpty()) { ProductProperties testPorperties; ProductProperties saveTestPorperties; ProductPropertyvalues testPropertyValues; for (Element specific : specifics) { System.out.println("head ==== " + specific.select("dt").text()); testPorperties = productPropertiesDao .loadByName(specific.select("dt").text()); if (testPorperties == null) { testPorperties = new ProductProperties(); testPorperties.setPropertyName(specific.select("dt").text()); saveTestPorperties = productPropertiesDao.save(testPorperties); } else { saveTestPorperties = testPorperties; } allProperties = allProperties + saveTestPorperties.getId().toString() + "-"; detailSub = specific.select("dd ul li"); String valu = "-"; for (Element element : detailSub) { testPropertyValues = new ProductPropertyvalues(); id = element.select("a[data-sku-id]").attr("data-sku-id").trim(); testPropertyValues.setRefId(id); if (element.hasClass("item-sku-image")) { valu = element.select("a img[src]").get(0).absUrl("src") .split(".jpg")[0] + ".jpg"; String title = element.select("a img").get(0).attr("title"); String imgUrl = GoogleBucketFileUploader .uploadProductImage(valu, savedTestProduct.getId()); valu = "<img src='" + imgUrl + "' title='" + title + "' style='height:40px; width:40px;'/>"; } else if (element.hasClass("item-sku-color")) { String style = cssdoc.html().split("sku-color-" + id)[1] .split("}")[0].substring(1); valu = "<span style='" + style + "' ; height:40px; width:40px; display:block;'></span>"; } else { valu = element.select("a span").toString(); } System.out.println("valu === " + valu); testPropertyValues.setProductId(savedTestProduct.getId()); testPropertyValues.setPropertyId(saveTestPorperties.getId()); testPropertyValues.setValueName(valu); propertyValuesMap.put(id, productPropertyvaluesDao.save(testPropertyValues)); } } savedTestProduct.setProperties(allProperties); } //-----------Product Property, Property Value END--------// //----------------------Read json 
START------------------// List<AxpProductDto> axpProductDtos = new ArrayList<>(); Elements scripts = doc.select("script"); // Get the script part for (Element script : scripts) { if (script.html().contains("var skuProducts=")) { String jsonData = ""; jsonData = script.html().split("var skuProducts=")[1] .split("var GaData")[0].trim(); jsonData = jsonData.substring(0, jsonData.length() - 1); Gson gsonObj = new Gson(); axpProductDtos = Arrays .asList(gsonObj.fromJson(jsonData, AxpProductDto[].class)); break; } } //----------------------Read json END------------------// //-------------Product Properties Map START------------// for (AxpProductDto thisAxpProductDto : axpProductDtos) { SkuVal skuVal = thisAxpProductDto.getSkuVal(); if (skuVal.getActSkuCalPrice() != null) { value = skuVal.getActSkuCalPrice().trim(); discountPrice = CurrencyConverter.usdTOinr( Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1"))); value = skuVal.getSkuCalPrice().trim(); actualPrice = CurrencyConverter.usdTOinr( Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1"))); markupPrice = discountPrice * 0.15 + 100; discountPrice = Math.ceil((discountPrice + markupPrice) / 10) * 10; actualPrice = Math.round(actualPrice + markupPrice); } else { discountPrice = 0.0; value = skuVal.getSkuCalPrice().trim(); actualPrice = CurrencyConverter.usdTOinr( Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1"))); markupPrice = actualPrice * 0.15 + 100; discountPrice = Math.round(actualPrice + markupPrice); actualPrice = Math.round(actualPrice + markupPrice); } ProductPropertiesMap productPropertyMap = new ProductPropertiesMap(); String myPropValueIds = ""; if (thisAxpProductDto.getSkuAttr() != null) { String[] skuPropIds = thisAxpProductDto.getSkuPropIds().split(","); for (String skuPropId : skuPropIds) { myPropValueIds = myPropValueIds + propertyValuesMap.get(skuPropId).getId().toString() + "_"; } productPropertyMap.setPropertyvalueComposition(myPropValueIds); } else { 
productPropertyMap.setPropertyvalueComposition("_"); } productPropertyMap.setDiscount(discountPrice); productPropertyMap.setPrice(actualPrice); productPropertyMap.setProductId(savedTestProduct); productPropertyMap.setQuantity(5l); productPropertiesMapDao.save(productPropertyMap); } //-------------Product Properties Map START------------// //========= Property, Property Value, Property Product Map Block END ========// //============= Multiple Image Block START =============// detailMain = doc.select("ul.image-thumb-list span.img-thumb-item img[src]"); int flg = 0; String imgUrl = ""; for (Element element : detailMain) { imgUrl = GoogleBucketFileUploader.uploadProductImage( element.absUrl("src").split(".jpg")[0] + ".jpg", savedTestProduct.getId()); if (flg == 0) { flg++; savedTestProduct.setImgurl(imgUrl); } else { ProductImg productImg = new ProductImg(); productImg.setId(null); productImg.setImgUrl(imgUrl); productImg.setProductId(savedTestProduct.getId()); productImgDao.save(productImg); } } //============= Multiple Image Block END =============// //=================Product save final START==============// if (productDao.save(savedTestProduct) != null) { temtproductlinklist.setStatus(1);// temtproductlinklistDao.save(temtproductlinklist); status = "Success"; } //=================Product save final START==============// } else { temtproductlinklist.setStatus(2);// temtproductlinklistDao.save(temtproductlinklist); status = "criteria mismatch"; } } else { status = "Page not found"; } } catch (Exception ex) { System.out.println( "=============================================================Exception1" + ex); temtproductlinklist.setStatus(4);// temtproductlinklistDao.save(temtproductlinklist); System.out.println("Exception === " + ex); status = "Failure"; Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, "(==E==)DATE: " + new Date().toString() + "Store product details in temp product table get error in sub process.....\n Link Id: " + 
statusBean.getId() + "\n Started on" + startDate, ex); } } else { temtproductlinklist.setStatus(3);// temtproductlinklistDao.save(temtproductlinklist); status = "Product exsist"; } } // String body = "Id: " + temtproductlinklist.getId() + "<br/> Status: " + status; // MailSender.sendEmail("krisanu.nandi@pkweb.in", "Product captured", body, "subhendu.sett@pkweb.in"); statusBean.setStatus(status); } System.out.println("=============================================================status" + status); } catch (Exception e) { System.out.println("=============================================================Exception2" + e); isSuccess = false; String body = "(==E==)DATE: " + new Date().toString() + "Store product details in temp product table get error.....<br/> Started on" + startDate + "<br/>"; Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, body, e); // MailSender.sendEmail("krisanu.nandi@pkweb.in", "Stopped store product details", body + e.getLocalizedMessage(), "subhendu.sett@pkweb.in"); } if (isSuccess) { String body = "(==I==)DATE: " + new Date().toString() + "Store product details in temp product table end.....<br/> Started on" + startDate; Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, body); /*ObjectMapper mapper = new ObjectMapper(); try { MailSender.sendEmail("krisanu.nandi@pkweb.in", "Completed store product details", body + "=============<br/><br/>" + mapper.writeValueAsString(statusBeans), "subhendu.sett@pkweb.in"); } catch (JsonProcessingException ex) { Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, null, ex); }*/ } // return statusBeans; System.out.println("=============================================================end"); }
From source file:com.salsaberries.narchiver.Trawler.java
/** * Extracts links from html, and returns a set of Pages with their parent * page already defined./*from w w w. j a v a2 s .c om*/ * * @param html * @return A list of pages to follow. */ private ArrayList<Page> extractPages(Page extractPage) { String html = extractPage.getHtml(); ArrayList<Page> pages = new ArrayList<>(); // Are we at a stop at page? for (String e : stopAt) { if (extractPage.getTagURL().contains(e)) { return pages; } } // Parse the html Document doc = Jsoup.parse(html); Elements links = doc.getElementsByTag("a"); for (Element link : links) { String tagURL = ""; String linkText = ""; boolean alreadyFollowed; boolean validURL = false; // First format the link if (link.attr("href").startsWith(baseURL)) { tagURL = link.attr("href").replace(baseURL, ""); linkText = link.html(); validURL = true; } else if (link.attr("href").startsWith("/")) { tagURL = link.attr("href"); linkText = link.html(); validURL = true; } else if (link.attr("href").startsWith("./")) { tagURL = link.attr("href").substring(1); linkText = link.html(); validURL = true; } //else if (!link.attr("href").startsWith("/") && !link.attr("href").startsWith("http")) { // tagURL = "/" + link.attr("href"); // linkText = link.html(); // validURL = true; //} // Has it already been followed? alreadyFollowed = trawledPages.contains(tagURL); // Does it violate the exclusion rules? boolean excluded = false; for (String e : exclude) { if (tagURL.contains(e)) { excluded = true; } } // Does it violate the exclusion equal rule? 
for (String e : excludeIfEqual) { if (tagURL.equals(e)) { excluded = true; } } if (!alreadyFollowed && validURL && !excluded) { logger.debug("Creating new page at URL " + tagURL); Page page = new Page(tagURL, extractPage, linkText); trawledPages.add(tagURL); pages.add(page); } if (alreadyFollowed) { logger.debug("Skipping duplicate at URL " + tagURL); } if (!validURL) { logger.debug("Invalid URL at " + link.attr("href")); } if (excluded) { logger.debug("Exclusion at " + link.attr("href")); } } return pages; }
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
@Override public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection) throws IOException { String reservation_info = item.getReservation_info(); Document doc = null;//from w ww . ja v a 2 s. co m if (useraction == MultiStepResult.ACTION_CONFIRMATION) { List<NameValuePair> nameValuePairs = new ArrayList<>(2); nameValuePairs.add(new BasicNameValuePair("make_allvl", "Bestaetigung")); nameValuePairs.add(new BasicNameValuePair("target", "makevorbest")); httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding()); return new ReservationResult(MultiStepResult.Status.OK); } else if (selection == null || useraction == 0) { String html = httpGet(opac_url + "/" + reservation_info, getDefaultEncoding()); doc = Jsoup.parse(html); if (doc.select("input[name=AUSWEIS]").size() > 0) { // Needs login List<NameValuePair> nameValuePairs = new ArrayList<>(2); nameValuePairs.add(new BasicNameValuePair("AUSWEIS", acc.getName())); nameValuePairs.add(new BasicNameValuePair("PWD", acc.getPassword())); if (data.has("db")) { try { nameValuePairs.add(new BasicNameValuePair("vkontodb", data.getString("db"))); } catch (JSONException e) { // TODO Auto-generated catch block e.printStackTrace(); } } nameValuePairs.add(new BasicNameValuePair("B1", "weiter")); nameValuePairs.add(new BasicNameValuePair("target", doc.select("input[name=target]").val())); nameValuePairs.add(new BasicNameValuePair("type", "VT2")); html = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding()); doc = Jsoup.parse(html); } if (doc.select("select[name=" + branch_inputfield + "]").size() == 0) { if (doc.select("select[name=VZST]").size() > 0) { branch_inputfield = "VZST"; } } if (doc.select("select[name=" + branch_inputfield + "]").size() > 0) { List<Map<String, String>> branches = new ArrayList<>(); for (Element option : doc.select("select[name=" + branch_inputfield + "]").first().children()) { String 
value = option.text().trim(); String key; if (option.hasAttr("value")) { key = option.attr("value"); } else { key = value; } Map<String, String> selopt = new HashMap<>(); selopt.put("key", key); selopt.put("value", value); branches.add(selopt); } _res_target = doc.select("input[name=target]").attr("value"); ReservationResult result = new ReservationResult(MultiStepResult.Status.SELECTION_NEEDED); result.setActionIdentifier(ReservationResult.ACTION_BRANCH); result.setSelection(branches); return result; } } else if (useraction == ReservationResult.ACTION_BRANCH) { List<NameValuePair> nameValuePairs = new ArrayList<>(2); nameValuePairs.add(new BasicNameValuePair(branch_inputfield, selection)); nameValuePairs.add(new BasicNameValuePair("button2", "weiter")); nameValuePairs.add(new BasicNameValuePair("target", _res_target)); String html = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding()); doc = Jsoup.parse(html); } if (doc == null) { return new ReservationResult(MultiStepResult.Status.ERROR); } if (doc.select("input[name=target]").size() > 0) { if (doc.select("input[name=target]").attr("value").equals("makevorbest")) { List<String[]> details = new ArrayList<>(); if (doc.getElementsByClass("kontomeldung").size() == 1) { details.add(new String[] { doc.getElementsByClass("kontomeldung").get(0).text().trim() }); } Pattern p = Pattern.compile("geb.hr", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE); for (Element div : doc.select(".kontozeile_center")) { for (String text : Jsoup.parse(div.html().replaceAll("(?i)<br[^>]*>", "br2n")).text() .split("br2n")) { if (p.matcher(text).find() && !text.contains("usstehend") && text.contains("orbestellung")) { details.add(new String[] { text.trim() }); } } } if (doc.select("#vorbest").size() > 0 && doc.select("#vorbest").val().contains("(")) { // Erlangen uses "Kostenpflichtige Vorbestellung (1 Euro)" // as the label of its reservation button details.add(new String[] { 
doc.select("#vorbest").val().trim() }); } for (Element row : doc.select(".kontozeile_center table tr")) { if (row.select(".konto_feld").size() == 1 && row.select(".konto_feldinhalt").size() == 1) { details.add(new String[] { row.select(".konto_feld").text().trim(), row.select(".konto_feldinhalt").text().trim() }); } } ReservationResult result = new ReservationResult(MultiStepResult.Status.CONFIRMATION_NEEDED); result.setDetails(details); return result; } } if (doc.getElementsByClass("kontomeldung").size() == 1) { return new ReservationResult(MultiStepResult.Status.ERROR, doc.getElementsByClass("kontomeldung").get(0).text()); } return new ReservationResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.UNKNOWN_ERROR)); }
From source file:me.vertretungsplan.parser.IndiwareParser.java
void parseIndiwarePage(SubstitutionSchedule v, String response) throws JSONException, IOException { boolean html; Element doc; if (response.contains("<html") || response.contains("<table")) { html = true;/*from www . j a v a 2 s . c o m*/ doc = Jsoup.parse(response); } else { html = false; doc = Jsoup.parse(response, "", Parser.xmlParser()); } if (html && data.has(PARAM_EMBEDDED_CONTENT_SELECTOR)) { String selector = data.getString(PARAM_EMBEDDED_CONTENT_SELECTOR); Elements elems = doc.select(selector); if (elems.size() == 0) throw new IOException("No elements found using " + selector); for (Element elem : elems) { v.addDay(parseIndiwareDay(elem, true)); } } else if (html && doc.select(".vpfuer").size() > 1) { // multiple schedules after each other on one page String[] htmls = doc.html().split("<span class=\"vpfuer\">"); for (int i = 1; i < htmls.length; i++) { Document splitDoc = Jsoup.parse(htmls[i]); v.addDay(parseIndiwareDay(splitDoc, true)); } } else { v.addDay(parseIndiwareDay(doc, html)); } }
From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule7.java
@Override public ArrayList<Person> run(String url, Document doc) { ArrayList<Person> editorList = new ArrayList<>(); Elements editors = doc.select("dt:contains(Authors/Editors) ~ dd, dt:contains(Author/Editor) ~ dd"); if (editors.size() == 0) return null; boolean skip = false; for (Element editor : editors) { Element prev = editor.previousElementSibling(); if (prev.tagName().equals("dt")) { if (!prev.text().trim().toLowerCase().startsWith("authors/editors") && !prev.text().trim().toLowerCase().startsWith("author/editor")) { skip = true;//from ww w . jav a2 s .co m } } if (skip) { Element next = editor.nextElementSibling(); if (next != null) { if (next.text().trim().toLowerCase().startsWith("authors/editors") || next.text().trim().toLowerCase().startsWith("author/editor")) { skip = false; continue; } } continue; } if (StringUtils.countMatches(editor.text(), " - ") > 2) { Log.log("warning", url + ": This editor may be a list of editors separated by - "); EditorsRule5 ed5 = new EditorsRule5(); return ed5.run(url, doc); } String[] splitted = editor.html().split("<br />|<br clear=\"none\" />"); if (splitted.length < 2) { if (editor.text().equals("WHATWG:") || editor.text().equals("W3C:")) continue; Person result = NameParser.parse(editor.text()); if (result == null) continue; for (int i = 0; i < editor.select("a").size(); i++) { if (!editor.select("a").get(i).attr("href").isEmpty()) { if (editor.select("a").get(i).attr("href").contains("@")) { result.setEmail(editor.select("a").get(i).attr("href").replace("mailto:", "")); } else { result.addWebsite(editor.select("a").get(i).attr("href")); } } } editorList.add(result); } else { for (String split : splitted) { if (!split.isEmpty()) { if (split.equals("WHATWG:") || split.equals("W3C:")) continue; Document newdoc = Jsoup.parse(split.replaceAll("\n", "")); Person result = NameParser.parse(newdoc.text()); if (result == null) continue; for (int i = 0; i < newdoc.select("a").size(); i++) { if 
(!newdoc.select("a").get(i).attr("href").isEmpty()) { if (newdoc.select("a").get(i).attr("href").contains("@")) { result.setEmail(newdoc.select("a").get(i).attr("href").replace("mailto:", "")); } else { result.addWebsite(newdoc.select("a").get(i).attr("href")); } } } editorList.add(result); } } } Element next = editor.nextElementSibling(); if (next != null) if (next.tag().getName().equals("dt")) break; } if (editorList.size() == 0) return null; return editorList; }