List of usage examples for org.jsoup.select.Elements.text()
public String text()
From source file:org.brunocvcunha.taskerbox.impl.jobs.MonsterJobSeeker.java
@Override protected void execute() throws Exception { try {//from w w w .j a v a 2 s . co m for (int x = 1; x < this.maxPages; x++) { int uniqueCount = 0; // DefaultHttpClient client = // TaskerboxHttpBox.getInstance().buildNewHttpClient(); String seekUrl = "http://jobsearch.monster." + this.site + "/search/?q=" + URLEncoder.encode(this.search) + "&sort=dt.rv.di&pg=" + x; logInfo(log, "... Seeking " + seekUrl); HttpEntity entity = TaskerboxHttpBox.getInstance().getEntityForURL(seekUrl); String result = TaskerboxHttpBox.getInstance().readResponseFromEntity(entity); if (result.contains("Sorry, no jobs were found that match your criteria")) { System.err.println("Busca encerrada."); this.bootstrapHttpClient(true); break; // return; } try { Document doc = Jsoup.parse(result); Elements el = doc.select("table.listingsTable").select("tr"); for (val item : el) { Elements jobTitleEl = item.select("div.jobTitleContainer"); Elements companyEl = item.select("div.companyContainer"); Elements locationEl = item.select("div.jobLocationSingleLine"); // aaa String url = jobTitleEl.select("a").attr("href"); if (url.equals("")) { continue; } if (url.contains("?mescoid")) { url = url.substring(0, url.indexOf("?mescoid")); } if (url.contains("?jobPosition")) { url = url.substring(0, url.indexOf("?jobPosition")); } if (url.contains("&jobPosition")) { url = url.substring(0, url.indexOf("&jobPosition")); } String company = ""; if (!companyEl.select("a").isEmpty()) { company = companyEl.select("a").get(0).attr("title"); } handleJob(jobTitleEl.text(), company, locationEl.select("a").text(), url); uniqueCount++; } if (uniqueCount == 0) { logInfo(log, "MONSTER BREAK -- NO UNIQUE COUNT"); break; } try { Thread.sleep(10000L); } catch (InterruptedException e) { e.printStackTrace(); } } catch (Exception e) { e.printStackTrace(); } } } catch (Exception e) { e.printStackTrace(); } }
From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java
/** * ?xpath/*ww w. j ava 2 s .c o m*/ * * @param xpath * @param root * @return */ public List<JXNode> evaluate(String xpath, Elements root) throws NoSuchAxisException, NoSuchFunctionException { List<JXNode> res = new LinkedList<JXNode>(); Elements context = root; List<Node> xpathNodes = getXpathNodeTree(xpath); for (int i = 0; i < xpathNodes.size(); i++) { Node n = xpathNodes.get(i); LinkedList<Element> contextTmp = new LinkedList<Element>(); if (n.getScopeEm() == ScopeEm.RECURSIVE || n.getScopeEm() == ScopeEm.CURREC) { if (n.getTagName().startsWith("@")) { for (Element e : context) { //? String key = n.getTagName().substring(1); if (key.equals("*")) { res.add(JXNode.t(e.attributes().toString())); } else { String value = e.attr(key); if (StringUtils.isNotBlank(value)) { res.add(JXNode.t(value)); } } //?? for (Element dep : e.getAllElements()) { if (key.equals("*")) { res.add(JXNode.t(dep.attributes().toString())); } else { String value = dep.attr(key); if (StringUtils.isNotBlank(value)) { res.add(JXNode.t(value)); } } } } } else if (n.getTagName().endsWith("()")) { //??text() res.add(JXNode.t(context.text())); } else { Elements searchRes = context.select(n.getTagName()); for (Element e : searchRes) { Element filterR = filter(e, n); if (filterR != null) { contextTmp.add(filterR); } } context = new Elements(contextTmp); if (i == xpathNodes.size() - 1) { for (Element e : contextTmp) { res.add(JXNode.e(e)); } } } } else { if (n.getTagName().startsWith("@")) { for (Element e : context) { String key = n.getTagName().substring(1); if (key.equals("*")) { res.add(JXNode.t(e.attributes().toString())); } else { String value = e.attr(key); if (StringUtils.isNotBlank(value)) { res.add(JXNode.t(value)); } } } } else if (n.getTagName().endsWith("()")) { res = (List<JXNode>) callFunc(n.getTagName().substring(0, n.getTagName().length() - 2), context); } else { for (Element e : context) { Elements filterScope = e.children(); if (StringUtils.isNotBlank(n.getAxis())) { filterScope = 
getAxisScopeEls(n.getAxis(), e); } for (Element chi : filterScope) { Element fchi = filter(chi, n); if (fchi != null) { contextTmp.add(fchi); } } } context = new Elements(contextTmp); if (i == xpathNodes.size() - 1) { for (Element e : contextTmp) { res.add(JXNode.e(e)); } } } } } return res; }
From source file:com.weavers.duqhan.business.impl.ProductServiceImpl.java
/**
 * Imports products from the queued external links referenced by {@code statusBeans} and writes
 * a textual outcome back into each bean via {@code setStatus}.
 *
 * <p>For every bean the queued link is loaded by id and processed only when it exists and its
 * status is {@code 0}; otherwise the bean keeps the outcome "Link duplicate". Processing one
 * link:
 * <ol>
 *   <li>If a product with the same external link already exists, the link is marked with
 *       status {@code 3} ("Product exsist").</li>
 *   <li>Otherwise the method sleeps for a random 30-59 seconds and then fetches the page.
 *       A page without {@code #j-detail-page} yields "Page not found"; the link status is
 *       left unchanged.</li>
 *   <li>The page must pass the acceptance criteria {@code votes > 10}, {@code stars > 4} and
 *       {@code feedback > 4}; otherwise the link is marked with status {@code 2}
 *       ("criteria mismatch").</li>
 *   <li>Name, packaging weight/size, breadcrumb categories (created on demand) and
 *       specifications are read and the product is saved a first time.</li>
 *   <li>SKU properties and their values are saved, then the {@code skuProducts} JSON embedded
 *       in a script tag is parsed and one price/quantity map entry is saved per SKU. Prices
 *       are converted with {@code CurrencyConverter.usdTOinr} and raised by a markup of
 *       15% plus 100.</li>
 *   <li>Thumbnail images are uploaded, the product is saved again and the link is marked with
 *       status {@code 1} ("Success").</li>
 * </ol>
 * Any exception while processing a link marks it with status {@code 4} ("Failure"), is logged,
 * and the loop continues with the next bean. An exception outside that per-link handling ends
 * the run and is logged.
 *
 * <p>NOTE(review): start/end messages are logged at {@code Level.SEVERE} although they are
 * informational. The shipping-cost block has two empty branches and {@code shippingTime} is
 * read from the page but {@code "45"} is stored. {@code split(".jpg")} treats the dot as a
 * regex wildcard. These look unintended -- confirm before changing.
 *
 * @param statusBeans the beans naming the queued links (by id) to import; each receives its
 *                    outcome text
 */
@Override public void loadTempProducts(List<StatusBean> statusBeans) { boolean isSuccess = true; String startDate = new Date().toString(); Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, "(==I==)DATE: " + startDate + "Store product details in temp product table start....."); try {/*from w ww . j av a 2s . c o m*/ String status = ""; for (StatusBean statusBean : statusBeans) { status = "Link duplicate"; Temtproductlinklist temtproductlinklist = temtproductlinklistDao.loadById(statusBean.getId()); if (temtproductlinklist != null && temtproductlinklist.getStatus() == 0) { Product testProduct = productDao.getProductByExternelLink(temtproductlinklist.getLink()); if (testProduct == null) { String value = ""; Elements detailMain; Elements detailSub; Elements specifics; double votes = 0.0; double stars = 0.0; double feedback = 0.0; String url = temtproductlinklist.getLink(); try { testProduct = new Product(); Product savedTestProduct; //=================== Random sleep START ===================// // TimeUnit.SECONDS.sleep(30 + (int) (Math.random() * 100)); Random randomObj = new Random(); TimeUnit.SECONDS.sleep(randomObj.ints(30, 60).findFirst().getAsInt()); //=================== Random sleep END =====================// Document doc = Jsoup.connect(url).get(); detailMain = doc.select("#j-detail-page"); if (!detailMain.isEmpty()) { //=================== Criteria Block START==================// detailMain = doc.select(".rantings-num"); if (!detailMain.isEmpty()) { votes = Double.valueOf(detailMain.text().split(" votes")[0].split("\\(")[1]); } detailMain = doc.select(".percent-num"); if (!detailMain.isEmpty()) { stars = Double.valueOf(detailMain.text()); } detailMain = doc.select("ul.ui-tab-nav li[data-trigger='feedback'] a"); if (!detailMain.isEmpty()) { feedback = Double.valueOf(detailMain.text().split("\\(")[1].split("\\)")[0]); } //=================== Criteria Block END==================// if (votes > 10.0 && stars > 4.0 && feedback > 4.0) { 
detailMain = doc.select(".detail-wrap .product-name"); testProduct.setName(detailMain .text());/*.substring(0, Math.min(detailMain.text().length(), 50))*/ detailMain = doc.select(".detail-wrap .product-name"); testProduct.setDescription(detailMain.text()); testProduct.setExternalLink(url); testProduct.setVendorId(1l);//?????????????????????? //=================== Packaging block START==================// Double weight = 1.0; Double width = 1.0; Double height = 1.0; Double length = 1.0; detailMain = doc.select( "div#j-product-desc div.pnl-packaging-main ul li.packaging-item"); for (Element element : detailMain) { String packagingTitle = element.select("span.packaging-title").text(); String packagingDesc = element.select("span.packaging-des").text(); if (packagingTitle.trim().equals("Package Weight:")) { String str = packagingDesc; str = str.replaceAll("[^.?0-9]+", " "); if (Arrays.asList(str.trim().split(" ")) != null) { if (!Arrays.asList(str.trim().split(" ")).isEmpty()) { try { weight = Double.parseDouble( Arrays.asList(str.trim().split(" ")).get(0)); } catch (Exception e) { weight = 1.0; } } } System.out.println("weight == " + weight); } else if (packagingTitle.trim().equals("Package Size:")) { String str = packagingDesc; str = str.replaceAll("[^.?0-9]+", " "); if (Arrays.asList(str.trim().split(" ")) != null) { if (!Arrays.asList(str.trim().split(" ")).isEmpty()) { try { width = Double.parseDouble( Arrays.asList(str.trim().split(" ")).get(0)); height = Double.parseDouble( Arrays.asList(str.trim().split(" ")).get(1)); length = Double.parseDouble( Arrays.asList(str.trim().split(" ")).get(2)); } catch (Exception e) { width = 1.0; height = 1.0; length = 1.0; } } } System.out.println("width == " + width); System.out.println("height == " + height); System.out.println("length == " + length); } } //=================== Packaging block END==================// //=================== Category block START==================// detailMain = doc.select("div.ui-breadcrumb 
div.container a"); Long productCategoryId = 0L; String parentPath = ""; String thisCategory = detailMain.last().text().trim(); System.out.println("thisCategory == " + thisCategory); Category parentCategory = new Category(); parentCategory.setId(0L); parentCategory.setParentPath(""); for (Element element : detailMain) { String newCategory; newCategory = element.text().trim(); System.out.println("newCategory======" + newCategory); if (newCategory.equals("Home") || newCategory.equals("All Categories")) { } else { Category category = categoryDao.getCategoryByName(newCategory); if (category != null) { if (category.getName().equals(thisCategory)) { productCategoryId = category.getId(); parentPath = category.getParentPath(); } parentCategory = category; } else { category = new Category(); category.setId(null); category.setName(newCategory); category.setParentId(parentCategory.getId()); category.setParentPath(parentCategory.getParentPath() + parentCategory.getId() + "="); category.setQuantity(0); category.setImgUrl("-"); category.setDisplayText(newCategory); Category category2 = categoryDao.save(category); if (category.getName().equals(thisCategory)) { productCategoryId = category2.getId(); parentPath = category2.getParentPath(); } parentCategory = category2; } } } //=================== Category block END==================// //=============== Specifications block START==============// detailMain = doc.select(".product-property-list .property-item"); String specifications = ""; for (Element element : detailMain) { specifications = specifications + element.select(".propery-title").get(0).text().replace(",", "/") .replace(":", "-") + ":" + element.select(".propery-des").get(0).text() .replace(",", "/").replace(":", "-") + ",";//TODO:, check } //=============== Specifications Block END==============// //=============== Shipping Time Block START==============// String shippingTime = ""; detailMain = doc.select(".shipping-days[data-role='delivery-days']"); 
System.out.println("value detailMain" + detailMain.toString()); shippingTime = detailMain.text(); //=============== Shipping Time Block END==============// //=============== Shipping Cost Block START==============// detailMain = doc.select(".logistics-cost"); value = detailMain.text(); if (!value.equalsIgnoreCase("Free Shipping")) { // f = 0.00; } else { // f = Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1")); } //=============== Shipping Cost Block END==============// //=================Product save 1st START==============// testProduct.setCategoryId(productCategoryId); testProduct.setLastUpdate(new Date()); testProduct.setParentPath(parentPath); testProduct.setImgurl("-"); testProduct.setProperties("-"); testProduct.setProductWidth(width); testProduct.setProductLength(length); testProduct.setProductWeight(weight); testProduct.setProductHeight(height); testProduct.setShippingRate(0.0); testProduct.setShippingTime("45"); testProduct.setSpecifications(specifications); savedTestProduct = productDao.save(testProduct); //====================Product save 1st END==============// //========= Property, Property Value, Property Product Map Block START ========// double discountPrice = 0.0; double actualPrice = 0.0; double markupPrice = 0.0; String id = ""; String allProperties = ""; //------------------------Read Color css START---------------------// specifics = doc.select("#j-product-info-sku dl.p-property-item"); Elements cssdetailMain = doc.select("link[href]"); Document cssdoc = new Document(""); System.out.println( "====================================================cssdetailMain" + cssdetailMain.size()); for (Element element : cssdetailMain) { String cssurl = element.attr("abs:href"); if (cssurl.contains("??main-detail")) { try { cssdoc = Jsoup.connect(cssurl).get(); } catch (IOException ex) { } break; } } //-----------------------Read Color css END--------------------------// //-----------Product Property, Property Value START--------// Map<String, 
ProductPropertyvalues> propertyValuesMap = new HashMap<>(); if (!specifics.isEmpty()) { ProductProperties testPorperties; ProductProperties saveTestPorperties; ProductPropertyvalues testPropertyValues; for (Element specific : specifics) { System.out.println("head ==== " + specific.select("dt").text()); testPorperties = productPropertiesDao .loadByName(specific.select("dt").text()); if (testPorperties == null) { testPorperties = new ProductProperties(); testPorperties.setPropertyName(specific.select("dt").text()); saveTestPorperties = productPropertiesDao.save(testPorperties); } else { saveTestPorperties = testPorperties; } allProperties = allProperties + saveTestPorperties.getId().toString() + "-"; detailSub = specific.select("dd ul li"); String valu = "-"; for (Element element : detailSub) { testPropertyValues = new ProductPropertyvalues(); id = element.select("a[data-sku-id]").attr("data-sku-id").trim(); testPropertyValues.setRefId(id); if (element.hasClass("item-sku-image")) { valu = element.select("a img[src]").get(0).absUrl("src") .split(".jpg")[0] + ".jpg"; String title = element.select("a img").get(0).attr("title"); String imgUrl = GoogleBucketFileUploader .uploadProductImage(valu, savedTestProduct.getId()); valu = "<img src='" + imgUrl + "' title='" + title + "' style='height:40px; width:40px;'/>"; } else if (element.hasClass("item-sku-color")) { String style = cssdoc.html().split("sku-color-" + id)[1] .split("}")[0].substring(1); valu = "<span style='" + style + "' ; height:40px; width:40px; display:block;'></span>"; } else { valu = element.select("a span").toString(); } System.out.println("valu === " + valu); testPropertyValues.setProductId(savedTestProduct.getId()); testPropertyValues.setPropertyId(saveTestPorperties.getId()); testPropertyValues.setValueName(valu); propertyValuesMap.put(id, productPropertyvaluesDao.save(testPropertyValues)); } } savedTestProduct.setProperties(allProperties); } //-----------Product Property, Property Value END--------// 
//----------------------Read json START------------------// List<AxpProductDto> axpProductDtos = new ArrayList<>(); Elements scripts = doc.select("script"); // Get the script part for (Element script : scripts) { if (script.html().contains("var skuProducts=")) { String jsonData = ""; jsonData = script.html().split("var skuProducts=")[1] .split("var GaData")[0].trim(); jsonData = jsonData.substring(0, jsonData.length() - 1); Gson gsonObj = new Gson(); axpProductDtos = Arrays .asList(gsonObj.fromJson(jsonData, AxpProductDto[].class)); break; } } //----------------------Read json END------------------// //-------------Product Properties Map START------------// for (AxpProductDto thisAxpProductDto : axpProductDtos) { SkuVal skuVal = thisAxpProductDto.getSkuVal(); if (skuVal.getActSkuCalPrice() != null) { value = skuVal.getActSkuCalPrice().trim(); discountPrice = CurrencyConverter.usdTOinr( Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1"))); value = skuVal.getSkuCalPrice().trim(); actualPrice = CurrencyConverter.usdTOinr( Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1"))); markupPrice = discountPrice * 0.15 + 100; discountPrice = Math.ceil((discountPrice + markupPrice) / 10) * 10; actualPrice = Math.round(actualPrice + markupPrice); } else { discountPrice = 0.0; value = skuVal.getSkuCalPrice().trim(); actualPrice = CurrencyConverter.usdTOinr( Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1"))); markupPrice = actualPrice * 0.15 + 100; discountPrice = Math.round(actualPrice + markupPrice); actualPrice = Math.round(actualPrice + markupPrice); } ProductPropertiesMap productPropertyMap = new ProductPropertiesMap(); String myPropValueIds = ""; if (thisAxpProductDto.getSkuAttr() != null) { String[] skuPropIds = thisAxpProductDto.getSkuPropIds().split(","); for (String skuPropId : skuPropIds) { myPropValueIds = myPropValueIds + propertyValuesMap.get(skuPropId).getId().toString() + "_"; } 
productPropertyMap.setPropertyvalueComposition(myPropValueIds); } else { productPropertyMap.setPropertyvalueComposition("_"); } productPropertyMap.setDiscount(discountPrice); productPropertyMap.setPrice(actualPrice); productPropertyMap.setProductId(savedTestProduct); productPropertyMap.setQuantity(5l); productPropertiesMapDao.save(productPropertyMap); } //-------------Product Properties Map START------------// //========= Property, Property Value, Property Product Map Block END ========// //============= Multiple Image Block START =============// detailMain = doc.select("ul.image-thumb-list span.img-thumb-item img[src]"); int flg = 0; String imgUrl = ""; for (Element element : detailMain) { imgUrl = GoogleBucketFileUploader.uploadProductImage( element.absUrl("src").split(".jpg")[0] + ".jpg", savedTestProduct.getId()); if (flg == 0) { flg++; savedTestProduct.setImgurl(imgUrl); } else { ProductImg productImg = new ProductImg(); productImg.setId(null); productImg.setImgUrl(imgUrl); productImg.setProductId(savedTestProduct.getId()); productImgDao.save(productImg); } } //============= Multiple Image Block END =============// //=================Product save final START==============// if (productDao.save(savedTestProduct) != null) { temtproductlinklist.setStatus(1);// temtproductlinklistDao.save(temtproductlinklist); status = "Success"; } //=================Product save final START==============// } else { temtproductlinklist.setStatus(2);// temtproductlinklistDao.save(temtproductlinklist); status = "criteria mismatch"; } } else { status = "Page not found"; } } catch (Exception ex) { System.out.println( "=============================================================Exception1" + ex); temtproductlinklist.setStatus(4);// temtproductlinklistDao.save(temtproductlinklist); System.out.println("Exception === " + ex); status = "Failure"; Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, "(==E==)DATE: " + new Date().toString() + "Store product details in temp 
product table get error in sub process.....\n Link Id: " + statusBean.getId() + "\n Started on" + startDate, ex); } } else { temtproductlinklist.setStatus(3);// temtproductlinklistDao.save(temtproductlinklist); status = "Product exsist"; } } // String body = "Id: " + temtproductlinklist.getId() + "<br/> Status: " + status; // MailSender.sendEmail("krisanu.nandi@pkweb.in", "Product captured", body, "subhendu.sett@pkweb.in"); statusBean.setStatus(status); } System.out.println("=============================================================status" + status); } catch (Exception e) { System.out.println("=============================================================Exception2" + e); isSuccess = false; String body = "(==E==)DATE: " + new Date().toString() + "Store product details in temp product table get error.....<br/> Started on" + startDate + "<br/>"; Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, body, e); // MailSender.sendEmail("krisanu.nandi@pkweb.in", "Stopped store product details", body + e.getLocalizedMessage(), "subhendu.sett@pkweb.in"); } if (isSuccess) { String body = "(==I==)DATE: " + new Date().toString() + "Store product details in temp product table end.....<br/> Started on" + startDate; Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, body); /*ObjectMapper mapper = new ObjectMapper(); try { MailSender.sendEmail("krisanu.nandi@pkweb.in", "Completed store product details", body + "=============<br/><br/>" + mapper.writeValueAsString(statusBeans), "subhendu.sett@pkweb.in"); } catch (JsonProcessingException ex) { Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, null, ex); }*/ } // return statusBeans; System.out.println("=============================================================end"); }
From source file:de.geeksfactory.opacclient.apis.SISIS.java
@Override public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException { start(); // TODO: Is this necessary? int resultNum; if (!login(acc)) { return null; }//from w w w .ja v a 2s . c o m // Geliehene Medien String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=1", ENCODING); List<LentItem> medien = new ArrayList<>(); Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_medialist(medien, doc, 1); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { continue; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_medialist(medien, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } if (doc.select("#label1").size() > 0) { resultNum = 0; String rNum = doc.select("#label1").first().text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum = Integer.parseInt(rNum); } assert (resultNum == medien.size()); } // Ordered media ("Bestellungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=6", ENCODING); List<ReservedItem> reserved = new ArrayList<>(); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_reslist("6", reserved, doc, 1); Elements label6 = doc.select("#label6"); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { break; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_reslist("6", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } // Prebooked 
media ("Vormerkungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=7", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_reslist("7", reserved, doc, 1); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { break; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_reslist("7", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } if (label6.size() > 0 && doc.select("#label7").size() > 0) { resultNum = 0; String rNum = label6.text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum = Integer.parseInt(rNum); } rNum = doc.select("#label7").text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum += Integer.parseInt(rNum); } assert (resultNum == reserved.size()); } AccountData res = new AccountData(acc.getId()); if (doc.select("#label8").size() > 0) { String text = doc.select("#label8").first().text().trim(); if (text.matches("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)")) { text = text.replaceAll("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)", "$1 $2"); res.setPendingFees(text); } } Pattern p = Pattern.compile("[^0-9.]*", Pattern.MULTILINE); if (doc.select(".box3").size() > 0) { for (Element box : doc.select(".box3")) { if (box.select("strong").size() == 1) { String text = box.select("strong").text(); if (text.equals("Jahresgebhren")) { text = box.text(); text = p.matcher(text).replaceAll(""); res.setValidUntil(text); } } } } res.setLent(medien); res.setReservations(reserved); return res; }
From source file:com.normalexception.app.rx8club.fragment.category.CategoryFragment.java
/** * Grab contents from the forum that the user clicked on * @param doc The document parsed from the link * @param id The id number of the link * @param isMarket True if the link is from a marketplace category *//* w w w . j a v a 2s . c o m*/ public void getCategoryContents(Document doc, String id, boolean isMarket) { // Update pagination try { Elements pageNumbers = doc.select("div[class=pagenav]"); Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]"); thisPage = pageLinks.text().split(" ")[1]; finalPage = pageLinks.text().split(" ")[3]; } catch (Exception e) { } // Make sure id contains only numbers if (!isNewTopicActivity) id = Utils.parseInts(id); // Grab each thread Elements threadListing = doc.select("table[id=threadslist] > tbody > tr"); for (Element thread : threadListing) { try { boolean isSticky = false, isLocked = false, hasAttachment = false, isAnnounce = false, isPoll = false; String formattedTitle = "", postCount = "0", views = "0", forum = "", threadUser = "", lastUser = "", threadLink = "", lastPage = "", totalPosts = "0", threadDate = ""; Elements announcementContainer = thread.select("td[colspan=5]"); Elements threadTitleContainer = thread.select("a[id^=thread_title]"); // We could have two different types of threads. 
Announcement threads are // completely different than the other types of threads (sticky, locked, etc) // so we need to play some games here if (announcementContainer != null && !announcementContainer.isEmpty()) { Log.d(TAG, "Announcement Thread Found"); Elements annThread = announcementContainer.select("div > a"); Elements annUser = announcementContainer.select("div > span[class=smallfont]"); formattedTitle = "Announcement: " + annThread.first().text(); threadUser = annUser.last().text(); threadLink = annThread.attr("href"); isAnnounce = true; } else if (threadTitleContainer != null && !threadTitleContainer.isEmpty()) { Element threadLinkEl = thread.select("a[id^=thread_title]").first(); Element repliesText = thread.select("td[title^=Replies]").first(); Element threaduser = thread.select("td[id^=td_threadtitle_] div.smallfont").first(); Element threadicon = thread.select("img[id^=thread_statusicon_]").first(); Element threadDiv = thread.select("td[id^=td_threadtitle_] > div").first(); Element threadDateFull = thread.select("td[title^=Replies:] > div").first(); try { isSticky = threadDiv.text().contains("Sticky:"); } catch (Exception e) { } try { isPoll = threadDiv.text().contains("Poll:"); } catch (Exception e) { } try { String icSt = threadicon.attr("src"); isLocked = (icSt.contains("lock") && icSt.endsWith(".gif")); } catch (Exception e) { } String preString = ""; try { preString = threadDiv.select("span > b").text(); } catch (Exception e) { } try { hasAttachment = !threadDiv.select("a[onclick^=attachments]").isEmpty(); } catch (Exception e) { } // Find the last page if it exists try { lastPage = threadDiv.select("span").last().select("a").last().attr("href"); } catch (Exception e) { } threadDate = threadDateFull.text(); int findAMPM = threadDate.indexOf("M") + 1; threadDate = threadDate.substring(0, findAMPM); String totalPostsInThreadTitle = threadicon.attr("alt"); if (totalPostsInThreadTitle != null && totalPostsInThreadTitle.length() > 0) totalPosts = 
totalPostsInThreadTitle.split(" ")[2]; // Remove page from the link String realLink = Utils.removePageFromLink(link); if (threadLinkEl.attr("href").contains(realLink) || (isNewTopicActivity || isMarket)) { String txt = repliesText.getElementsByClass("alt2").attr("title"); String splitter[] = txt.split(" ", 4); postCount = splitter[1].substring(0, splitter[1].length() - 1); views = splitter[3]; try { if (this.isNewTopicActivity) forum = thread.select("td[class=alt1]").last().text(); } catch (Exception e) { } formattedTitle = String.format("%s%s%s", isSticky ? "Sticky: " : isPoll ? "Poll: " : "", preString.length() == 0 ? "" : preString + " ", threadLinkEl.text()); } threadUser = threaduser.text(); lastUser = repliesText.select("a[href*=members]").text(); threadLink = threadLinkEl.attr("href"); } // Add our thread to our list as long as the thread // contains a title if (!formattedTitle.equals("")) { ThreadModel tv = new ThreadModel(); tv.setTitle(formattedTitle); tv.setStartUser(threadUser); tv.setLastUser(lastUser); tv.setLink(threadLink); tv.setLastLink(lastPage); tv.setPostCount(postCount); tv.setMyPosts(totalPosts); tv.setViewCount(views); tv.setLocked(isLocked); tv.setSticky(isSticky); tv.setAnnouncement(isAnnounce); tv.setPoll(isPoll); tv.setHasAttachment(hasAttachment); tv.setForum(forum); tv.setLastPostTime(threadDate); threadlist.add(tv); } else if (thread.text() .contains(MainApplication.getAppContext().getString(R.string.constantNoUpdate))) { Log.d(TAG, String.format("Found End of New Threads after %d threads...", threadlist.size())); if (threadlist.size() > 0) { ThreadModel ltv = threadlist.get(threadlist.size() - 1); Log.d(TAG, String.format("Last New Thread '%s'", ltv.getTitle())); } if (!PreferenceHelper.hideOldPosts(MainApplication.getAppContext())) threadlist.add(new ThreadModel(true)); else { Log.d(TAG, "User Chose To Hide Old Threads"); break; } } } catch (Exception e) { Log.e(TAG, "Error Parsing That Thread...", e); Log.d(TAG, "Thread may have 
moved"); } } }
From source file:com.normalexception.app.rx8club.fragment.FragmentUtils.java
/**
 * Checks whether the user may access a page, for example the page for creating a new thread.
 *
 * <p>The page is fetched and the text of its message container is searched for the forum's
 * permission-denied notice. The mobile container ({@code div.ib-padding}) is tried first and
 * the standard one ({@code td.panelsurround}) is used when the mobile one is absent.
 *
 * @param src The activity handed to the forum factory for the request
 * @param address The page to check permission to
 * @param params Parameters to the url, appended to {@code address} in the given order
 * @return True if user has permission; false if the page shows the permission-denied notice
 *         or could not be loaded
 */
public static boolean doesUserHavePermissionToPage(Activity src, String address, String... params) {
    for (String param : params) {
        address += param;
    }

    Document output = VBForumFactory.getInstance().get(src, address);
    if (output == null) {
        // Other callers null-check this result, so the factory can evidently return no
        // document; without a page, permission cannot be confirmed.
        Log.v(TAG, "doesUserHavePermissionToPage:No Document");
        return false;
    }

    Elements eles = output.select("div[class=ib-padding]");
    Log.v(TAG, "doesUserHavePermissionToPage:Mobile Check = " + eles.size());
    if (eles.isEmpty()) {
        eles = output.select("td[class=panelsurround]");
        Log.v(TAG, "doesUserHavePermissionToPage:Standard Check = " + eles.size());
    }

    // select() never returns null, so the text can be inspected directly.
    return !eles.text().contains("do not have permission to access this page");
}
From source file:com.normalexception.app.rx8club.fragment.ProfileFragment.java
/** * Get the user information from the users profile * @param doc The page document/*from w w w . j a va 2s . c o m*/ */ private void getUserInformation(Document doc) { final UserProfile upInstance = UserProfile.getInstance(); stubs = new ArrayList<ProfileModel>(); // Title Elements userInfo = doc.select("div[id=main_userinfo]"); Elements title = userInfo.select("h2"); upInstance.setUserTitle(title.text()); // Posts Elements statisticInfo = doc.select("fieldset[class=statistics_group]"); Elements post = statisticInfo.select("li"); // Profile Pic Elements profilePicInfo = doc.select("td[id=profilepic_cell] > img"); // Grab image, trap try { upInstance.setUserImageLink(profilePicInfo.attr("src")); } catch (Exception e) { } // Grab Post count, trap exception try { upInstance.setUserPostCount( post.get(0).text() + " / " + post.get(1).text().split(" ", 4)[3] + " per day"); } catch (Exception e) { upInstance.setUserPostCount("Error Getting Post Count"); } // Grab Join Date, trap exception try { upInstance.setUserJoinDate(post.get(13).text()); } catch (Exception e) { upInstance.setUserJoinDate("Error Getting Join Date"); } // Threads String link = WebUrls.userUrl + upInstance.getUserId(); doc = VBForumFactory.getInstance().get(getActivity(), link); if (doc != null) { Elements threadlist = doc.select("table[id^=post]"); for (Element threadl : threadlist) { ProfileModel stub = new ProfileModel(); Elements divs = threadl.getElementsByTag("div"); Elements div = divs.get(1).getElementsByTag("a"); stub.setLink(div.attr("href")); stub.setName(div.text()); div = divs.get(5).getElementsByTag("a"); stub.setText(div.text()); stubs.add(stub); } } }
From source file:com.normalexception.app.rx8club.fragment.thread.ThreadFragment.java
/** * Grab contents from the forum that the user clicked on * @param doc The document parsed from the link * @param id The id number of the link * @return An arraylist of forum contents *//* w w w . j a v a 2 s . c o m*/ public void getThreadContents(Document doc) { // Update pagination try { Elements pageNumbers = doc.select("div[class=pagenav]"); if (pageNumbers.first() != null) { Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]"); thisPage = pageLinks.text().split(" ")[1]; finalPage = pageLinks.text().split(" ")[3]; Log.d(TAG, String.format("This Page: %s, Final Page: %s", thisPage, finalPage)); } else { Log.d(TAG, "Thread only contains one page"); } } catch (Exception e) { Log.e(TAG, "We had an error with pagination", e); } // Is user thread admin?? Elements threadTools = doc.select("div[id=threadtools_menu] > form > table"); if (threadTools.text().contains(MODERATION_TOOLS)) { Log.d(TAG, "<><> User has administrative rights here! <><>"); } else { //adminContent.setVisibility(View.GONE); lv.removeHeaderView(adminContent); } // Get the user's actual ID, there is a chance they never got it // before UserProfile.getInstance().setUserId(HtmlFormUtils.getInputElementValueByName(doc, "loggedinuser")); // Get Post Number and security token securityToken = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken"); Elements pNumber = doc.select("a[href^=http://www.rx8club.com/newreply.php?do=newreply&noquote=1&p=]"); String pNumberHref = pNumber.attr("href"); postNumber = pNumberHref.substring(pNumberHref.lastIndexOf("=") + 1); threadNumber = doc.select("input[name=searchthreadid]").attr("value"); Elements posts = doc.select("div[id=posts]").select("div[id^=edit]"); Log.v(TAG, String.format("Parsing through %d posts", posts.size())); for (Element post : posts) { try { Elements innerPost = post.select("table[id^=post]"); // User Control Panel Elements userCp = innerPost.select("td[class=alt2]"); Elements userDetail = 
userCp.select("div[class=smallfont]"); Elements userSubDetail = userDetail.last().select("div"); Elements userAvatar = userDetail.select("img[alt$=Avatar]"); // User Information PostModel pv = new PostModel(); pv.setUserName(userCp.select("div[id^=postmenu]").text()); pv.setIsLoggedInUser(LoginFactory.getInstance().isLoggedIn() ? UserProfile.getInstance().getUsername().equals(pv.getUserName()) : false); pv.setUserTitle(userDetail.first().text()); pv.setUserImageUrl(userAvatar.attr("src")); pv.setPostDate(innerPost.select("td[class=thead]").first().text()); pv.setPostId(Utils.parseInts(post.attr("id"))); pv.setRootThreadUrl(currentPageLink); // get Likes if any exist Elements eLikes = innerPost.select("div[class*=vbseo_liked] > a"); List<String> likes = new ArrayList<String>(); for (Element eLike : eLikes) likes.add(eLike.text()); pv.setLikes(likes); Iterator<Element> itr = userSubDetail.listIterator(); while (itr.hasNext()) { String txt = itr.next().text(); if (txt.contains("Location:")) pv.setUserLocation(txt); else if (txt.contains("Posts:")) pv.setUserPostCount(txt); else if (txt.contains("Join Date:")) pv.setJoinDate(txt); } // User Post Content pv.setUserPost(formatUserPost(innerPost)); // User signature try { Element userSig = innerPost.select("div[class=konafilter]").first(); pv.setUserSignature(userSig.html()); } catch (NullPointerException npe) { } Elements postAttachments = innerPost.select("a[id^=attachment]"); if (postAttachments != null && !postAttachments.isEmpty()) { ArrayList<String> attachments = new ArrayList<String>(); for (Element postAttachment : postAttachments) { attachments.add(postAttachment.attr("href")); } pv.setAttachments(attachments); } pv.setSecurityToken(securityToken); // Make sure we aren't adding a blank user post if (pv.getUserPost() != null) postlist.add(pv); } catch (Exception e) { Log.w(TAG, "Error Parsing Post...Probably Deleted"); } } }
From source file:net.trustie.model.SFProject_Model.java
/**
 * Completes this model once a page has been handed over.
 *
 * <p>Records the page url, its MD5, the collection timestamp and a page
 * fingerprint, then fills the remaining fields from the page body: pages whose
 * body id is {@code pg_project} are dispatched on the body class to the
 * bluesteel or enterprise extractor, any other page is read with the
 * {@code proj_header}/{@code proj-overview} selectors. Finally both date
 * fields are passed through {@code DateHandler.formatAllTypeDate}.
 *
 * @param page the page being processed
 */
public void afterProcess(Page page) {
    this.url = page.getPageUrl();
    this.urlMd5 = DigestUtils.md5Hex(page.getPageUrl());

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    this.collectTime = timestampFormat.format(new Date());

    // NOTE(review): the fingerprint is built before the extract* helpers below
    // assign lastUpdate/feature/downloadCount/stars, so it reflects whatever
    // those fields held on entry - confirm this ordering is intended.
    this.pageMd5 = DigestUtils.md5Hex(urlMd5 + lastUpdate + feature + downloadCount + stars);

    Document doc = page.getHtml().getDocument();
    Elements bodies = doc.select("body");
    if (bodies.isEmpty()) {
        // No body element: nothing further to extract.
        return;
    }

    Element body = bodies.get(0);
    boolean isProjectPage = "pg_project".equals(body.attr("id"));

    if (isProjectPage) {
        // The body class tells the two project layouts apart.
        String layout = body.attr("class");
        if (layout.equals("bluesteel user")) {
            extractPageBluesteelUser(doc);
        } else if (layout.equals("enterprise user")) {
            extractPageEnterpriseUser(doc);
        }

        // Relative timestamps ("... ago") are converted via getTime.
        if (lastUpdate.contains("ago")) {
            this.lastUpdate = getTime(lastUpdate);
        }
        if (registeredTime.contains("ago")) {
            this.registeredTime = getTime(registeredTime);
        }

        // Missing timestamps get an all-zero placeholder.
        if (lastUpdate.isEmpty()) {
            this.lastUpdate = "0000-00-00 00:00:00";
        }
        if (registeredTime.isEmpty()) {
            this.registeredTime = "0000-00-00 00:00:00";
        }
    } else {
        // Name, description and feature list of the other page layout.
        this.name = body.select("div#proj_header div.proj-title h2").text();
        this.desc = body.select("div#top_left div#home_intro div#proj-overview p").text();
        this.feature = body.select("div#top_left div#home_intro div#proj-overview ul").text();
    }

    this.lastUpdate = DateHandler.formatAllTypeDate(lastUpdate, page.getTime());
    this.registeredTime = DateHandler.formatAllTypeDate(registeredTime, page.getTime());
}
From source file:net.trustie.model.SFProject_Model.java
private void extractPageEnterpriseUser(Document doc) { // name//w ww . j a va 2 s . co m Elements nameElements = doc.select("div#project-header div.content-group h1.project-name a"); name = nameElements.text(); // maintainers // stars Elements starsElements = doc .select("div#project-header div.content-group div.project-rating span:not(.rating-count)"); for (int i = 0; i < starsElements.size(); i++) { String attr = starsElements.get(i).attr("class"); if (attr.equals("rating star")) { stars += 1; } else if (attr.equals("rating star_half")) { stars += 0.5; } else { stars += 0; } } // download count Elements downloadElements = doc .select("div#project-header div.content-group div.group a.download-stats span.data"); if (downloadElements.size() > 0) { String strDownloadCount = downloadElements.get(0).text(); strDownloadCount = strDownloadCount.replaceAll("[^\\d]", ""); this.downloadCount = strDownloadCount; } // last update Elements lastUpdateElements = doc .select("div#project-header div.content-group div.project-rating time.dateUpdated"); if (lastUpdateElements.size() > 0) { lastUpdate = lastUpdateElements.get(0).attr("datetime"); } // platform Elements downloadOSElements = doc.select("div#project-header div.content-group div.download-os"); platform = downloadOSElements.text(); // desc Elements descElements = doc .select("section#overview div.content-group section.primary-content p#project-description"); desc = descElements.text(); // categories // license // feature Elements featureElements = doc.select( "section#overview div.content-group section.primary-content section#project-features div[class=content editable]"); feature = featureElements.text(); // language // intended audience // user interface // program language // registered time // additional detail Elements enterpriseAddtionalElements = doc.select( "section#overview div.content-group section.primary-content aside#additional-details section.content section.project-info"); for (int i = 0; i < 
enterpriseAddtionalElements.size(); i++) { Element element = enterpriseAddtionalElements.get(i); // System.out.println(element.html()); // System.out.println("*************************************"); Elements tags = element.select("header"); if (tags.size() > 0) { String tag = tags.text(); if (tag.equals("Languages")) { language = element.select("section.content").text(); } else if (tag.equals("Intended Audience")) { intendedAudience = element.select("section.content").text(); } else if (tag.equals("User Interface")) { userInterface = element.select("section.content").text(); } else if (tag.equals("Programming Language")) { programmingLanguage = element.select("section.content").text(); } else if (tag.equals("Registered")) { registeredTime = element.select("section.content").text(); } else if (tag.equals("Last Updated")) { lastUpdate = element.select("section.content").text(); } else if (tag.equals("Maintainers")) { maintainers = element.select("a").text(); } else if (tag.equals("License")) { license = element.select("section.content").text(); } else if (tag.equals("Categories")) { categories = element.select("a").text(); } else { } } } }