List of usage examples for org.jsoup.nodes.Element#absUrl
public String absUrl(String attributeKey)
From source file:cc.metapro.openct.borrow.BorrowPresenter.java
@Override public Disposable loadUserCenter(final FragmentManager manager, final String code) { ActivityUtils.showProgressDialog(mContext, R.string.loading_borrows); Observable<Document> observable = LocalHelper.login(Constants.TYPE_LIB, mContext, code); Observer<Document> observer = new MyObserver<Document>(TAG) { @Override//from www . jav a 2s.c o m public void onNext(final Document userCenterDom) { super.onNext(userCenterDom); Constants.checkAdvCustomInfo(mContext); final List<String> urlPatterns = Constants.sDetailCustomInfo.getBorrowUrlPatterns(); if (!urlPatterns.isEmpty()) { if (urlPatterns.size() == 1) { // fetch first page from user center, it will find the borrow info page for most cases Element target = HTMLUtils.getElementSimilar(userCenterDom, Jsoup.parse(urlPatterns.get(0)).body().children().first()); if (target != null) { loadTargetPage(manager, target.absUrl("href")); } } else if (urlPatterns.size() > 1) { // fetch more page to reach borrow info page Observable<String> extraObservable = Observable.create(new ObservableOnSubscribe<String>() { @Override public void subscribe(ObservableEmitter<String> e) throws Exception { LibraryFactory factory = LocalHelper.getLibrary(mContext); Document lastDom = userCenterDom; Element finalTarget = null; for (String pattern : urlPatterns) { if (lastDom != null) { finalTarget = HTMLUtils.getElementSimilar(lastDom, Jsoup.parse(pattern).body().children().first()); } if (finalTarget != null) { lastDom = factory.getBorrowPageDom(finalTarget.absUrl("href")); } } if (finalTarget != null) { e.onNext(finalTarget.absUrl("href")); } } }); Observer<String> extraObserver = new MyObserver<String>(TAG) { @Override public void onNext(String targetUrl) { loadTargetPage(manager, targetUrl); } }; extraObservable.subscribeOn(Schedulers.io()).observeOn(AndroidSchedulers.mainThread()) .subscribe(extraObserver); } else { ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_BORROW, userCenterDom, BorrowPresenter.this); } } else { 
ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_BORROW, userCenterDom, BorrowPresenter.this); } } @Override public void onError(Throwable e) { super.onError(e); ActivityUtils.showAdvCustomTip(mContext, Constants.TYPE_BORROW); Toast.makeText(mContext, e.getMessage(), Toast.LENGTH_LONG).show(); } }; observable.subscribeOn(Schedulers.newThread()).observeOn(AndroidSchedulers.mainThread()) .subscribe(observer); return null; }
From source file:cc.metapro.openct.grades.GradePresenter.java
@Override public Disposable loadUserCenter(final FragmentManager manager, final String code) { ActivityUtils.showProgressDialog(mContext, R.string.login_to_system); Observable<Document> observable = LocalHelper.login(Constants.TYPE_CMS, mContext, code); Observer<Document> observer = new MyObserver<Document>(TAG) { @Override//from w w w .j a va 2 s . com public void onNext(final Document userCenterDom) { super.onNext(userCenterDom); Constants.checkAdvCustomInfo(mContext); final List<String> urlPatterns = Constants.sDetailCustomInfo.getGradeUrlPatterns(); if (!urlPatterns.isEmpty()) { if (urlPatterns.size() == 1) { // fetch first page from user center, it will find the grade info page in most case Element target = HTMLUtils.getElementSimilar(userCenterDom, Jsoup.parse(urlPatterns.get(0)).body().children().first()); if (target != null) { loadTargetPage(manager, target.absUrl("href")); } } else if (urlPatterns.size() > 1) { // fetch more page to reach class info page, especially in QZ Data Soft CMS System Observable<String> extraObservable = Observable.create(new ObservableOnSubscribe<String>() { @Override public void subscribe(ObservableEmitter<String> e) throws Exception { CmsFactory factory = LocalHelper.getCms(mContext); Document lastDom = userCenterDom; Element finalTarget = null; for (String pattern : urlPatterns) { if (lastDom != null) { finalTarget = HTMLUtils.getElementSimilar(lastDom, Jsoup.parse(pattern).body().children().first()); } if (finalTarget != null) { lastDom = factory.getPageDom(finalTarget.absUrl("href")); } } String url = finalTarget.absUrl("href"); e.onNext(url); } }); Observer<String> extraObserver = new MyObserver<String>(TAG) { @Override public void onNext(String targetUrl) { loadTargetPage(manager, targetUrl); } }; extraObservable.subscribeOn(Schedulers.io()).observeOn(AndroidSchedulers.mainThread()) .subscribe(extraObserver); } else { ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_GRADE, userCenterDom, GradePresenter.this); } 
} else { ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_GRADE, userCenterDom, GradePresenter.this); } } @Override public void onError(Throwable e) { super.onError(e); ActivityUtils.showAdvCustomTip(mContext, Constants.TYPE_GRADE); Toast.makeText(mContext, e.getMessage(), Toast.LENGTH_LONG).show(); } }; observable.observeOn(AndroidSchedulers.mainThread()).subscribe(observer); return null; }
From source file:cc.metapro.openct.myclass.ClassPresenter.java
@Override public Disposable loadUserCenter(final FragmentManager manager, final String code) { ActivityUtils.showProgressDialog(mContext, R.string.login_to_system); Observable<Document> observable = LocalHelper.login(Constants.TYPE_CMS, mContext, code); Observer<Document> observer = new MyObserver<Document>(TAG) { @Override/* w w w . j a v a 2 s.c o m*/ public void onNext(final Document userCenterDom) { super.onNext(userCenterDom); Constants.checkAdvCustomInfo(mContext); final List<String> urlPatterns = Constants.sDetailCustomInfo.getClassUrlPatterns(); if (!urlPatterns.isEmpty()) { if (urlPatterns.size() == 1) { // fetch first page from user center, it will find the class info page in most case Element target = HTMLUtils.getElementSimilar(userCenterDom, Jsoup.parse(urlPatterns.get(0)).body().children().first()); if (target != null) { loadTargetPage(manager, target.absUrl("href")); } else { ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_CLASS, userCenterDom, ClassPresenter.this); } } else if (urlPatterns.size() > 1) { // fetch more page to reach class info page, especially in QZ Data Soft CMS System Observable<String> extraObservable = Observable.create(new ObservableOnSubscribe<String>() { @Override public void subscribe(ObservableEmitter<String> e) throws Exception { CmsFactory factory = LocalHelper.getCms(mContext); Document lastDom = userCenterDom; Element finalTarget = null; for (String pattern : urlPatterns) { if (lastDom != null) { finalTarget = HTMLUtils.getElementSimilar(lastDom, Jsoup.parse(pattern).body().children().first()); } if (finalTarget != null) { lastDom = factory.getPageDom(finalTarget.absUrl("href")); } } if (finalTarget != null) { e.onNext(finalTarget.absUrl("href")); } else { e.onError(new Exception("failed")); } } }); Observer<String> extraObserver = new MyObserver<String>(TAG) { @Override public void onNext(String targetUrl) { loadTargetPage(manager, targetUrl); } @Override public void onError(Throwable e) { super.onError(e); 
Toast.makeText(mContext, R.string.can_not_fetch_target_page, Toast.LENGTH_LONG) .show(); ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_CLASS, userCenterDom, ClassPresenter.this); } }; extraObservable.subscribeOn(Schedulers.io()).observeOn(AndroidSchedulers.mainThread()) .subscribe(extraObserver); } else { ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_CLASS, userCenterDom, ClassPresenter.this); } } else { ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_CLASS, userCenterDom, ClassPresenter.this); } } @Override public void onError(Throwable e) { super.onError(e); ActivityUtils.showAdvCustomTip(mContext, Constants.TYPE_CLASS); Toast.makeText(mContext, e.getMessage(), Toast.LENGTH_LONG).show(); } }; observable.observeOn(AndroidSchedulers.mainThread()).subscribe(observer); return null; }
From source file:org.asqatasun.ruleimplementation.link.AbstractDownloadableLinkRuleImplementation.java
@Override protected void select(SSPHandler sspHandler) { super.select(sspHandler); Iterator<Element> iter = getElements().get().iterator(); Element el; while (iter.hasNext()) { el = iter.next();//w ww . j a va 2 s . com try { URI uri = new URI(el.absUrl(HREF_ATTR)); if (isLinkWithProperExtension(uri)) { if (StringUtils.isNotBlank(uri.getFragment())) { iter.remove(); } else { linkWithSimpleExtension.add(el); } } } catch (Exception ex) { } } }
From source file:org.asqatasun.rules.elementchecker.ElementCheckerImpl.java
/** * /* w w w.j av a 2s. c o m*/ * @param element * @param attributeName * @param isExternalResource * @return the text content of an attribute */ protected String buildAttributeContent(Element element, String attributeName, boolean isExternalResource) { if (!element.hasAttr(attributeName)) { return ABSENT_ATTRIBUTE_VALUE; } else if (isExternalResource && !element.attr(ABS_URL_PREFIX + attributeName).isEmpty()) { return element.absUrl(attributeName).trim(); } else { return element.attr(attributeName).trim(); } }
From source file:org.b3log.symphony.util.Links.java
/**
 * Gets links from the specified HTML.
 *
 * <p>Every {@code <a>} element is resolved to an absolute URL, normalized to
 * {@code protocol://host[:port]path} (ports 80 and 443 are omitted, a trailing slash is removed)
 * and de-duplicated. Each distinct URL is then crawled by a {@code Spider} on
 * {@code Symphonys.EXECUTOR_SERVICE}; the non-null results are returned sorted in ascending
 * order of {@code Link.LINK_BAIDU_REF_CNT}.
 *
 * @param baseURL the specified base URL
 * @param html the specified HTML
 * @return a list of links, each of them like this: <pre>
 * {
 *     "linkAddr": "https://hacpai.com/article/1440573175609",
 *     "linkTitle": "",
 *     "linkKeywords": "",
 *     "linkHTML": "page HTML",
 *     "linkText": "page text",
 *     "linkBaiduRefCnt": int
 * }
 * </pre>
 */
public static List<JSONObject> getLinks(final String baseURL, final String html) {
    final Document doc = Jsoup.parse(html, baseURL);
    final Elements urlElements = doc.select("a");

    final Set<String> urls = new HashSet<>();
    final List<Spider> spiders = new ArrayList<>();

    String url = null;
    for (final Element urlEle : urlElements) {
        try {
            url = urlEle.absUrl("href");
            if (StringUtils.isBlank(url) || !StringUtils.contains(url, "://")) {
                url = StringUtils.substringBeforeLast(baseURL, "/") + url;
            }

            final URL formedURL = new URL(url);
            final String protocol = formedURL.getProtocol();
            final String host = formedURL.getHost();
            final int port = formedURL.getPort();
            final String path = formedURL.getPath();

            url = protocol + "://" + host;
            if (-1 != port && 80 != port && 443 != port) {
                url += ":" + port;
            }
            url += path;

            if (StringUtils.endsWith(url, "/")) {
                url = StringUtils.substringBeforeLast(url, "/");
            }

            urls.add(url);
        } catch (final Exception e) {
            LOGGER.warn("Can't parse [" + url + "]");
        }
    }

    final List<JSONObject> ret = new ArrayList<>();

    try {
        for (final String u : urls) {
            spiders.add(new Spider(u));
        }

        final List<Future<JSONObject>> results = Symphonys.EXECUTOR_SERVICE.invokeAll(spiders);
        for (final Future<JSONObject> result : results) {
            final JSONObject link = result.get();
            if (null == link) {
                continue;
            }

            ret.add(link);
        }
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so the caller can still observe the interruption.
        Thread.currentThread().interrupt();
        LOGGER.log(Level.ERROR, "Parses URLs failed", e);
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Parses URLs failed", e);
    }

    Collections.sort(ret, new Comparator<JSONObject>() {
        @Override
        public int compare(final JSONObject link1, final JSONObject link2) {
            // Integer.compare instead of subtraction: the difference of two ints can overflow.
            return Integer.compare(link1.optInt(Link.LINK_BAIDU_REF_CNT),
                    link2.optInt(Link.LINK_BAIDU_REF_CNT));
        }
    });

    return ret;
}
From source file:org.keionline.keionline.ArticleView.java
private String getContent(String url) throws IOException { Document doc = Jsoup.connect(url).userAgent("Mozilla").get(); Element data = doc.getElementsByClass("node").first();// get the third content div, Elements select = data.select("img"); // Change the links to absolute!! so that images work for (Element e : select) { e.attr("src", e.absUrl("src")); }/*www .j ava2s . com*/ select = data.select("a"); for (Element e : select) { e.attr("href", e.absUrl("href")); } Element info = data.getElementsByClass("submitted").first(); info.after("<hr>"); String cont = data.toString(); cont = CSS + cont + "</body>"; content = cont; return cont; }
From source file:org.opens.tanaguru.ruleimplementation.link.AbstractDownloadableLinkRuleImplementation.java
/**
 * Runs the inherited selection, then inspects the absolute {@code href} of every element
 * held by {@code elementHandler}: links for which {@code isLinkWithProperExtension} holds are
 * either removed from the selection (when the URI has a fragment) or recorded in
 * {@code linkWithSimpleExtension}.
 *
 * @param sspHandler handler passed through to the parent selection
 * @param elementHandler holder of the selected elements; its content is filtered in place
 */
@Override
protected void select(SSPHandler sspHandler, ElementHandler elementHandler) {
    super.select(sspHandler, elementHandler);

    Collection<Element> selected = (Collection<Element>) elementHandler.get();
    for (Iterator<Element> cursor = selected.iterator(); cursor.hasNext();) {
        Element link = cursor.next();
        try {
            URI target = new URI(link.absUrl(HREF_ATTR), true);
            if (!isLinkWithProperExtension(target)) {
                continue;
            }
            if (target.hasFragment()) {
                cursor.remove();
            } else {
                linkWithSimpleExtension.add(link);
            }
        } catch (URIException ignored) {
            // Best effort: an element whose URL cannot be parsed is left untouched.
        }
    }
}
From source file:org.opens.tanaguru.rules.elementchecker.helper.RuleCheckHelper.java
/** * /* ww w. j ava 2s .c o m*/ * @param element * @param attributeName * @param isExternalLink * @return */ private static String buildAttributeValue(Element element, String attributeName, boolean isExternalResource) { if (!element.hasAttr(attributeName)) { return ABSENT_ATTRIBUTE_VALUE; } else if (isExternalResource && !element.attr("abs:" + attributeName).isEmpty()) { return element.absUrl(attributeName); } else { return element.attr(attributeName); } }
From source file:org.sbs.goodcrawler.plugin.extract.ExtractorDytt8.java
/**
 * Extracts follow-up links and movie details from a crawled page.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Parses the page bytes with the page's charset, using {@code urlUtils.getBaseUrl} of the
 *       page URL as base URI; returns {@code null} when the page URL contains {@code "game/"}.</li>
 *   <li>Queues the absolute {@code href} of every {@code <a>} that is non-blank and accepted by
 *       {@code filterUrls} into {@code pendingUrls}, tagged with {@code conf.jobName}; queueing
 *       failures are only logged.</li>
 *   <li>Returns {@code null} when {@code #Zoom} matches nothing; otherwise fills a result map with
 *       {@code "movie"} (text of {@code h1}), {@code "type"} (second token of the {@code h2 a} text,
 *       or {@code "unknow"}) and {@code "url"}.</li>
 *   <li>For the {@code "content"} selector: stores the {@code src} of the images joined by
 *       {@code ";"} under {@code "img"}, feeds the pieces of each {@code <p>} to
 *       {@code getInfoName}, may store a substring of the paragraph HTML under {@code "jq"}, and
 *       stores the text of the {@code td} cells joined by {@code ";"} under {@code "download"}.</li>
 *   <li>Converts a non-blank {@code "nd"} entry to an {@code Integer}, attaches the map to the
 *       extracted page and adds it to {@code pendingStore}.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code selects} is assigned {@code null} and then dereferenced by
 * {@code selects.entrySet()}; the commented-out line suggests {@code conf.getSelects()} was
 * intended — confirm, as written this looks like it throws a NullPointerException.
 * <p>NOTE(review): several string literals are empty ({@code ""}) or {@code "?"}; they look like
 * non-ASCII markers lost to an encoding problem — restore them from the original source
 * before relying on the paragraph parsing.
 * <p>NOTE(review): the bare {@code s.trim();} statements discard their result, so the untrimmed
 * piece is what reaches {@code getInfoName}.
 *
 * @param page the crawled page; {@code null} yields {@code null}
 * @return the extracted page, or {@code null} when the page is skipped or its content cannot be
 *     decoded ({@code UnsupportedEncodingException} is logged)
 */
@Override public ExtractedPage<?, ?> onExtract(Page page) { if (null != page) { try {//from ww w . j a v a 2 s. com Document doc = Jsoup.parse(new String(page.getContentData(), page.getContentCharset()), urlUtils.getBaseUrl(page.getWebURL().getURL())); if (null != page.getWebURL().getURL() && page.getWebURL().getURL().contains("game/")) return null; // ???Url?Url Elements links = doc.getElementsByTag("a"); if (!links.isEmpty()) { for (Element link : links) { String linkHref = link.absUrl("href"); if (StringUtils.isNotBlank(linkHref) && filterUrls(linkHref)) { try { WebURL url = new WebURL(); url.setURL(linkHref); url.setJobName(conf.jobName); pendingUrls.addUrl(url); } catch (QueueException e) { log.error(e.getMessage()); } catch (Exception e) { log.error(e.getMessage()); } } } } // ?? // Map<String, String> selects = conf.getSelects(); Map<String, String> selects = null; ExtractedPage<String, Object> epage = pendingStore.new ExtractedPage<String, Object>(); epage.setUrl(page.getWebURL()); HashMap<String, Object> result = new HashMap<>(); Elements text = doc.select("#Zoom"); if (null == text || text.size() == 0) { return null; } String name = doc.select("h1").text(); name = name.replace("", "").replace("<<", "").replace("", "").replace(">>", ""); result.put("movie", name); // result.put("_id", name); String ts[] = doc.select("h2 a").text().split(" "); if (ts.length >= 2) { result.put("type", ts[1].trim()); } else { result.put("type", "unknow"); } result.put("url", page.getWebURL().getURL()); for (Entry<String, String> entry : selects.entrySet()) { Elements elements = doc.select(entry.getValue()); if (elements.isEmpty()) return null; else { if ("content".equals(entry.getKey())) { for (Element element : elements) { // Elements imgs = element.select("img[src]"); StringBuilder sb = new StringBuilder(); for (Element img : imgs) { sb.append(img.attr("src")).append(";"); } result.put("img", sb.toString()); // ? 
Elements movieInfos = element.select("p"); for (Element info : movieInfos) { String infotext = info.text(); try { String infotext_ = info.html(); int start, end = 0; start = infotext_.indexOf(""); if (start > 0) { end = infotext_.lastIndexOf(""); if (end > 0 && start < end) { result.put("jq", infotext_.substring(start, end)); } else { end = infotext_.lastIndexOf("."); if (end > 0 && start < end) { result.put("jq", infotext_.substring(start, end)); } } } infotext_ = null; } catch (Exception e) { e.printStackTrace(); } if (infotext.startsWith("")) { String ss[] = infotext.split(""); for (String s : ss) { s.trim(); result = getInfoName(s, result); } } else if (infotext.startsWith("?")) { String ss[] = infotext.split("?"); for (String s : ss) { s.trim(); result = getInfoName(s, result); } } else if (infotext.contains("")) { infotext = info.html(); String[] ss = infotext.split("<br />"); for (String s : ss) { s.trim(); result = getInfoName(s, result); } } else if (infotext.contains(":")) { infotext = info.html(); String[] ss = infotext.split("<br />"); for (String s : ss) { s.trim(); result = getInfoName(s, result); } } } // if(result.size()<5){ // result.put("content", value) // } // ? Elements elements2 = elements.select("td"); sb.setLength(0); for (Element download : elements2) { sb.append(download.text()).append(";"); } result.put("download", sb.toString()); } } } // result.put(entry.getKey(), elements.html()); } if (StringUtils.isNotBlank((String) result.get("nd"))) { result.put("nd", Integer.parseInt((String) result.get("nd"))); } epage.setMessages(result); try { pendingStore.addExtracedPage(epage); } catch (QueueException e) { log.error(e.getMessage()); } return epage; } catch (UnsupportedEncodingException e) { log.error(e.getMessage()); e.printStackTrace(); } } return null; }