Example usage for org.jsoup.nodes Element absUrl

List of usage examples for org.jsoup.nodes Element absUrl

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.absUrl.

Prototype

public String absUrl(String attributeKey) 

Source Link

Document

Get an absolute URL from a URL attribute that may be relative (i.e. an `<a href>` or `<img src>`).

Usage

From source file:cc.metapro.openct.borrow.BorrowPresenter.java

/**
 * Starts the library login and, once the user center page arrives, tries to reach the
 * borrow info page by following the configured borrow URL patterns.
 *
 * <ul>
 * <li>No pattern configured: the link selection dialog is shown.</li>
 * <li>One pattern: the matching link on the user center page is loaded directly.</li>
 * <li>Several patterns: each pattern is matched against the page fetched for the previous
 * match, and the last matched link is loaded.</li>
 * </ul>
 *
 * @param manager fragment manager handed to the page loader and the link selection dialog
 * @param code    value forwarded to {@code LocalHelper.login}
 * @return always {@code null}
 */
@Override
public Disposable loadUserCenter(final FragmentManager manager, final String code) {
    ActivityUtils.showProgressDialog(mContext, R.string.loading_borrows);
    Observable<Document> loginFlow = LocalHelper.login(Constants.TYPE_LIB, mContext, code);

    Observer<Document> loginObserver = new MyObserver<Document>(TAG) {
        @Override
        public void onNext(final Document userCenterDom) {
            super.onNext(userCenterDom);
            Constants.checkAdvCustomInfo(mContext);
            final List<String> urlPatterns = Constants.sDetailCustomInfo.getBorrowUrlPatterns();

            // Nothing configured: let the user pick the link manually.
            if (urlPatterns.isEmpty()) {
                ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_BORROW, userCenterDom,
                        BorrowPresenter.this);
                return;
            }

            if (urlPatterns.size() == 1) {
                // A single pattern is matched against the user center page itself.
                Element sample = Jsoup.parse(urlPatterns.get(0)).body().children().first();
                Element target = HTMLUtils.getElementSimilar(userCenterDom, sample);
                if (target != null) {
                    loadTargetPage(manager, target.absUrl("href"));
                }
                return;
            }

            // Several patterns: walk page by page until the last pattern has been applied.
            Observable<String> targetUrlFlow = Observable.create(new ObservableOnSubscribe<String>() {
                @Override
                public void subscribe(ObservableEmitter<String> emitter) throws Exception {
                    LibraryFactory library = LocalHelper.getLibrary(mContext);
                    Document currentDom = userCenterDom;
                    Element matched = null;
                    for (String pattern : urlPatterns) {
                        if (currentDom != null) {
                            Element sample = Jsoup.parse(pattern).body().children().first();
                            matched = HTMLUtils.getElementSimilar(currentDom, sample);
                        }
                        if (matched != null) {
                            currentDom = library.getBorrowPageDom(matched.absUrl("href"));
                        }
                    }

                    // Nothing is emitted when no element was ever matched.
                    if (matched != null) {
                        emitter.onNext(matched.absUrl("href"));
                    }
                }
            });

            Observer<String> targetUrlObserver = new MyObserver<String>(TAG) {
                @Override
                public void onNext(String targetUrl) {
                    loadTargetPage(manager, targetUrl);
                }
            };

            targetUrlFlow
                    .subscribeOn(Schedulers.io())
                    .observeOn(AndroidSchedulers.mainThread())
                    .subscribe(targetUrlObserver);
        }

        @Override
        public void onError(Throwable e) {
            super.onError(e);
            ActivityUtils.showAdvCustomTip(mContext, Constants.TYPE_BORROW);
            Toast.makeText(mContext, e.getMessage(), Toast.LENGTH_LONG).show();
        }
    };

    loginFlow
            .subscribeOn(Schedulers.newThread())
            .observeOn(AndroidSchedulers.mainThread())
            .subscribe(loginObserver);

    return null;
}

From source file:cc.metapro.openct.grades.GradePresenter.java

/**
 * Starts the CMS login and, once the user center page arrives, tries to reach the grade
 * info page by following the configured grade URL patterns.
 *
 * <ul>
 * <li>No pattern configured: the link selection dialog is shown.</li>
 * <li>One pattern: the matching link on the user center page is loaded directly.</li>
 * <li>Several patterns: each pattern is matched against the page fetched for the previous
 * match, and the last matched link is loaded. If no element was ever matched, an error is
 * signalled to the observer instead of dereferencing a {@code null} element.</li>
 * </ul>
 *
 * @param manager fragment manager handed to the page loader and the link selection dialog
 * @param code    value forwarded to {@code LocalHelper.login}
 * @return always {@code null}
 */
@Override
public Disposable loadUserCenter(final FragmentManager manager, final String code) {
    ActivityUtils.showProgressDialog(mContext, R.string.login_to_system);

    Observable<Document> observable = LocalHelper.login(Constants.TYPE_CMS, mContext, code);

    Observer<Document> observer = new MyObserver<Document>(TAG) {
        @Override
        public void onNext(final Document userCenterDom) {
            super.onNext(userCenterDom);
            Constants.checkAdvCustomInfo(mContext);
            final List<String> urlPatterns = Constants.sDetailCustomInfo.getGradeUrlPatterns();

            // Nothing configured: let the user pick the link manually.
            if (urlPatterns.isEmpty()) {
                ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_GRADE, userCenterDom,
                        GradePresenter.this);
                return;
            }

            if (urlPatterns.size() == 1) {
                // fetch first page from user center, it will find the grade info page in most cases
                Element target = HTMLUtils.getElementSimilar(userCenterDom,
                        Jsoup.parse(urlPatterns.get(0)).body().children().first());
                if (target != null) {
                    loadTargetPage(manager, target.absUrl("href"));
                }
                return;
            }

            // fetch more pages to reach the grade info page, especially in QZ Data Soft CMS System
            Observable<String> extraObservable = Observable.create(new ObservableOnSubscribe<String>() {
                @Override
                public void subscribe(ObservableEmitter<String> e) throws Exception {
                    CmsFactory factory = LocalHelper.getCms(mContext);
                    Document lastDom = userCenterDom;
                    Element finalTarget = null;
                    for (String pattern : urlPatterns) {
                        if (lastDom != null) {
                            finalTarget = HTMLUtils.getElementSimilar(lastDom,
                                    Jsoup.parse(pattern).body().children().first());
                        }
                        if (finalTarget != null) {
                            lastDom = factory.getPageDom(finalTarget.absUrl("href"));
                        }
                    }

                    // finalTarget stays null when no pattern matched any page; report that
                    // explicitly rather than failing with a NullPointerException.
                    if (finalTarget != null) {
                        e.onNext(finalTarget.absUrl("href"));
                    } else {
                        e.onError(new IllegalStateException(
                                "No element matched the configured grade URL patterns"));
                    }
                }
            });

            Observer<String> extraObserver = new MyObserver<String>(TAG) {
                @Override
                public void onNext(String targetUrl) {
                    loadTargetPage(manager, targetUrl);
                }
            };

            extraObservable.subscribeOn(Schedulers.io()).observeOn(AndroidSchedulers.mainThread())
                    .subscribe(extraObserver);
        }

        @Override
        public void onError(Throwable e) {
            super.onError(e);
            ActivityUtils.showAdvCustomTip(mContext, Constants.TYPE_GRADE);
            Toast.makeText(mContext, e.getMessage(), Toast.LENGTH_LONG).show();
        }
    };

    observable.observeOn(AndroidSchedulers.mainThread()).subscribe(observer);

    return null;
}

From source file:cc.metapro.openct.myclass.ClassPresenter.java

/**
 * Starts the CMS login and, once the user center page arrives, tries to reach the class
 * info page by following the configured class URL patterns.
 *
 * <ul>
 * <li>No pattern configured: the link selection dialog is shown.</li>
 * <li>One pattern: the matching link on the user center page is loaded, or the link
 * selection dialog is shown when nothing matches.</li>
 * <li>Several patterns: each pattern is matched against the page fetched for the previous
 * match; the last matched link is loaded, or an error is signalled when nothing matched.</li>
 * </ul>
 *
 * @param manager fragment manager handed to the page loader and the link selection dialog
 * @param code    value forwarded to {@code LocalHelper.login}
 * @return always {@code null}
 */
@Override
public Disposable loadUserCenter(final FragmentManager manager, final String code) {
    ActivityUtils.showProgressDialog(mContext, R.string.login_to_system);

    Observable<Document> loginFlow = LocalHelper.login(Constants.TYPE_CMS, mContext, code);

    Observer<Document> loginObserver = new MyObserver<Document>(TAG) {
        @Override
        public void onNext(final Document userCenterDom) {
            super.onNext(userCenterDom);
            Constants.checkAdvCustomInfo(mContext);
            final List<String> urlPatterns = Constants.sDetailCustomInfo.getClassUrlPatterns();

            // Nothing configured: let the user pick the link manually.
            if (urlPatterns.isEmpty()) {
                ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_CLASS, userCenterDom,
                        ClassPresenter.this);
                return;
            }

            if (urlPatterns.size() == 1) {
                // A single pattern is matched against the user center page itself.
                Element sample = Jsoup.parse(urlPatterns.get(0)).body().children().first();
                Element target = HTMLUtils.getElementSimilar(userCenterDom, sample);
                if (target == null) {
                    ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_CLASS, userCenterDom,
                            ClassPresenter.this);
                } else {
                    loadTargetPage(manager, target.absUrl("href"));
                }
                return;
            }

            // Several patterns: walk page by page until the last pattern has been applied.
            Observable<String> targetUrlFlow = Observable.create(new ObservableOnSubscribe<String>() {
                @Override
                public void subscribe(ObservableEmitter<String> emitter) throws Exception {
                    CmsFactory cms = LocalHelper.getCms(mContext);
                    Document currentDom = userCenterDom;
                    Element matched = null;
                    for (String pattern : urlPatterns) {
                        if (currentDom != null) {
                            Element sample = Jsoup.parse(pattern).body().children().first();
                            matched = HTMLUtils.getElementSimilar(currentDom, sample);
                        }
                        if (matched != null) {
                            currentDom = cms.getPageDom(matched.absUrl("href"));
                        }
                    }

                    if (matched == null) {
                        emitter.onError(new Exception("failed"));
                    } else {
                        emitter.onNext(matched.absUrl("href"));
                    }
                }
            });

            Observer<String> targetUrlObserver = new MyObserver<String>(TAG) {
                @Override
                public void onNext(String targetUrl) {
                    loadTargetPage(manager, targetUrl);
                }

                @Override
                public void onError(Throwable e) {
                    super.onError(e);
                    // Tell the user, then fall back to manual link selection.
                    Toast.makeText(mContext, R.string.can_not_fetch_target_page, Toast.LENGTH_LONG)
                            .show();
                    ActivityUtils.showLinkSelectionDialog(manager, Constants.TYPE_CLASS, userCenterDom,
                            ClassPresenter.this);
                }
            };

            targetUrlFlow
                    .subscribeOn(Schedulers.io())
                    .observeOn(AndroidSchedulers.mainThread())
                    .subscribe(targetUrlObserver);
        }

        @Override
        public void onError(Throwable e) {
            super.onError(e);
            ActivityUtils.showAdvCustomTip(mContext, Constants.TYPE_CLASS);
            Toast.makeText(mContext, e.getMessage(), Toast.LENGTH_LONG).show();
        }
    };

    loginFlow.observeOn(AndroidSchedulers.mainThread()).subscribe(loginObserver);

    return null;
}

From source file:org.asqatasun.ruleimplementation.link.AbstractDownloadableLinkRuleImplementation.java

/**
 * Runs the parent selection, then inspects the absolute {@code href} of every selected
 * element. For links that pass {@code isLinkWithProperExtension}: a link with a non-blank
 * fragment is removed from the selection, any other is added to
 * {@code linkWithSimpleExtension}.
 *
 * @param sspHandler handler forwarded to the parent selection
 */
@Override
protected void select(SSPHandler sspHandler) {
    super.select(sspHandler);
    for (Iterator<Element> it = getElements().get().iterator(); it.hasNext();) {
        Element link = it.next();
        try {
            URI uri = new URI(link.absUrl(HREF_ATTR));
            if (!isLinkWithProperExtension(uri)) {
                continue;
            }
            if (StringUtils.isNotBlank(uri.getFragment())) {
                it.remove();
            } else {
                linkWithSimpleExtension.add(link);
            }
        } catch (Exception ignored) {
            // Any failure while inspecting this link leaves the element untouched in the selection.
        }
    }
}

From source file:org.asqatasun.rules.elementchecker.ElementCheckerImpl.java

/**
 * /*  w w w.j  av a 2s.  c  o m*/
 * @param element
 * @param attributeName
 * @param isExternalResource
 * @return the text content of an attribute
 */
protected String buildAttributeContent(Element element, String attributeName, boolean isExternalResource) {
    if (!element.hasAttr(attributeName)) {
        return ABSENT_ATTRIBUTE_VALUE;
    } else if (isExternalResource && !element.attr(ABS_URL_PREFIX + attributeName).isEmpty()) {
        return element.absUrl(attributeName).trim();
    } else {
        return element.attr(attributeName).trim();
    }
}

From source file:org.b3log.symphony.util.Links.java

/**
 * Gets links from the specified HTML.
 *
 * <p>
 * Every {@code <a>} element's {@code href} is resolved against the base URL, normalized to
 * {@code protocol://host[:port]path} (ports 80 and 443 are omitted, a trailing slash is
 * dropped, query and fragment are discarded) and de-duplicated. One {@code Spider} per URL is
 * then run on {@code Symphonys.EXECUTOR_SERVICE}; {@code null} results are skipped. The
 * returned list is sorted in ascending order of {@code Link.LINK_BAIDU_REF_CNT}.
 * </p>
 *
 * @param baseURL the specified base URL
 * @param html the specified HTML
 * @return a list of links, each of them like this:      <pre>
 * {
 *     "linkAddr": "https://hacpai.com/article/1440573175609",
 *     "linkTitle": "",
 *     "linkKeywords": "",
 *     "linkHTML": "page HTML",
 *     "linkText": "page text",
 *     "linkBaiduRefCnt": int
 * }
 * </pre>
 */
public static List<JSONObject> getLinks(final String baseURL, final String html) {
    final Document doc = Jsoup.parse(html, baseURL);
    final Elements urlElements = doc.select("a");

    final Set<String> urls = new HashSet<>();
    final List<Spider> spiders = new ArrayList<>();

    String url = null;
    for (final Element urlEle : urlElements) {
        try {
            url = urlEle.absUrl("href");
            if (StringUtils.isBlank(url) || !StringUtils.contains(url, "://")) {
                url = StringUtils.substringBeforeLast(baseURL, "/") + url;
            }

            final URL formedURL = new URL(url);
            final String protocol = formedURL.getProtocol();
            final String host = formedURL.getHost();
            final int port = formedURL.getPort();
            final String path = formedURL.getPath();

            url = protocol + "://" + host;
            if (-1 != port && 80 != port && 443 != port) {
                url += ":" + port;
            }
            url += path;

            if (StringUtils.endsWith(url, "/")) {
                url = StringUtils.substringBeforeLast(url, "/");
            }

            urls.add(url);
        } catch (final Exception e) {
            LOGGER.warn("Can't parse [" + url + "]");
        }
    }

    final List<JSONObject> ret = new ArrayList<>();

    try {
        for (final String u : urls) {
            spiders.add(new Spider(u));
        }

        final List<Future<JSONObject>> results = Symphonys.EXECUTOR_SERVICE.invokeAll(spiders);
        for (final Future<JSONObject> result : results) {
            final JSONObject link = result.get();
            if (null == link) {
                continue;
            }

            ret.add(link);
        }
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so callers further up can still observe the interruption.
        Thread.currentThread().interrupt();
        LOGGER.log(Level.ERROR, "Parses URLs failed", e);
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Parses URLs failed", e);
    }

    Collections.sort(ret, new Comparator<JSONObject>() {
        @Override
        public int compare(final JSONObject link1, final JSONObject link2) {
            // Integer.compare avoids the overflow that plain subtraction can produce.
            return Integer.compare(link1.optInt(Link.LINK_BAIDU_REF_CNT),
                    link2.optInt(Link.LINK_BAIDU_REF_CNT));
        }
    });

    return ret;
}

From source file:org.keionline.keionline.ArticleView.java

/**
 * Downloads the page at the given URL and builds the HTML shown for the article.
 *
 * <p>
 * The first element with class {@code node} is taken as the article. Its image sources and
 * link targets are rewritten to absolute URLs, an {@code <hr>} is inserted after the first
 * element with class {@code submitted} (when there is one), and the result is wrapped between
 * {@code CSS} and a closing {@code </body>} tag. The result is also stored in the
 * {@code content} field.
 * </p>
 *
 * @param url address of the article page
 * @return the assembled HTML
 * @throws IOException if the page cannot be fetched or contains no {@code node} element
 */
private String getContent(String url) throws IOException {
    Document doc = Jsoup.connect(url).userAgent("Mozilla").get();

    // first() yields null when nothing matches; fail with a clear message instead of an NPE.
    Element data = doc.getElementsByClass("node").first();
    if (data == null) {
        throw new IOException("No element with class \"node\" found at " + url);
    }

    // Change the links to absolute!! so that images work
    for (Element img : data.select("img")) {
        img.attr("src", img.absUrl("src"));
    }
    for (Element anchor : data.select("a")) {
        anchor.attr("href", anchor.absUrl("href"));
    }

    // The separator is only cosmetic, so a missing "submitted" element is not an error.
    Element info = data.getElementsByClass("submitted").first();
    if (info != null) {
        info.after("<hr>");
    }

    String cont = CSS + data.toString() + "</body>";
    content = cont;
    return cont;
}

From source file:org.opens.tanaguru.ruleimplementation.link.AbstractDownloadableLinkRuleImplementation.java

/**
 * Runs the parent selection, then inspects the absolute {@code href} of every element held
 * by the element handler. For links that pass {@code isLinkWithProperExtension}: a link
 * whose URI has a fragment is removed from the handler's collection, any other is added to
 * {@code linkWithSimpleExtension}.
 *
 * @param sspHandler handler forwarded to the parent selection
 * @param elementHandler holder of the selected elements; it is filtered in place
 */
@Override
protected void select(SSPHandler sspHandler, ElementHandler elementHandler) {
    super.select(sspHandler, elementHandler);
    Collection<Element> selected = (Collection<Element>) elementHandler.get();
    for (Iterator<Element> it = selected.iterator(); it.hasNext();) {
        Element link = it.next();
        try {
            // NOTE(review): the boolean flag presumably marks the string as already escaped — confirm.
            URI uri = new URI(link.absUrl(HREF_ATTR), true);
            if (!isLinkWithProperExtension(uri)) {
                continue;
            }
            if (uri.hasFragment()) {
                it.remove();
            } else {
                linkWithSimpleExtension.add(link);
            }
        } catch (URIException ignored) {
            // A href that cannot be parsed as a URI leaves the element untouched.
        }
    }
}

From source file:org.opens.tanaguru.rules.elementchecker.helper.RuleCheckHelper.java

/**
 * /*  ww  w.  j ava  2s .c o  m*/
 * @param element
 * @param attributeName
 * @param isExternalLink
 * @return 
 */
private static String buildAttributeValue(Element element, String attributeName, boolean isExternalResource) {
    if (!element.hasAttr(attributeName)) {
        return ABSENT_ATTRIBUTE_VALUE;
    } else if (isExternalResource && !element.attr("abs:" + attributeName).isEmpty()) {
        return element.absUrl(attributeName);
    } else {
        return element.attr(attributeName);
    }
}

From source file:org.sbs.goodcrawler.plugin.extract.ExtractorDytt8.java

/**
 * Extracts movie information from a crawled page and queues the page's outgoing links.
 *
 * <p>
 * The page bytes are parsed with jsoup, every {@code <a>} whose absolute {@code href} is
 * non-blank and passes {@code filterUrls} is added to {@code pendingUrls}, and a result map
 * (keys such as {@code movie}, {@code type}, {@code url}, {@code img}, {@code jq},
 * {@code download}, {@code nd}) is filled and stored in {@code pendingStore}.
 * </p>
 *
 * <p>
 * NOTE(review): several string literals below are empty or {@code "?"}; they look like
 * non-ASCII text lost to an encoding problem. Verify against the upstream source before
 * relying on this logic.
 * </p>
 *
 * @param page the crawled page, may be {@code null}
 * @return the extracted page, or {@code null} when {@code page} is {@code null}, the URL
 *         contains {@code "game/"}, no {@code #Zoom} element exists, a configured selector
 *         matches nothing, or the content charset is unsupported
 */
@Override
public ExtractedPage<?, ?> onExtract(Page page) {
    if (null != page) {
        try {

            Document doc = Jsoup.parse(new String(page.getContentData(), page.getContentCharset()),
                    urlUtils.getBaseUrl(page.getWebURL().getURL()));
            // Pages under "game/" are skipped entirely.
            if (null != page.getWebURL().getURL() && page.getWebURL().getURL().contains("game/"))
                return null;
            // Queue every outgoing link that passes filterUrls (original comment was garbled).
            Elements links = doc.getElementsByTag("a");
            if (!links.isEmpty()) {
                for (Element link : links) {
                    String linkHref = link.absUrl("href");
                    if (StringUtils.isNotBlank(linkHref) && filterUrls(linkHref)) {
                        try {
                            WebURL url = new WebURL();

                            url.setURL(linkHref);
                            url.setJobName(conf.jobName);
                            pendingUrls.addUrl(url);
                        } catch (QueueException e) {
                            log.error(e.getMessage());
                        } catch (Exception e) {
                            log.error(e.getMessage());
                        }
                    }
                }
            }
            // Extraction of the page content starts here (original comment was garbled).
            //            Map<String, String> selects = conf.getSelects();
            // NOTE(review): selects is null here and is dereferenced by the for loop further
            // down, which would throw a NullPointerException — confirm the intended source.
            Map<String, String> selects = null;
            ExtractedPage<String, Object> epage = pendingStore.new ExtractedPage<String, Object>();
            epage.setUrl(page.getWebURL());
            HashMap<String, Object> result = new HashMap<>();
            // A page without a #Zoom element is not treated as a movie page.
            Elements text = doc.select("#Zoom");
            if (null == text || text.size() == 0) {
                return null;
            }
            String name = doc.select("h1").text();
            // NOTE(review): the empty-string targets are no-ops as written; presumably they were
            // non-ASCII quote characters — verify.
            name = name.replace("", "").replace("<<", "").replace("", "").replace(">>", "");
            result.put("movie", name);
            //            result.put("_id", name);
            // The second space-separated token of the "h2 a" text is used as the type.
            String ts[] = doc.select("h2 a").text().split(" ");
            if (ts.length >= 2) {
                result.put("type", ts[1].trim());
            } else {
                result.put("type", "unknow");
            }
            result.put("url", page.getWebURL().getURL());
            for (Entry<String, String> entry : selects.entrySet()) {
                Elements elements = doc.select(entry.getValue());
                if (elements.isEmpty())
                    return null;
                else {
                    if ("content".equals(entry.getKey())) {

                        for (Element element : elements) {
                            // Collect all image sources as a ';'-terminated list.
                            Elements imgs = element.select("img[src]");
                            StringBuilder sb = new StringBuilder();
                            for (Element img : imgs) {
                                sb.append(img.attr("src")).append(";");
                            }
                            result.put("img", sb.toString());
                            // Walk the paragraphs looking for movie information.
                            Elements movieInfos = element.select("p");
                            for (Element info : movieInfos) {
                                String infotext = info.text();
                                try {
                                    // Cut the "jq" value out of the paragraph HTML between two markers.
                                    // NOTE(review): both markers are empty strings as written — verify.
                                    String infotext_ = info.html();
                                    int start, end = 0;
                                    start = infotext_.indexOf("");
                                    if (start > 0) {
                                        end = infotext_.lastIndexOf("");
                                        if (end > 0 && start < end) {
                                            result.put("jq", infotext_.substring(start, end));
                                        } else {
                                            end = infotext_.lastIndexOf(".");
                                            if (end > 0 && start < end) {
                                                result.put("jq", infotext_.substring(start, end));
                                            }
                                        }
                                    }
                                    infotext_ = null;
                                } catch (Exception e) {
                                    e.printStackTrace();
                                }

                                // Split the paragraph into pieces and hand each one to getInfoName.
                                // NOTE(review): every s.trim() below discards its result, so the
                                // untrimmed piece is what gets passed on.
                                if (infotext.startsWith("")) {
                                    String ss[] = infotext.split("");
                                    for (String s : ss) {
                                        s.trim();
                                        result = getInfoName(s, result);
                                    }
                                } else if (infotext.startsWith("?")) {
                                    // NOTE(review): "?" is a regex metacharacter for String.split;
                                    // likely a garbled literal — verify.
                                    String ss[] = infotext.split("?");
                                    for (String s : ss) {
                                        s.trim();
                                        result = getInfoName(s, result);
                                    }
                                } else if (infotext.contains("")) {
                                    infotext = info.html();
                                    String[] ss = infotext.split("<br />");
                                    for (String s : ss) {
                                        s.trim();
                                        result = getInfoName(s, result);
                                    }
                                } else if (infotext.contains(":")) {
                                    infotext = info.html();
                                    String[] ss = infotext.split("<br />");
                                    for (String s : ss) {
                                        s.trim();
                                        result = getInfoName(s, result);
                                    }
                                }
                            }

                            //                        if(result.size()<5){
                            //                           result.put("content", value)
                            //                        }

                            // Collect the text of every table cell as a ';'-terminated download list.
                            Elements elements2 = elements.select("td");
                            sb.setLength(0);
                            for (Element download : elements2) {
                                sb.append(download.text()).append(";");
                            }
                            result.put("download", sb.toString());
                        }
                    }
                }
                //               result.put(entry.getKey(), elements.html());
            }
            // Store "nd" as an Integer when present; a non-numeric value makes parseInt throw.
            if (StringUtils.isNotBlank((String) result.get("nd"))) {
                result.put("nd", Integer.parseInt((String) result.get("nd")));
            }
            epage.setMessages(result);
            try {
                pendingStore.addExtracedPage(epage);
            } catch (QueueException e) {
                log.error(e.getMessage());
            }
            return epage;
        } catch (UnsupportedEncodingException e) {
            log.error(e.getMessage());
            e.printStackTrace();
        }
    }
    return null;
}