Java HTML Jsoup Document convertLinksToAbsolute(String link, org.jsoup.nodes.Document doc)

Here you can find the source of convertLinksToAbsolute(String link, org.jsoup.nodes.Document doc)

Description

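Converts the links in a Jsoup document (the href of a and link elements, and the src of img and script elements) to absolute URLs, resolved against the scheme and authority of the given link.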

License

Open Source License

Declaration

public static org.jsoup.nodes.Document convertLinksToAbsolute(String link, org.jsoup.nodes.Document doc)
            throws MalformedURLException, URISyntaxException 

Method Source Code

//package com.java2s;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class Main {
    /**
     * Rewrites link-carrying attributes of the document to absolute URLs, in place.
     *
     * <p>The document's base URI is set to the value of {@link #getBaseLink(String)} for
     * {@code link}, and then the {@code href} of {@code a} and {@code link} elements and the
     * {@code src} of {@code img} and {@code script} elements are replaced by their
     * {@code abs:} counterparts. An attribute is left untouched when it is absent or when
     * no absolute URL could be produced for it, so that, for example, inline scripts do not
     * gain an empty {@code src} and unresolvable values are not wiped out. For {@code a}
     * elements, values starting with {@code #} (in-page bookmarks) are also left untouched.
     *
     * <p>Note that the base used for resolution is only the scheme and authority of
     * {@code link}; any path in {@code link} is ignored.
     *
     * @param link an absolute URL identifying where the document came from
     * @param doc the document to modify
     * @return the same {@code doc} instance, after modification
     * @throws MalformedURLException never thrown by this implementation; retained for
     *         source compatibility with existing callers
     * @throws URISyntaxException if {@code link} is not a valid URI or has no scheme or
     *         server authority
     */
    public static org.jsoup.nodes.Document convertLinksToAbsolute(String link, org.jsoup.nodes.Document doc)
            throws MalformedURLException, URISyntaxException {

        doc.setBaseUri(getBaseLink(link));

        // Only anchors skip "#..." values: those are in-page bookmarks and must stay relative.
        absolutizeAttribute(doc, "a", "href", true);
        absolutizeAttribute(doc, "img", "src", false);
        absolutizeAttribute(doc, "script", "src", false);
        absolutizeAttribute(doc, "link", "href", false);

        return doc;
    }

    /**
     * Replaces {@code attribute} on every {@code tag} element with its absolute form.
     *
     * @param doc the document whose elements are rewritten
     * @param tag the selector passed to {@code doc.select}
     * @param attribute the attribute holding the URL
     * @param skipFragments whether values starting with {@code #} are left as they are
     */
    private static void absolutizeAttribute(org.jsoup.nodes.Document doc, String tag, String attribute,
            boolean skipFragments) {
        String baseUri = doc.baseUri();
        for (Element e : doc.select(tag)) {
            e.setBaseUri(baseUri);

            // Never create the attribute: e.g. an inline <script> must not gain src="".
            if (!e.hasAttr(attribute)) {
                continue;
            }
            if (skipFragments && e.attr(attribute).startsWith("#")) {
                continue;
            }

            // An empty result is taken to mean no absolute URL could be built
            // (see the jsoup absUrl documentation); keep the original value then.
            String absolute = e.attr("abs:" + attribute);
            if (!absolute.isEmpty()) {
                e.attr(attribute, absolute);
            }
        }
    }

    /**
     * Parses {@code text} as HTML and converts its links to absolute URLs.
     *
     * @param link an absolute URL identifying where the HTML came from
     * @param text the HTML to parse
     * @return the HTML of the parsed document after its links have been converted
     * @throws MalformedURLException never thrown by this implementation; retained for
     *         source compatibility with existing callers
     * @throws URISyntaxException if {@code link} is not a valid URI or has no scheme or
     *         server authority
     */
    public static String convertLinksToAbsolute(String link, String text)
            throws MalformedURLException, URISyntaxException {

        org.jsoup.nodes.Document doc = Jsoup.parse(text);

        return convertLinksToAbsolute(link, doc).html();
    }

    /**
     * Returns the base of a link in the form {@code scheme://authority/}.
     *
     * @param link an absolute URL
     * @return the scheme and authority of {@code link}, followed by a slash
     * @throws URISyntaxException if {@code link} is not a valid URI, its authority cannot
     *         be parsed as a server authority, or it has no scheme or authority at all
     * @throws MalformedURLException never thrown by this implementation; retained for
     *         source compatibility with existing callers
     */
    public static String getBaseLink(String link) throws URISyntaxException, MalformedURLException {

        URI u = new URI(link).parseServerAuthority();

        String scheme = u.getScheme();
        String authority = u.getAuthority();
        // Without this check the concatenation below would yield "null://null/".
        if (scheme == null || authority == null) {
            throw new URISyntaxException(link, "Expected an absolute URL with a scheme and an authority");
        }

        return scheme + "://" + authority + "/";
    }
}

Related

  1. applyCacheKeysToResourceUrls(Document document, long pluginModifiedTimestamp, Locale locale)
  2. detectLanguage(Document doc)
  3. emptyDocument()
  4. formatDocument(Document doc)
  5. getAllText(Document document)