Java HTML Jsoup Document getDocument(final String url)

Here you can find the source of getDocument(final String url)

Description

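Fetches the resource at the given URL with Jsoup (60000 ms timeout, desktop Chrome user agent, any content type accepted) and returns it as a Jsoup Document.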

License

Open Source License

Declaration

public static final Document getDocument(final String url) throws MalformedURLException, IOException 

Method Source Code


//package com.java2s;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Static helpers that download a URL through Jsoup and hand back the
 * resulting {@link Document}.
 */
public class Main {
    /** Value passed to {@code userAgent(...)} on every request made by this class. */
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17";

    /** Value passed to {@code timeout(...)} on every request made by this class. */
    private static final int CONNECTION_TIMEOUT = 60000;

    /**
     * Converts the given string to a {@link URL} and delegates to
     * {@link #getDocument(URL)}.
     *
     * @param url textual form of the address to fetch
     * @return the document produced by {@link #getDocument(URL)}
     * @throws MalformedURLException if {@code new URL(url)} rejects the string
     * @throws IOException propagated from {@link #getDocument(URL)}
     */
    public static final Document getDocument(final String url) throws MalformedURLException, IOException {
        final URL target = new URL(url);
        return getDocument(target);
    }

    /**
     * Issues a GET request for the given URL via Jsoup, using this class's
     * user agent and timeout and with content-type checking disabled.
     *
     * @param url address to fetch
     * @return the document returned by Jsoup's {@code get()}
     * @throws IOException propagated from the Jsoup request
     */
    public static final Document getDocument(final URL url) throws IOException {
        // URL.toString() is defined as toExternalForm(), so the address is unchanged.
        final String address = url.toExternalForm();
        return Jsoup.connect(address)
                .userAgent(USER_AGENT)
                .timeout(CONNECTION_TIMEOUT)
                .ignoreContentType(true)
                .get();
    }
}

Related

  1. getAllText(Document document)
  2. getCategoryIds(final Document html)
  3. getContainersForLink(Document document, String link)
  4. getDivForClass(Document document, String className)
  5. getDocument(CloseableHttpClient client, String url)
  6. getDocument(String url)
  7. getHtmlDocument(String url)
  8. getIcon(Document doc)
  9. getInfoboxLines(final Document html, final boolean stripColor)