Java Utility Methods HTML Jsoup Document

List of utility methods for working with HTML Jsoup Document objects

Description

The methods for working with HTML Jsoup Document objects are organized by topic.

Method

DocumentgetDocument(final String url)
get Document
return getDocument(new URL(url));
DocumentgetDocument(String url)
get Document
return Jsoup.connect(url).timeout(TIME_OUT).get();
DocumentgetHtmlDocument(String url)
get Html Document
Document doc = Jsoup.connect(url).get();
return doc;
String getIcon(Document doc)
get Icon
String meta;
try {
    meta = doc.head().select("link[href~=.*\\.ico]").first().attr("abs:href");
} catch (NullPointerException ignored) {
    String uri = new URI(doc.location()).getHost();
    return uri.endsWith("/") ? uri + "favicon.ico" : uri + "/favicon.ico";
return meta;
...
List getInfoboxLines(final Document html, final boolean stripColor)
get Infobox Lines
final Optional<String> infoboxData = html.getElementsByTag("script").stream().map(Element::data)
        .filter(data -> data.contains("arkup.printHtml")).findFirst();
if (!infoboxData.isPresent()) {
    return Collections.emptyList();
final String infoboxMarkup = getRegexGroup(infoboxData.get(),
        "[Mm]arkup\\.printHtml\\((['\"])(.*)\\1, 'infobox", 2).get().replace("\\/", "/"); 
final Matcher matcher = Pattern.compile("\\\\x([0-9A-Z]{2})").matcher(infoboxMarkup);
...
Document getJSoupHtmlDocument(final String url)
get J Soup Html Document
Document result = null;
try {
    Connection dom = Jsoup.connect(url);
    result = dom.get();
} catch (IOException ioe) {
    System.err.println(ioe.getMessage());
return result;
...
DocumentgetJSoupXmlDocument(final String url)
get J Soup Xml Document
final String rawXml = getTextFromURL(url);
return Jsoup.parse(rawXml, "", Parser.xmlParser());
String[]getLoginFields(Document doc)
get Login Fields
String[] fields = new String[3];
Elements eleInputFields = doc.select("input[id~=^txt\\w+]");
for (int i = 0; i < 3; i++) {
    fields[i] = eleInputFields.get(i).attr("id");
return fields;
StringgetTextFromAvailableDivID(Document doc, String divID)
get Text From Available Div ID
Elements elementsById = doc.getElementsByClass(divID);
if (elementsById.size() > 0) {
    Element element = elementsById.first();
    String result = element.text();
    return result;
} else {
    return "none";
StringgetTitleFromDocument(Document doc)
get Title From Document
String title = (doc != null) ? doc.title() : "none";
String[] titleText = title.split("-");
return titleText[0].trim();