List of utility methods for working with HTML Jsoup Documents
Document | getDocument(final String url) get Document return getDocument(new URL(url)); |
Document | getDocument(String url) get Document return Jsoup.connect(url).timeout(TIME_OUT).get();
|
Document | getHtmlDocument(String url) get Html Document
// Fetches the page at the given address with Jsoup's default connection
// settings and returns the resulting document.
return Jsoup.connect(url).get();
|
String | getIcon(Document doc) get Icon String meta; try { meta = doc.head().select("link[href~=.*\\.ico]").first().attr("abs:href"); } catch (NullPointerException ignored) { String uri = new URI(doc.location()).getHost(); return uri.endsWith("/") ? uri + "favicon.ico" : uri + "/favicon.ico"; return meta; ... |
List | getInfoboxLines(final Document html, final boolean stripColor) get Infobox Lines final Optional<String> infoboxData = html.getElementsByTag("script").stream().map(Element::data) .filter(data -> data.contains("arkup.printHtml")).findFirst(); if (!infoboxData.isPresent()) { return Collections.emptyList(); final String infoboxMarkup = getRegexGroup(infoboxData.get(), "[Mm]arkup\\.printHtml\\((['\"])(.*)\\1, 'infobox", 2).get().replace("\\/", "/"); final Matcher matcher = Pattern.compile("\\\\x([0-9A-Z]{2})").matcher(infoboxMarkup); ... |
Document | getJSoupHtmlDocument(final String url) get J Soup Html Document Document result = null; try { Connection dom = Jsoup.connect(url); result = dom.get(); } catch (IOException ioe) { System.err.println(ioe.getMessage()); return result; ... |
Document | getJSoupXmlDocument(final String url) get J Soup Xml Document
// Obtains the raw text via getTextFromURL (helper defined elsewhere; presumably
// downloads the body at the address) and parses it with Jsoup's XML parser,
// using an empty base URI.
return Jsoup.parse(getTextFromURL(url), "", Parser.xmlParser());
|
String[] | getLoginFields(Document doc) get Login Fields
// Collects the id attributes of the first three <input> elements whose id
// starts with "txt" followed by word characters. Fails with an
// index-out-of-bounds error if the page has fewer than three such inputs.
String[] ids = new String[3];
Elements txtInputs = doc.select("input[id~=^txt\\w+]");
int idx = 0;
while (idx < 3) {
    ids[idx] = txtInputs.get(idx).attr("id");
    idx++;
}
return ids;
|
String | getTextFromAvailableDivID(Document doc, String divID) get Text From Available Div ID
// Returns the text of the element identified by divID, or "none" when no
// element matches.
// The method name and parameter promise an id lookup, so the id is tried
// first. The class-name lookup that was previously the only strategy is kept
// as a fallback so callers passing a class name keep working.
Element element = doc.getElementById(divID);
if (element == null) {
    // Elements.first() yields null when nothing matched.
    element = doc.getElementsByClass(divID).first();
}
return (element != null) ? element.text() : "none";
|
String | getTitleFromDocument(Document doc) get Title From Document
// Returns the part of the document title that precedes the first '-',
// trimmed of surrounding whitespace. A null document is treated as having
// the title "none".
final String title = (doc != null) ? doc.title() : "none";
// A limit of 2 keeps trailing empty pieces, so a title made only of hyphens
// (e.g. "-") yields "" instead of an empty array and an
// ArrayIndexOutOfBoundsException on [0].
return title.split("-", 2)[0].trim();