Java Utility Methods: HTML Parsing with Jsoup

List of utility methods for parsing HTML with Jsoup

Description

The methods for parsing HTML with Jsoup are organized by topic.

Method

Elementparse(String html)
parse
return Jsoup.parse(html);
Document parse(URL url, int timeout)
parse
Document doc = null;
final int LIMIT = 10;
final int LIMIT_SLEEP = 2;
int iteration = 0;
while (null == doc) {
    try {
        doc = Jsoup.connect(url.toString()).timeout(timeout).referrer("http://www.google.com/search")
                .userAgent(
...
Document parseByteData(ByteBuffer byteData, String charsetName, String baseUri, Parser parser)
parse Byte Data
String docData;
Document doc = null;
if (charsetName == null) { 
    docData = Charset.forName(defaultCharset).decode(byteData).toString();
    doc = parser.parseInput(docData, baseUri);
    Element meta = doc.select("meta[http-equiv=content-type], meta[charset]").first();
    if (meta != null) { 
        String foundCharset;
...
StringparseEmail(String content)
parse Email
StringBuffer sb = new StringBuffer();
Document document = Jsoup.parse(content.toString());
Elements div = document.getElementsByTag("span");
for (Element e : div) {
    sb.append(e.text());
return sb.toString();
DocumentparseFile(String filePath)
parse File
File inputFile = new File(filePath);
return Jsoup.parse(inputFile, "UTF-8");
voidparseInfoBody(Element element)
parse Info Body
element.getElementById("mf-section-0").remove();
Elements tabs = element.select("h2");
for (int i = 0, total = tabs.size(); i < total; i++) {
    Element item = tabs.get(i);
    System.err.println(item.text());
    Elements mf_section = element.getElementsByClass("mf-section-" + (i + 1));
    System.err.println(mf_section.text());
System.err.println(tabs.size());
void parseInfoHeader(Element element)
parse Info Header
Element infotemplatebox = element.getElementsByClass("infotemplatebox").first();
if (infotemplatebox != null) {
    parseTemplate1_1(element);
    return;
infotemplatebox = element.select("table").first();
if (infotemplatebox != null) {
    parseTemplate1_2(element);
...
MapparsePropertyTable(Element table)
parse Property Table
Map<String, String> ret = new HashMap<String, String>();
Elements tr = table.select("tr");
for (Element element : tr) {
    addProperty(ret, element);
return ret;
ArrayListparseTable2ArrayList(Document doc, String selectorRow, String selectorCol)
parse Table Array List
Elements rows = doc.select(selectorRow);
ArrayList<String[]> arrayList = new ArrayList<String[]>();
for (Element row : rows) {
    Elements cols = row.select(selectorCol);
    String[] array = new String[cols.size()];
    for (int i = 0; i < cols.size(); i++) {
        array[i] = cols.get(i).html();
    arrayList.add(array);
return arrayList;
void parseTemplate1_1(Element element)
parse Template1_1
JSONObject jsonObject = new JSONObject();
JSONObject jsonItem = new JSONObject();
Element infotemplatebox = element.getElementsByClass("infotemplatebox").first();
Elements elements = infotemplatebox.select("tr");
for (int i = 0, total = elements.size(); i < total; i++) {
    Element item = elements.get(i);
    if (i == 0) {
        jsonObject.put("cover", item.select("img").attr("src"));
...