List of utility methods for parsing HTML with Jsoup
Element | parse(String html) parse return Jsoup.parse(html);
|
Document | parse(URL url, int timeout) parse Document doc = null; final int LIMIT = 10; final int LIMIT_SLEEP = 2; int iteration = 0; while (null == doc) { try { doc = Jsoup.connect(url.toString()).timeout(timeout).referrer("http://www.google.com/search") .userAgent( ... |
Document | parseByteData(ByteBuffer byteData, String charsetName, String baseUri, Parser parser) parse Byte Data String docData; Document doc = null; if (charsetName == null) { docData = Charset.forName(defaultCharset).decode(byteData).toString(); doc = parser.parseInput(docData, baseUri); Element meta = doc.select("meta[http-equiv=content-type], meta[charset]").first(); if (meta != null) { String foundCharset; ... |
String | parseEmail(String content) parse Email StringBuffer sb = new StringBuffer(); Document document = Jsoup.parse(content.toString()); Elements div = document.getElementsByTag("span"); for (Element e : div) { sb.append(e.text()); return sb.toString(); |
Document | parseFile(String filePath) parse File File inputFile = new File(filePath); return Jsoup.parse(inputFile, "UTF-8"); |
void | parseInfoBody(Element element) parse Info Body element.getElementById("mf-section-0").remove(); Elements tabs = element.select("h2"); for (int i = 0, total = tabs.size(); i < total; i++) { Element item = tabs.get(i); System.err.println(item.text()); Elements mf_section = element.getElementsByClass("mf-section-" + (i + 1)); System.err.println(mf_section.text()); System.err.println(tabs.size()); |
void | parseInfoHeader(Element element) parse Info Header Element infotemplatebox = element.getElementsByClass("infotemplatebox").first(); if (infotemplatebox != null) { parseTemplate1_1(element); return; infotemplatebox = element.select("table").first(); if (infotemplatebox != null) { parseTemplate1_2(element); ... |
Map | parsePropertyTable(Element table) parse Property Table Map<String, String> ret = new HashMap<String, String>(); Elements tr = table.select("tr"); for (Element element : tr) { addProperty(ret, element); return ret; |
ArrayList | parseTable2ArrayList(Document doc, String selectorRow, String selectorCol) parse Table to Array List Elements rows = doc.select(selectorRow); ArrayList<String[]> arrayList = new ArrayList<String[]>(); for (Element row : rows) { Elements cols = row.select(selectorCol); String[] array = new String[cols.size()]; for (int i = 0; i < cols.size(); i++) { array[i] = cols.get(i).html(); arrayList.add(array); return arrayList; |
void | parseTemplate1_1(Element element) parse Template 1_1 JSONObject jsonObject = new JSONObject(); JSONObject jsonItem = new JSONObject(); Element infotemplatebox = element.getElementsByClass("infotemplatebox").first(); Elements elements = infotemplatebox.select("tr"); for (int i = 0, total = elements.size(); i < total; i++) { Element item = elements.get(i); if (i == 0) { jsonObject.put("cover", item.select("img").attr("src")); ...