Here you can find the source of getDoc(String path)
public static final Document getDoc(String path) throws IOException
//package com.java2s; //License from project: Apache License import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; public class Main { public static final String LINE_START = "LINE_START_SUB"; public static final Document getDoc(String path) throws IOException { String fileContent = readFile(path, StandardCharsets.UTF_8).replaceAll("(?i)<br[^>]*>", LINE_START) .replaceAll("\n", LINE_START); return Jsoup.parse(fileContent); }// w w w . j a v a2 s .c om private static String readFile(String path, Charset encoding) throws IOException { byte[] encoded = Files.readAllBytes(Paths.get(path)); return new String(encoded, encoding); } }