Java HTML Parse Jsoup getDoc(String path)

Description

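Reads the file at the given path as UTF-8, replaces every `<br>` tag and every newline character with the `LINE_START` marker text, and parses the result into a Jsoup `Document`.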

License

Apache License

Declaration

public static final Document getDoc(String path) throws IOException

Method Source Code


//package com.java2s;
//License from project: Apache License 

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Loads an HTML file into a Jsoup {@link Document}, first replacing every line
 * break in the raw text with the {@link #LINE_START} marker.
 */
public class Main {
    /** Marker text substituted for each {@code <br>} tag and each newline in the raw file. */
    public static final String LINE_START = "LINE_START_SUB";

    /**
     * Matches one line break in the raw HTML text: either a {@code <br>} tag
     * (case-insensitive, with or without attributes or a self-closing slash) or
     * a newline ({@code \n} or {@code \r\n}).
     *
     * <p>The lookahead after {@code br} requires whitespace, {@code /} or
     * {@code >}, so tags whose names merely start with "br" (for example
     * {@code <brand-logo>}) are left untouched. The optional {@code \r} keeps
     * CRLF files from leaving a stray carriage return in front of each marker.
     */
    private static final Pattern LINE_BREAK =
            Pattern.compile("(?i)<br(?=[\\s/>])[^>]*>|\\r?\\n");

    /**
     * Reads the file at {@code path} as UTF-8, replaces each line break with
     * {@link #LINE_START}, and parses the result as HTML.
     *
     * <p>NOTE(review): the marker is presumably there so callers can recover
     * line positions from the parsed text; confirm against the callers.
     *
     * @param path file-system path of the HTML file to read
     * @return the Jsoup document parsed from the marked-up content
     * @throws IOException if the file cannot be read
     */
    public static final Document getDoc(String path) throws IOException {
        String rawHtml = readFile(path, StandardCharsets.UTF_8);
        // One pass over the content; the marker contains no '$' or '\', so it
        // is safe to use directly as a regex replacement string.
        String markedHtml = LINE_BREAK.matcher(rawHtml).replaceAll(LINE_START);
        return Jsoup.parse(markedHtml);
    }

    /**
     * Reads the whole file into memory and decodes it with the given charset.
     *
     * @param path     file-system path of the file to read
     * @param encoding charset used to decode the file's bytes
     * @return the decoded file content
     * @throws IOException if the file cannot be read
     */
    private static String readFile(String path, Charset encoding) throws IOException {
        byte[] encoded = Files.readAllBytes(Paths.get(path));
        return new String(encoded, encoding);
    }
}

Java HTML Parse Jsoup getDoc(String path)

Description

License

Declaration

Method Source Code

Related