Here you can find the source of `getFirstSentence(final String html)`, which returns the first sentence of the specified HTML text.
Parameter | Description |
---|---|
html | The HTML text. |
public static String getFirstSentence(final String html)
//package com.java2s; /*/*from w ww . ja v a2 s .c o m*/ * Copyright (C) 2012 Klaus Reimer <k@ailis.de> * See LICENSE.md for licensing information. */ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; public class Main { /** * Returns the first sentence of the specified HTML text. * * @param html * The HTML text. * @return The first sentence. */ public static String getFirstSentence(final String html) { final Document newDoc = Document.createShell(""); final Element newBody = newDoc.body(); final Document document = parse(html); final Element body = document.body(); for (final Node node : body.childNodes()) { if (node instanceof TextNode) { final String text = ((TextNode) node).text(); final String[] parts = text.split("\\.(\\s+|$)", 2); if (parts.length == 2) { newBody.appendText(parts[0] + "."); break; } } newBody.appendChild(node.clone()); } return newDoc.body().html().trim(); } /** * Parses the specified html code. * * @param html * The HTML code to parse. * @return The parsed document. */ public static Document parse(final String html) { Document doc = Jsoup.parseBodyFragment(html); doc.outputSettings().prettyPrint(false); return doc; } }