Here you can find the source of getInfoboxLines(final Document html, final boolean stripColor)
static List<String> getInfoboxLines(final Document html, final boolean stripColor)
//package com.java2s; //License from project: Open Source License import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; public class Main { static List<String> getInfoboxLines(final Document html, final boolean stripColor) { final Optional<String> infoboxData = html.getElementsByTag("script").stream().map(Element::data) .filter(data -> data.contains("arkup.printHtml")).findFirst(); if (!infoboxData.isPresent()) { return Collections.emptyList(); }// ww w.j av a2 s . c o m final String infoboxMarkup = getRegexGroup(infoboxData.get(), "[Mm]arkup\\.printHtml\\((['\"])(.*)\\1, 'infobox", 2).get().replace("\\/", "/"); // wowhead now escapes forward slashes // Convert \xNN escape sequences to their corresponding characters final Matcher matcher = Pattern.compile("\\\\x([0-9A-Z]{2})").matcher(infoboxMarkup); final StringBuffer sb = new StringBuffer(); while (matcher.find()) { final String hex = matcher.group(1); matcher.appendReplacement(sb, Character.toString((char) Integer.parseInt(hex, 16))); } matcher.appendTail(sb); // We'll get BBCode, convert it to a list of plain text lines return Stream.of(sb.toString().split(Pattern.quote("[/li][li]"))) .flatMap((String line) -> Stream.of(line.split(Pattern.quote("[br]")))) // replace [race/class/money=X] with X .map(line -> line.replaceAll("\\[(?:race|class|money)=([0-9]+)\\]", "$1")) .map(stripColor ? line -> line : line -> line.replaceAll("\\[color=([^\\]]+)\\]", "<$1>")) .map(line -> line.replaceAll("\\[[^\\]]+\\]", "")) // remove all square bracket tags .collect(Collectors.toList()); } static Optional<String> getRegexGroup(final String str, final String regex, final int group, final int... flags) { int combinedFlags = 0; for (int flag : flags) { combinedFlags |= flag; } final Matcher matcher = Pattern.compile(regex, combinedFlags).matcher(str); if (!matcher.find()) { return Optional.empty(); } return Optional.of(matcher.group(group)); } }