Here you can find the source of getCategoryIds(final Document html)
static int[] getCategoryIds(final Document html)
//package com.java2s; //License from project: Open Source License import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Stream; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; public class Main { static int[] getCategoryIds(final Document html) { // Category taken from breadcrumb, which Wowhead draws with JS :( final String breadcrumbData = html.getElementsByTag("script").stream().map(Element::data) .filter(data -> data.contains("PageTemplate.set({breadcrumb:")).findFirst().get(); // ugh, parsing JS with regexes final String regex = Pattern.quote("PageTemplate.set({breadcrumb: [") + "([0-9,-]+)" + Pattern.quote("]});"); final String[] categoryIds = getRegexGroup(breadcrumbData, regex, 1).get().split(","); return Stream.of(categoryIds).mapToInt(Integer::parseInt).toArray(); }//from w w w. j a v a 2 s .c o m static Optional<String> getRegexGroup(final String str, final String regex, final int group, final int... flags) { int combinedFlags = 0; for (int flag : flags) { combinedFlags |= flag; } final Matcher matcher = Pattern.compile(regex, combinedFlags).matcher(str); if (!matcher.find()) { return Optional.empty(); } return Optional.of(matcher.group(group)); } }