Example usage for org.jsoup.nodes.Element#text()

List of usage examples for org.jsoup.nodes Element text

Introduction

On this page you can find usage examples for the text() method of org.jsoup.nodes.Element.

Prototype

public String text() 

Source Link

Document

Gets the combined text of this element and all its children.

Usage

From source file:org.apdplat.superword.extract.SynonymAntonymExtractor.java

/**
 * Builds a {@link SynonymAntonym} for {@code word} from an HTML page.
 *
 * <p>Every element matched by {@code SYNONYM_ANTONYM_CSS_PATH} is treated as one group:
 * its {@code TYPE} text decides whether the words found under {@code WORDS} are recorded
 * as synonyms or as antonyms. Any exception raised while parsing is logged and the
 * result collected so far is returned.
 *
 * <p>NOTE(review): the two type labels and the log messages appear as question marks in
 * this copy, presumably because the original non-ASCII text was lost -- confirm against
 * the upstream source.
 *
 * @param html the page markup to parse
 * @param word the word the page belongs to; stored on the result with an empty meaning
 * @return the populated result, never {@code null}
 */
public static SynonymAntonym parseSynonymAntonym(String html, String word) {
    SynonymAntonym result = new SynonymAntonym();
    result.setWord(new Word(word, ""));
    try {
        Elements groups = Jsoup.parse(html).select(SYNONYM_ANTONYM_CSS_PATH);
        for (Element group : groups) {
            String type = group.select(TYPE).text().trim();
            LOGGER.debug("type:" + type);
            for (Element candidate : group.select(WORDS)) {
                String text = candidate.text().trim();
                LOGGER.debug("word:" + text);
                if (!StringUtils.isNotBlank(text)) {
                    LOGGER.error("??????" + word);
                    continue;
                }
                if ("??".equals(type)) {
                    result.addSynonym(new Word(text, ""));
                } else if ("???".equals(type)) {
                    result.addAntonym(new Word(text, ""));
                } else {
                    // Unrecognised group label: report it and keep going.
                    LOGGER.error("???????" + type);
                }
            }
        }
        LOGGER.info("??????" + result);
    } catch (Exception e) {
        LOGGER.error("??????", e);
    }
    return result;
}

From source file:org.apdplat.superword.tools.Definition.java

/**
 * Extracts definition texts from an HTML page.
 *
 * <p>For {@code Dictionary.OXFORD} and {@code Dictionary.WEBSTER} the work is delegated to
 * the dedicated parsers. Otherwise {@code cssPath} is read as a {@code |}-separated list
 * of CSS selectors that are tried in order; the first selector that yields at least one
 * definition wins. Texts that are blank, or that start with {@code "?"} after trimming,
 * are skipped. Any exception is logged together with {@code word} and the definitions
 * collected so far are returned.
 *
 * @param html       the page markup to parse
 * @param cssPath    one or more CSS selectors separated by {@code |}
 * @param word       the word being looked up; only used in the error log
 * @param dictionary the dictionary the page came from
 * @return the definitions found, possibly empty, never {@code null}
 */
public static List<String> parseDefinitionFromHtml(String html, String cssPath, String word,
        Dictionary dictionary) {
    if (dictionary == Dictionary.OXFORD) {
        return parseDefinitionForOxford(html, null);
    }
    if (dictionary == Dictionary.WEBSTER) {
        return parseDefinitionForWebster(html, null);
    }
    List<String> definitions = new ArrayList<>();
    try {
        Document page = Jsoup.parse(html);
        String[] selectors = cssPath.split("\\|");
        // Stop at the first selector that produced any definition.
        for (int i = 0; i < selectors.length && definitions.isEmpty(); i++) {
            String selector = selectors[i].trim();
            if (StringUtils.isBlank(selector)) {
                continue;
            }
            for (Element match : page.select(selector)) {
                String text = match.text();
                if (StringUtils.isBlank(text)) {
                    continue;
                }
                String definition = text.trim();
                if (definition.startsWith("?")) {
                    continue;
                }
                definitions.add(definition);
            }
        }
    } catch (Exception e) {
        LOGGER.error("?" + word, e);
    }
    return definitions;
}

From source file:org.apdplat.superword.tools.Definition.java

/**
 * Extracts the "simple definition" entries from a Merriam-Webster style page.
 *
 * <p>Each matched definition box contributes its part-of-speech label, and every
 * definition span inside it becomes one entry of the form
 * {@code "<partOfSpeech> : <definition>"}. Leading non-alphabetic characters of a
 * definition are stripped. Definitions shorter than three characters, or containing no
 * alphabetic character at all, are skipped.
 *
 * <p>Any exception raised while parsing is logged and the entries collected so far are
 * returned.
 *
 * @param html    the page markup to parse
 * @param cssPath not read by this method; the selectors are fixed
 * @return the formatted definitions, possibly empty, never {@code null}
 */
public static List<String> parseDefinitionForWebster(String html, String cssPath) {
    List<String> list = new ArrayList<>();
    try {
        for (Element element : Jsoup.parse(html)
                .select("div.tense-box.quick-def-box.simple-def-box.card-box.def-text div.inner-box-wrapper")) {
            String partOfSpeech = element.select("div.word-attributes span.main-attr em").text().trim();
            for (Element defElement : element.select(
                    "div.definition-block.def-text ul.definition-list.no-count li p.definition-inner-item span")) {
                String def = defElement.text().trim();
                if (def.length() < 3) {
                    continue;
                }
                // Locate the first alphabetic character. The scan is bounded so that a
                // definition without any letter cannot run past the end of the string
                // (which previously threw and aborted the whole parse).
                int start = 0;
                while (start < def.length() && !Character.isAlphabetic(def.charAt(start))) {
                    start++;
                }
                if (start == def.length()) {
                    // Nothing usable in this span; skip it but keep parsing the rest.
                    continue;
                }
                list.add(partOfSpeech + " : " + def.substring(start));
            }
        }
    } catch (Exception e) {
        LOGGER.error("?", e);
    }
    return list;
}

From source file:org.apdplat.superword.tools.IPUtils.java

/**
 * Looks up location information for an IP address on ip138.com.
 *
 * <p>The lookup page is fetched with 60000 passed as jsoup's timeout argument, and the
 * text of every {@code ul li} element is split; when the split yields exactly two parts
 * the second one is kept. Any exception (including network failures) is logged and the
 * locations collected so far are returned.
 *
 * <p>NOTE(review): the split delimiter is an empty string in this copy, which splits the
 * text into single characters, so only two-character items can match. The delimiter was
 * presumably a non-ASCII character lost in transcription -- confirm against the upstream
 * source before relying on this method.
 *
 * @param ip the address to look up; appended to the query string as-is
 * @return the extracted location strings, possibly empty, never {@code null}
 */
public static List<String> getIPLocation(String ip) {
    List<String> locations = new ArrayList<>();
    try {
        URL lookup = new URL("http://ip138.com/ips138.asp?ip=" + ip);
        for (Element item : Jsoup.parse(lookup, 60000).select("ul li")) {
            String text = item.text();
            if (StringUtils.isBlank(text)) {
                continue;
            }
            String[] attrs = text.split("");
            if (attrs.length == 2) {
                locations.add(attrs[1]);
            }
        }
    } catch (Exception e) {
        LOG.error("?IP???", e);
    }
    return locations;
}

From source file:org.apdplat.superword.tools.PrefixSuffixOptimizer.java

/**
 * Rewrites the content of {@code element} so that each purely alphabetic item becomes a
 * link to its iciba.com page.
 *
 * <p>The element text is split on {@code ','} and {@code '.'}. Items that are entirely
 * alphabetic are wrapped in an anchor (and additionally in {@code <strong>} when they
 * are all upper case) and their lower-cased form is added to {@code WORDS}; all other
 * items are copied through unchanged. The items are re-joined with {@code ", "} and set
 * as the element's HTML. Old and new text are printed to standard output.
 *
 * @param element the element whose content is rewritten in place
 */
public static void replace(Element element) {
    String oldText = element.text();
    System.out.println("oldText: " + oldText);
    StringBuilder newText = new StringBuilder();
    for (String raw : oldText.trim().replace(".", ",").split(",")) {
        String item = raw.trim();
        if (StringUtils.isAlpha(item)) {
            String link = "<a target=\"_blank\" href=\"http://www.iciba.com/" + item + "\">" + item + "</a>";
            if (StringUtils.isAllUpperCase(item)) {
                newText.append("<strong>").append(link).append("</strong>");
            } else {
                newText.append(link);
            }
            WORDS.add(item.toLowerCase());
        } else {
            newText.append(item);
        }
        newText.append(", ");
    }
    if (newText.length() <= 2) {
        return;
    }
    // Drop the trailing ", " separator.
    newText.setLength(newText.length() - 2);
    String text = newText.toString();
    System.out.println("newText: " + text);
    element.html(text);
}

From source file:org.apdplat.superword.tools.Pronunciation.java

/**
 * Extracts the distinct pronunciation strings from an HTML page.
 *
 * <p>The text of every element matched by {@code cssPath} has the literal
 * {@code "Pronunciation:"} removed and is trimmed; values already collected are not
 * added again, so first-seen order is kept. Any exception is logged together with
 * {@code word} and the values collected so far are returned.
 *
 * @param html       the page markup to parse
 * @param cssPath    the CSS selector locating the pronunciation elements
 * @param word       the word being looked up; only used in the error log
 * @param dictionary not read by this method
 * @return the pronunciations found, possibly empty, never {@code null}
 */
public static List<String> parsePronunciationFromHtml(String html, String cssPath, String word,
        Dictionary dictionary) {
    List<String> pronunciations = new ArrayList<>();
    try {
        for (Element match : Jsoup.parse(html).select(cssPath)) {
            String raw = match.text();
            if (StringUtils.isBlank(raw)) {
                continue;
            }
            String pronunciation = raw.replace("Pronunciation:", "").trim();
            if (pronunciations.contains(pronunciation)) {
                continue;
            }
            pronunciations.add(pronunciation);
        }
    } catch (Exception e) {
        LOGGER.error("?" + word, e);
    }
    return pronunciations;
}

From source file:org.apdplat.superword.tools.ProxyIp.java

/**
 * Reassembles an IP address from the visible child elements of {@code element}.
 *
 * <p>Children whose {@code style} attribute is exactly {@code "display: none;"} or
 * {@code "display:none;"} are logged but ignored; the non-blank text of every other
 * child is concatenated in document order. The first match of {@code IP_PATTERN} in the
 * concatenated text is returned.
 *
 * @param element the element whose direct children carry the address fragments
 * @return the matched address, or {@code null} when the pattern does not match
 */
private static String getIps(Element element) {
    Elements children = element.children();
    StringBuilder ip = new StringBuilder();
    LOGGER.info("");
    LOGGER.info("?IP?" + element.text());
    int count = 0;
    for (Element child : children) {
        count++;
        String html = child.outerHtml();
        LOGGER.info(count + "?" + "HTML" + html.replaceAll("[\n\r]", ""));
        String text = child.text();
        String style = child.hasAttr("style") ? child.attr("style") : null;
        boolean hidden = "display: none;".equals(style) || "display:none;".equals(style);
        if (hidden) {
            LOGGER.info("?" + text);
        } else if (StringUtils.isNotBlank(text)) {
            LOGGER.info("?" + text);
            ip.append(text);
        } else {
            LOGGER.info("");
        }
    }
    LOGGER.info("----------------------------------------------------------------");
    LOGGER.info("?ip: " + ip);
    LOGGER.info("----------------------------------------------------------------");
    Matcher matcher = IP_PATTERN.matcher(ip.toString());
    if (!matcher.find()) {
        LOGGER.info("ip??" + ip);
        return null;
    }
    String matched = matcher.group();
    LOGGER.info("ip??" + matched);
    return matched;
}

From source file:org.apdplat.superword.tools.WordClassifier.java

/**
 * Classifies {@code word} according to the type labels found on its HTML page.
 *
 * <p>The text of each element matched by {@code TYPE_CSS_PATH} is used as a key in
 * {@code data}, and {@code word} is appended to that key's list (the list is created on
 * first use). If any element matched by {@code UNFOUND_CSS_PATH} contains one of the
 * two marker strings, {@code word} is added to {@code NOT_FOUND_WORDS}.
 *
 * <p>NOTE(review): the marker strings appear as question marks in this copy, presumably
 * because the original non-ASCII text was lost -- confirm against the upstream source.
 *
 * @param word the word the page belongs to
 * @param html the page markup to parse
 * @param data type label to list of words; updated in place
 */
public static void parse(String word, String html, Map<String, List<String>> data) {
    Document doc = Jsoup.parse(html);
    for (Element typeElement : doc.select(TYPE_CSS_PATH)) {
        String type = typeElement.text();
        LOGGER.debug("?" + type);
        if (StringUtils.isBlank(type)) {
            continue;
        }
        data.computeIfAbsent(type, key -> new ArrayList<>()).add(word);
    }
    for (Element unfoundElement : doc.select(UNFOUND_CSS_PATH)) {
        String notFound = unfoundElement.text();
        LOGGER.debug("?" + notFound);
        if (StringUtils.isBlank(notFound)) {
            continue;
        }
        if (notFound.contains("?") || notFound.contains("??")) {
            NOT_FOUND_WORDS.add(word);
        }
    }
}

From source file:org.apdplat.superword.tools.WordClassifierForYouDao.java

/**
 * Classifies {@code word} according to the type labels found on its HTML page.
 *
 * <p>The text of each element matched by {@code TYPE_CSS_PATH} is split on whitespace;
 * every non-blank token is used as a key in {@code data}, and {@code word} is appended
 * to that key's list (the list is created on first use). If any element matched by
 * {@code UNFOUND_CSS_PATH} contains one of the two marker strings, {@code word} is added
 * to {@code NOT_FOUND_WORDS}.
 *
 * <p>NOTE(review): the marker strings appear as question marks in this copy, presumably
 * because the original non-ASCII text was lost -- confirm against the upstream source.
 *
 * @param word the word the page belongs to
 * @param html the page markup to parse
 * @param data type label to list of words; updated in place
 */
public static void parse(String word, String html, Map<String, List<String>> data) {
    Document doc = Jsoup.parse(html);
    for (Element typeElement : doc.select(TYPE_CSS_PATH)) {
        String types = typeElement.text();
        LOGGER.debug("?" + types);
        for (String type : types.split("\\s+")) {
            if (StringUtils.isBlank(type)) {
                continue;
            }
            data.computeIfAbsent(type, key -> new ArrayList<>()).add(word);
        }
    }
    for (Element unfoundElement : doc.select(UNFOUND_CSS_PATH)) {
        String notFound = unfoundElement.text();
        LOGGER.debug("?" + notFound);
        if (StringUtils.isBlank(notFound)) {
            continue;
        }
        if (notFound.contains("?") || notFound.contains("??")) {
            NOT_FOUND_WORDS.add(word);
        }
    }
}

From source file:org.apdplat.superword.tools.WordsFetcher.java

/**
 * Collects the English words found on an HTML page.
 *
 * <p>The trimmed text of every element matched by {@code WORD_CSS_PATH} is kept when it
 * is non-blank and accepted by {@code WordSources.isEnglish}; each kept text becomes a
 * {@link Word} with an empty meaning. Any exception is logged and the words collected so
 * far are returned.
 *
 * @param html the page markup to parse
 * @return the words found, possibly empty, never {@code null}
 */
public static Set<Word> parse(String html) {
    Set<Word> found = new HashSet<>();
    try {
        Elements candidates = Jsoup.parse(html).select(WORD_CSS_PATH);
        for (Element candidate : candidates) {
            String text = candidate.text().trim();
            if (StringUtils.isBlank(text) || !WordSources.isEnglish(text)) {
                continue;
            }
            found.add(new Word(text, ""));
            LOGGER.debug("???:" + text);
        }
    } catch (Exception e) {
        LOGGER.error("???", e);
    }
    return found;
}