Example usage for org.jsoup.nodes Element tagName

List of usage examples for org.jsoup.nodes Element tagName

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.tagName().

Prototype

public String tagName() 

Source Link

Documentation

Get the name of the tag for this element.

Usage

From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java

/**
 * Parses cast members out of the table row that follows the row containing the given
 * elements and adds them to the metadata.
 *
 * <p>The last element of {@code el} that is not an {@code option} tag is used as anchor.
 * From there the method walks up to the enclosing {@code tr} and reads every
 * {@code valign="middle"} element of the next sibling row. Name, id, image url and
 * character are extracted from the element's markup via regular expressions.
 *
 * @param el   candidate elements marking the cast section; may be {@code null} or empty
 * @param type the cast type assigned to every member that is found
 * @param md   the metadata object receiving the cast members
 */
private void parseCast(Elements el, MediaCastMember.CastType type, MediaMetadata md) {
    if (el == null || el.isEmpty()) {
        return;
    }

    // we get more, just do not take the optionbox (the last non-option element wins)
    Element castEl = null;
    for (Element element : el) {
        if (!element.tagName().equals("option")) {
            castEl = element;
        }
    }
    if (castEl == null) {
        LOGGER.debug("meh, no " + type.name() + " found");
        return;
    }

    // walk up to table TR...
    while (castEl != null && !castEl.tagName().equalsIgnoreCase("tr")) {
        castEl = castEl.parent();
    }
    if (castEl == null) {
        // the anchor is not nested inside a table row; previously this ended in a NullPointerException
        LOGGER.debug("meh, no table row for " + type.name() + " found");
        return;
    }

    // ... and take the next table row ^^
    Element tr = castEl.nextElementSibling();
    if (tr == null) {
        return;
    }

    for (Element a : tr.getElementsByAttributeValue("valign", "middle")) {
        String act = a.toString();
        String aname = StrgUtils.substr(act, "alt=\"(.*?)\"");
        if (aname.isEmpty()) {
            continue;
        }

        MediaCastMember cm = new MediaCastMember();
        cm.setName(aname);

        String id = StrgUtils.substr(act, "id=(.*?)[^\"]\">");
        if (!id.isEmpty()) {
            cm.setId(id);
            // thumb
            // http://www.ofdb.de/thumbnail.php?cover=images%2Fperson%2F7%2F7689.jpg&size=6
            // fullsize ;) http://www.ofdb.de/images/person/7/7689.jpg
            try {
                String imgurl = URLDecoder
                        .decode(StrgUtils.substr(act, "images%2Fperson%2F(.*?)&size"), "UTF-8");
                if (!imgurl.isEmpty()) {
                    imgurl = BASE_URL + "/images/person/" + imgurl;
                }
                cm.setImageUrl(imgurl);
            } catch (Exception e) {
                // the image is optional: keep the cast member, but leave a trace instead of swallowing silently
                LOGGER.debug("could not decode image url for " + aname + ": " + e.getMessage());
            }
        }

        String arole = StrgUtils.substr(act, "\\.\\.\\. (.*?)</font>").replaceAll("<[^>]*>", "");
        cm.setCharacter(arole);
        cm.setType(type);
        md.addCastMember(cm);
    }
}

From source file:org.wandora.application.tools.extractors.bookmark.BookmarkExtractor.java

/**
 * Entry point for parsing a category element: sets up the root "Bookmark" topic and
 * then processes the children of {@code c} beneath it.
 *
 * @param c the element whose children are inspected
 * @param t the topic map receiving the created topics
 * @throws TopicMapException declared for the topic map operations used here
 * @throws ParseException    declared for the item/category parsing delegated to
 */
private void parseCategory(Element c, TopicMap t) throws TopicMapException, ParseException {
    Topic wandoraClass = getWandoraClass(t);

    // Create (or reuse) the root topic and hang it below the Wandora class.
    Topic bookmarkRoot = getOrCreateTopic(t, ROOT_SI, "Bookmark");
    bookmarkRoot.setSubjectLocator(new Locator(ROOT_SI));
    bookmarkRoot.setDisplayName(LANG, "Bookmark");
    makeSubclassOf(t, bookmarkRoot, wandoraClass);

    for (Element entry : c.children()) {
        if (!"dt".equals(entry.tagName())) {
            continue;
        }
        for (Element inner : entry.children()) {
            String innerTag = inner.tagName();
            if ("a".equals(innerTag)) {
                parseItem(inner, bookmarkRoot, t);
            } else if ("dl".equals(innerTag)) {
                // the enclosing <dt> (not the <dl> itself) is handed to the recursive overload
                parseCategory(entry, bookmarkRoot, t);
            }
        }
    }

    // finally run the three-argument overload on the element itself
    parseCategory(c, bookmarkRoot, t);
}

From source file:org.wandora.application.tools.extractors.bookmark.BookmarkExtractor.java

/**
 * Processes one category element below the given parent topic.
 *
 * <p>Each {@code h3} child creates (or reuses) a topic that becomes a subclass of
 * {@code parent}; the last such topic is the target for everything found in the
 * {@code dl > dt} children. Without an {@code h3} child, {@code parent} itself is the target.
 *
 * @param c      the element whose children are inspected
 * @param parent the topic new category topics are attached to
 * @param t      the topic map receiving the created topics
 * @throws TopicMapException declared for the topic map operations used here
 * @throws ParseException    declared for the item parsing delegated to
 */
private void parseCategory(Element c, Topic parent, TopicMap t) throws TopicMapException, ParseException {
    Elements kids = c.children();

    // First pass: derive the category topic from the <h3> heading(s).
    Topic categoryTopic = parent;
    for (Element heading : kids) {
        if (!"h3".equals(heading.tagName())) {
            continue;
        }
        String locator = parent.getSubjectLocator().toString() + "/" + urlEncode(heading.html());
        String label = heading.ownText();

        categoryTopic = getOrCreateTopic(t, locator);
        categoryTopic.setSubjectLocator(new Locator(locator));
        categoryTopic.setBaseName(label + " (Bookmark)");
        categoryTopic.setDisplayName(LANG, label);
        makeSubclassOf(t, categoryTopic, parent);
    }

    // Second pass: descend into dl > dt and dispatch on the tag of each entry child.
    for (Element list : kids) {
        if ("dl".equals(list.tagName())) {
            for (Element entry : list.children()) {
                if ("dt".equals(entry.tagName())) {
                    for (Element inner : entry.children()) {
                        String innerTag = inner.tagName();
                        if ("a".equals(innerTag)) {
                            parseItem(inner, categoryTopic, t);
                        } else if ("dl".equals(innerTag)) {
                            // recurse with the enclosing <dt>, which carries the nested heading and list
                            parseCategory(entry, categoryTopic, t);
                        }
                    }
                }
            }
        }
    }
}

From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java

/**
 * Scans the nodes matched by {@code div.cc > p > *} and adds one similar recommendation
 * to the result builder per {@code a} element.
 *
 * <p>A {@code span.flagicon} node switches the language used for the following links:
 * its remaining class names are tried in order, and the first one that
 * {@code Language.getExistingLanguageById} resolves becomes the current language.
 *
 * @param doc           the document to scan
 * @param resultBuilder the builder receiving the recommendations
 */
private void findRecommendations(@NotNull Document doc, @NotNull BilingualQueryResultBuilder resultBuilder) {
    Elements candidates = doc.select("div.cc > p > *");
    Language activeLanguage = null;

    for (Element candidateNode : candidates) {
        String tag = candidateNode.tagName();

        if ("a".equals(tag)) {
            // A link is a recommendation in whatever language was announced last (possibly none yet).
            String text = candidateNode.text();
            DictionaryObjectBuilder builder = ImmutableDictionaryObject.builder();
            builder.setLanguage(activeLanguage).setGeneralForm(text);
            resultBuilder.addSimilarRecommendation(builder.build());
            continue;
        }

        if (!"span".equals(tag) || !candidateNode.hasClass("flagicon")) {
            continue;
        }

        // A flag icon: look for a language id among the other class names.
        Set<String> remainingClasses = candidateNode.classNames();
        remainingClasses.remove("flagicon");
        for (String name : remainingClasses) {
            Language match = Language.getExistingLanguageById(name);
            if (match != null) {
                activeLanguage = match;
                break;
            }
        }
    }
}

From source file:org.xwiki.validator.HTML5DutchWebGuidelinesValidator.java

/**
 * Returns the {@code content} attribute of the first {@code meta} element whose
 * {@code http-equiv} attribute equals {@code Content-Type}.
 *
 * @return the attribute value, or {@code null} when no such {@code meta} element exists
 */
private String getMetaCharset() {
    Elements candidates = this.html5Document.getElementsByAttributeValue("http-equiv", "Content-Type");
    for (Element candidate : candidates) {
        // other elements may carry the same attribute; only <meta> counts
        if (!"meta".equals(candidate.tagName())) {
            continue;
        }
        return candidate.attr("content");
    }
    return null;
}

From source file:uk.co.certait.htmlexporter.css.StyleMap.java

/**
 * Looks up the style registered under the element's tag name.
 *
 * @param element the element whose tag name is used as lookup key
 * @return whatever {@code styles.get} yields for that tag name
 */
private Style getStyleForTag(Element element) {
    String tag = element.tagName();
    return styles.get(tag);
}

From source file:uk.co.certait.htmlexporter.css.StyleMap.java

/**
 * Builds a style from the element's inline {@code style} attribute.
 *
 * <p>The attribute value is terminated with a semicolon if necessary and wrapped in a
 * dummy rule ({@code x{...}}) so that it can be handed to the CSS parser.
 *
 * @param element the element to inspect
 * @return the style created from the first parsed rule, or {@code null} if the element
 *         has no {@code style} attribute
 * @throws RuntimeException if the inline style cannot be parsed; the parser's exception
 *                          is attached as cause
 */
private Style getInlineStyle(Element element) {
    if (!element.hasAttr("style")) {
        return null;
    }

    String inlineStyle = element.attr("style");
    if (!inlineStyle.endsWith(";")) {
        inlineStyle += ";";
    }

    List<Rule> inlineRules;
    try {
        inlineRules = CSSParser.parse("x{" + inlineStyle + "}");
    } catch (Exception e) {
        // keep the original failure as cause so the parse error is not lost
        throw new RuntimeException("Error parsing inline style for element " + element.tagName(), e);
    }

    Rule rule = inlineRules.get(0);
    return generator.createStyle(rule, rule.getSelectors().get(0));
}

From source file:us.colloquy.sandbox.TestExtractor.java

/**
 * Exploratory test: parses a sample HTML file with jsoup, builds one {@code Letter} per
 * element with class {@code section} and prints each letter as pretty-printed JSON.
 *
 * <p>The children of a section are dispatched on their class name: {@code center}
 * (addressee, optionally followed by date and place), {@code Data} (date and place),
 * {@code petit} / {@code Textpetit_otstup} (notes); everything else is appended to the
 * letter content. The method contains no assertions; it only writes to standard output.
 */
@Test
public void useJsoup() {

    String homeDir = System.getProperty("user.home");

    System.out.println(homeDir);

    //JSOUP API allows to extract all  elements of letters in files

    // File input = new File("samples/OEBPS/Text/0001_1006_2001.xhtml");

    // relative path: resolved against the working directory of the test run
    File input = new File("samples/pisma-1904/OEBPS/Text/single_doc.html");

    try {
        Document doc = Jsoup.parse(input, "UTF-8");

        List<Letter> letters = new ArrayList<>(); //our model contains only a subset of fields

        // year of the most recently parsed date; handed to the date parser for later letters
        String previousYear = "";

        for (Element element : doc.getElementsByClass("section")) {
            Letter letter = new Letter();

            StringBuilder content = new StringBuilder();

            for (Element child : element.children()) {

                // debug output of every attribute of the child
                for (Attribute att : child.attributes()) {
                    System.out.println(att.getKey() + " " + att.getValue());
                }

                if ("center".equalsIgnoreCase(child.className())) {
                    // addressee: text of the <strong> tags, falling back to the whole text of the child
                    String toWhom = child.getElementsByTag("strong").text();

                    if (StringUtils.isEmpty(toWhom)) {
                        toWhom = child.text();
                        // System.out.println(toWhom);
                    }

                    // several addressees are separated by two whitespace characters or a comma
                    String[] toWhomArray = toWhom.split("(\\s\\s)|(,)");

                    for (String to : toWhomArray) {
                        RussianDate.parseToWhom(letter, to); //here we need to recognize a russian name and store that but for now we store the content
                    }

                    //check if there is anything else here and find date and place - it will be replaced if exists below

                    String entireText = child.text();

                    // whatever remains after removing the addressee text
                    String tail = entireText.replace(toWhom, "");

                    if (StringUtils.isNotEmpty(tail)) {
                        RussianDate.parseDateAndPlace(letter, tail, previousYear); //a parser that figures out date and place if they are present
                    }

                    // System.out.println("two whom\t " +  child.getElementsByTag("strong").text() );

                } else if ("Data".equalsIgnoreCase(child.className())) {

                    if (child.getElementsByTag("em") != null
                            && StringUtils.isNotEmpty(child.getElementsByTag("em").text())) {
                        RussianDate.parseDateAndPlace(letter, child.getElementsByTag("em").text(),
                                previousYear); //most often date and place are enclosed in em tag

                        // remember the year of this letter for the following ones
                        if (letter.getDate() != null) {
                            LocalDate localDate = letter.getDate().toInstant().atZone(ZoneId.systemDefault())
                                    .toLocalDate();
                            int year = localDate.getYear();
                            previousYear = year + "";
                        }
                    }

                    // System.out.println("when and where\t " + child.getElementsByTag("em").text());

                } else if ("petit".equalsIgnoreCase(child.className())
                        || "Textpetit_otstup".equalsIgnoreCase(child.className())) {
                    letter.getNotes().add(child.text());

                } else {
                    //System.out.println(child.text() );

                    // replace every <sup> element by a text node holding its text in square brackets
                    Elements elements = child.getElementsByTag("sup");

                    for (Element e : elements) {
                        String value = e.text();

                        e.replaceWith(new TextNode("[" + value + "]", null));
                    }

                    // NOTE(review): the <sup> elements were replaced above, so the "sup" branch below
                    // looks unreachable - confirm.
                    // NOTE(review): getAllElements() presumably also yields nested elements, in which
                    // case their text would be appended more than once - confirm against the jsoup docs.
                    for (Element el : child.getAllElements()) {
                        // System.out.println(el.tagName());
                        if ("sup".equalsIgnoreCase(el.tagName())) {
                            content.append(" [" + el.text() + "] ");
                        } else {
                            content.append(el.text());
                        }

                    }

                    content.append("\n");

                }

                //                  System.out.println(child.tag() + "\n" );
                //                  System.out.println(child.outerHtml() + "\n" + child.text());
            }

            letter.setContent(content.toString());
            letters.add(letter);
        }

        ObjectWriter ow = new com.fasterxml.jackson.databind.ObjectMapper().writer().withDefaultPrettyPrinter();

        // print every collected letter as JSON
        for (Letter letter : letters) {
            //                if (letter.getDate() == null)
            //                {

            //                        if (StringUtils.isNotEmpty(person.getLastName()))
            //                        {
            String json = ow.writeValueAsString(letter);

            System.out.println(json);
            //                        }

            //}

        }

    } catch (IOException e) {
        // the exception is only printed; it is not rethrown, so the test method still returns normally
        e.printStackTrace();
    }

}

From source file:utils.AutoLinkRenderer.java

/**
 * Checks whether the element's tag name, converted to upper case, is contained in
 * {@code IGNORE_TAGNAME} (per the original note: links and code tags).
 *
 * @param el the element whose tag name is looked up
 * @return {@code true} if the upper-cased tag name is listed in {@code IGNORE_TAGNAME}
 */
private boolean isIgnoreElement(Element el) {
    // Locale.ROOT makes the case conversion independent of the default locale; with a
    // Turkish default locale, for example, "i" would otherwise become a dotted capital I.
    return ArrayUtils.contains(IGNORE_TAGNAME, el.tagName().toUpperCase(java.util.Locale.ROOT));
}

From source file:xxx.web.comments.debates.impl.ProConOrgCommentsParser.java

/**
 * Extracts the document of the quote/*from  ww  w  . j  a  v a  2  s. c  o m*/
 *
 * @param textElement text quote element
 * @return plain string with paragraphs kept
 */
protected static String extractPlainTextFromTextElement(Element textElement) {
    StringBuilder sb = new StringBuilder();

    for (Node childNode : textElement.childNodes()) {
        if (childNode instanceof Element) {
            Element childElement = (Element) childNode;

            String tagName = childElement.tagName();

            if ("p".equals(tagName) || "span".equals(tagName)) {
                sb.append(childElement.text());
                sb.append("\n");
            } else if ("br".equals(tagName)) {
                // prevent double newlines
                sb = new StringBuilder(sb.toString().trim());
                sb.append("\n");
            }

        } else if (childNode instanceof TextNode) {
            TextNode textNode = (TextNode) childNode;

            sb.append(textNode.text());
        }
    }

    // remove leading + ending quotes
    return Utils.normalize(sb.toString()).replaceAll("[(^\")(\"$)]", "");
}