Example usage for org.jsoup.nodes Element getElementsByClass

List of usage examples for org.jsoup.nodes Element getElementsByClass

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.getElementsByClass.

Prototype

public Elements getElementsByClass(String className) 

Source Link

Document

Find elements that have this class, including or under this element.

Usage

From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java

/**
 * Turns a single result row into a bilingual entry and adds it to {@code resultBuilder}.
 * <p>
 * The row is skipped (with a warning) when it is not a {@code tr} element or when it does
 * not contain exactly two elements of class {@code CLASS_TRANSLATION}. The first of those
 * two elements is treated as the source side, the second as the target side.
 *
 * @param queryString    the original query, forwarded to {@code processSingleNode}
 * @param entryNode      the row element to process
 * @param resultBuilder  builder that receives the finished entry
 * @param sourceLanguage language passed along for the first translation element
 * @param targetLanguage language passed along for the second translation element
 */
private void processEntry(@NotNull String queryString, @NotNull Element entryNode,
        @NotNull BilingualQueryResultBuilder resultBuilder, @NotNull Language sourceLanguage,
        @NotNull Language targetLanguage) {
    String tagName = entryNode.tag().getName();
    if (!StringUtils.equals(tagName, "tr")) {
        LOGGER.warn("Expected <tr> tag - got <{}>", tagName);
        return;
    }

    Elements translationCells = entryNode.getElementsByClass(CLASS_TRANSLATION);
    int cellCount = translationCells.size();
    if (cellCount != 2) {
        LOGGER.warn("Expected 2 elements with class \"" + CLASS_TRANSLATION + "\" - got {}", cellCount);
        return;
    }

    Element sourceCell = translationCells.get(0);
    Element targetCell = translationCells.get(1);

    BilingualEntryBuilder entryBuilder = ImmutableBilingualEntry.builder();
    entryBuilder.setEntryType(detectEntryType(sourceCell));
    entryBuilder.setInputObject(processSingleNode(sourceCell, sourceLanguage, queryString));
    entryBuilder.setOutputObject(processSingleNode(targetCell, targetLanguage, queryString));

    resultBuilder.addBilingualEntry(entryBuilder.build());
}

From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java

/**
 * Reads the description text for {@code element} and stores it on {@code objectBuilder}.
 * <p>
 * The first element of class {@code CLASS_DESCRIPTION} is used; if there is none, the first
 * element of class {@code CLASS_EXTRA_INFO} is tried instead (needed for synonyms). The
 * {@code DESCRIPTION_BEGIN} prefix and {@code DESCRIPTION_END} suffix are removed from the
 * text. Nothing is set when no node is found or when the remaining text equals
 * {@code queryString} ignoring case.
 *
 * @param element       element to search in
 * @param queryString   the original query; a description identical to it is not stored
 * @param objectBuilder builder that receives the stripped description
 */
private void extractDescription(@NotNull Element element, String queryString,
        DictionaryObjectBuilder objectBuilder) {
    Element descriptionNode = element.getElementsByClass(CLASS_DESCRIPTION).first();
    if (descriptionNode == null) {
        // Fall back to the alternative class (necessary for synonyms).
        descriptionNode = element.getElementsByClass(CLASS_EXTRA_INFO).first();
    }
    if (descriptionNode == null) {
        return;
    }

    String withoutPrefix = StringUtils.removeStart(descriptionNode.text(), DESCRIPTION_BEGIN);
    String bareDescription = StringUtils.removeEnd(withoutPrefix, DESCRIPTION_END);

    // Only keep the description when it differs from the request string.
    if (StringUtils.equalsIgnoreCase(bareDescription, queryString)) {
        return;
    }
    objectBuilder.setDescription(StringUtils.strip(bareDescription));
}

From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java

/**
 * Looks up the grammatical gender for {@code element} and stores it on {@code objectBuilder}.
 * <p>
 * The text of the first element of class {@code CLASS_GENDER} is used as a key into
 * {@code GENDER_MAP}. Nothing is set when there is no such element or the key is not mapped.
 *
 * @param element       element to search in
 * @param objectBuilder builder that receives the mapped gender
 */
private void extractGender(@NotNull Element element, DictionaryObjectBuilder objectBuilder) {
    Element genderNode = element.getElementsByClass(CLASS_GENDER).first();
    if (genderNode == null) {
        return;
    }

    String genderLabel = genderNode.text();
    if (!GENDER_MAP.containsKey(genderLabel)) {
        return;
    }
    objectBuilder.setGrammaticalGender(GENDER_MAP.get(genderLabel));
}

From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java

/**
 * Determines the entry type of {@code element} from its word-type marker.
 * <p>
 * The text of the first element of class {@code CLASS_WORDTYPE} (at or under
 * {@code element}) is looked up in {@code ENTRY_TYPE_MAP}.
 *
 * @param element element to search in
 * @return the mapped entry type, or {@link EntryType#UNKNOWN} when no word-type element
 *         exists or its text has no mapping
 */
private EntryType detectEntryType(@NotNull Element element) {
    Elements wordTypeNodes = element.getElementsByClass(CLASS_WORDTYPE);

    if (wordTypeNodes.isEmpty()) {
        LOGGER.debug("No wordType node found - defaulting to {}", EntryType.UNKNOWN);
        return EntryType.UNKNOWN;
    }

    String wordTypeText = wordTypeNodes.first().text();
    EntryType entryType = ENTRY_TYPE_MAP.getOrDefault(wordTypeText, EntryType.UNKNOWN);

    if (entryType == EntryType.UNKNOWN) {
        // Log the text that failed to resolve; entryType itself is always UNKNOWN here
        // and would make the message useless for diagnosing missing mappings.
        LOGGER.debug("Unable to resolve entry type \"{}\"", wordTypeText);
    }

    return entryType;
}

From source file:ru.redcraft.pinterest4j.core.api.PinAPI.java

/**
 * Fetches the comments of a pin.
 * <p>
 * Requests the pin URL, reads the {@code "footer"} string from the JSON response, parses it
 * as HTML and builds one comment per {@code div.comment} element. The comment id comes from
 * the {@code data} attribute of the first "DeleteComment" element; the author comes from the
 * {@code href} of the first link inside the first "CommenterMeta" element (slashes removed);
 * the comment text is the "CommenterMeta" text after its links have been removed.
 *
 * @param pin the pin whose comments are requested
 * @return the extracted comments, in document order
 * @throws PinterestRuntimeException if the response cannot be read as JSON
 */
public List<Comment> getComments(Pin pin) {
    LOG.debug("Getting comments for pin = " + pin);

    // Declared outside the try block so the raw payload can be included in the error message.
    String rawResponse = null;
    Document footer;
    try {
        rawResponse = new APIRequestBuilder(pin.getURL()).setErrorMessage(PIN_API_ERROR).build().getResponse()
                .getEntity(String.class);
        footer = Jsoup.parse(new JSONObject(rawResponse).getString("footer"));
    } catch (JSONException e) {
        throw new PinterestRuntimeException(PIN_API_ERROR + rawResponse, e);
    }

    List<Comment> comments = new ArrayList<Comment>();
    for (Element commentNode : footer.select("div.comment")) {
        String rawId = commentNode.getElementsByClass("DeleteComment").first().attr("data");
        long id = Long.parseLong(rawId);

        Element meta = commentNode.getElementsByClass("CommenterMeta").first();
        String userName = meta.getElementsByTag("a").first().attr("href").replace("/", "");
        User user = new LazyUser(userName, getApiManager());

        // Drop the links so that only the comment body remains in the text.
        meta.getElementsByTag("a").remove();
        String text = meta.text();

        comments.add(new CommentImpl(id, text, user, pin));
    }

    LOG.debug("Comments extracted: " + comments);
    return comments;
}

From source file:wo.trade.SearchPageScraper.java

/**
 * Parses {@code page} with Jsoup and builds one {@link TradeItem} per element of class
 * "item".
 * <p>
 * Items are searched under the element with id "content" when it exists, otherwise in the
 * whole document. For every item the method reads the {@code data-*} attributes, socket
 * text, requirements, rarity, verification hash, implicit/explicit mods, the property
 * column values, the icon URL (and the stack size encoded in it) and the online marker.
 * <p>
 * Several lookups use {@code get(0)} on the matched elements, so an item that lacks one of
 * those elements (for example "sockets-raw", "requirements" or any property cell) makes the
 * call fail with an {@link IndexOutOfBoundsException}.
 *
 * @return the parsed items, in document order
 */
public List<TradeItem> parse() {
    List<TradeItem> tradeItems = new LinkedList<>();
    Document doc = Jsoup.parse(page, "UTF-8");

    // Restrict the search to the "content" container when the page has one.
    Element content = doc.getElementById("content");
    Elements items = (content == null) ? doc.getElementsByClass("item") : content.getElementsByClass("item");

    for (Element element : items) {
        TradeItem item = new TradeItem();

        item.id = StringUtils.remove(element.attr("id"), "item-container-");
        item.seller = element.attr("data-seller");
        item.thread = element.attr("data-thread");
        item.sellerid = element.attr("data-sellerid");
        item.buyout = element.attr("data-buyout");
        item.ign = element.attr("data-ign");
        item.league = element.attr("data-league");
        item.name = element.attr("data-name");
        item.corrupted = !element.getElementsByClass("corrupted").isEmpty();
        item.identified = element.getElementsByClass("item-unid").isEmpty();

        item.socketsRaw = element.getElementsByClass("sockets-raw").get(0).text();

        // jsoup never returns null here, only a possibly empty collection.
        Elements accntAgeElement = element.getElementsByAttributeValue("title",
                "account age and highest level");
        if (!accntAgeElement.isEmpty()) {
            item.ageAndHighLvl = accntAgeElement.get(0).text();
        }

        // ----- Requirements ----- //
        Element reqElem = element.getElementsByClass("requirements").get(0);
        for (TextNode reqNode : reqElem.textNodes()) {
            // sample [ Level:&nbsp;37 ,  Strength:&nbsp;42 ,  Intelligence:&nbsp;42 ]
            String req = StringUtils.trimToEmpty(reqNode.getWholeText());
            req = req.replaceAll(regex_horizontal_whitespace, "");
            req = Util.removeThoseDamnWhiteSpace(req);
            String separator = ":";
            String reqType = trim(substringBefore(req, separator));
            switch (reqType) {
            case "Level":
                item.reqLvl = trim(substringAfter(req, separator));
                break;
            case "Strength":
                item.reqStr = trim(substringAfter(req, separator));
                break;
            case "Intelligence":
                item.reqInt = trim(substringAfter(req, separator));
                break;
            case "Dexterity":
                item.reqDex = trim(substringAfter(req, separator));
                break;
            }
        }

        // Optional value: keep only the digits that follow "Item quantity:", or "" when absent.
        item.mapQuantity = element.getElementsByAttributeValue("data-name", "mapq").stream().findFirst()
                .map(Element::text).map(s -> substringAfter(s, "Item quantity:"))
                .map(s -> StringUtils.removePattern(s, "[^\\d]")).orElse("")
                .replaceAll(regex_horizontal_whitespace, "").trim();

        // ----- Rarity by checking the item name link class ----- //
        // itemframe0 - normal
        // itemframe1 - magic
        // itemframe2 - rare
        // itemframe3 - unique
        // itemframe4 - gems
        // itemframe5 - currency
        // itemframe6 - divination card
        String itemframeStr = element.getElementsByClass("title").stream().findFirst()
                .map(n -> n.attr("class")).orElse(null);
        itemframeStr = Util.regexMatch("itemframe(\\d)", itemframeStr, 1);
        if (itemframeStr != null) {
            item.rarity = Rarity.valueOf(Integer.parseInt(itemframeStr));
        } else {
            item.rarity = Rarity.unknown;
        }

        // ----- Verify ----- //
        item.dataHash = element.getElementsByAttributeValue("onclick", "verify_modern(this)").stream()
                .findFirst().map(n -> n.attr("data-hash")).orElse("").trim();

        // ----- Mods ----- //
        Elements itemModsElements = element.getElementsByClass("item-mods");
        if (!itemModsElements.isEmpty()) {
            Elements bulletItems = itemModsElements.get(0).getElementsByClass("bullet-item");
            if (!bulletItems.isEmpty()) {
                Elements ulMods = bulletItems.get(0).getElementsByTag("ul");
                if (ulMods.size() == 2) {
                    // Two lists: the first one carries the implicit mod (its last <li>).
                    Element implicitLi = ulMods.get(0).getElementsByTag("li").last();
                    item.implicitMod = new Mod(implicitLi.attr("data-name"), implicitLi.attr("data-value"));
                }
                // The last list always carries the explicit mods.
                Elements modsLi = ulMods.get(ulMods.size() - 1).getElementsByTag("li");
                for (Element modLi : modsLi) {
                    item.explicitMods.add(new Mod(modLi.attr("data-name"), modLi.attr("data-value")));
                }
            }
        }

        // ----- Properties ----- //
        // this is the third column data (the first col is the image, second is the mods, reqs)
        item.quality = dataNameText(element, "q");
        item.physDmgRangeAtMaxQuality = dataNameText(element, "quality_pd");
        item.eleDmgRange = dataNameText(element, "ed");
        item.attackSpeed = dataNameText(element, "aps");
        item.dmgAtMaxQuality = dataNameText(element, "quality_dps");
        item.physDmgAtMaxQuality = dataNameText(element, "quality_pdps");
        item.eleDmg = dataNameText(element, "edps");
        item.armourAtMaxQuality = dataNameText(element, "quality_armour");
        item.evasionAtMaxQuality = dataNameText(element, "quality_evasion");
        item.energyShieldAtMaxQuality = dataNameText(element, "quality_shield");
        item.block = dataNameText(element, "block");
        item.crit = dataNameText(element, "crit");
        item.level = dataNameText(element, "level");

        item.imageUrl = element.getElementsByAttributeValue("alt", "Item icon").get(0).attr("src");
        // The stack size, when present, is a "stackSize=..." query parameter of the icon URL.
        item.stackSize = asList(split(trimToEmpty(item.imageUrl), '&')).stream()
                .filter(t -> t.startsWith("stackSize=")).findFirst().map(s -> substringAfter(s, "="))
                .orElse(null);

        Elements onlineSpans = element.getElementsMatchingText("online");
        item.online = onlineSpans.isEmpty() ? "" : "Online";

        tradeItems.add(item);
    }

    return tradeItems;
}

/**
 * Returns the cleaned text of the first element at or under {@code element} whose
 * {@code data-name} attribute equals {@code dataName}: everything matching
 * {@code regex_horizontal_whitespace} is removed and the result is trimmed.
 *
 * @throws IndexOutOfBoundsException if no element carries that {@code data-name} value
 */
private String dataNameText(Element element, String dataName) {
    return element.getElementsByAttributeValue("data-name", dataName).get(0).text()
            .replaceAll(regex_horizontal_whitespace, "").trim();
}