List of usage examples for org.jsoup.nodes.Element.getElementsByClass
public Elements getElementsByClass(String className)
From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java
private void processEntry(@NotNull String queryString, @NotNull Element entryNode, @NotNull BilingualQueryResultBuilder resultBuilder, @NotNull Language sourceLanguage, @NotNull Language targetLanguage) { if (!StringUtils.equals(entryNode.tag().getName(), "tr")) { LOGGER.warn("Expected <tr> tag - got <{}>", entryNode.tag().getName()); return;// ww w .ja v a 2s . c o m } Elements words = entryNode.getElementsByClass(CLASS_TRANSLATION); if (words.size() != 2) { LOGGER.warn("Expected 2 elements with class \"" + CLASS_TRANSLATION + "\" - got {}", words.size()); return; } BilingualEntryBuilder entryBuilder = ImmutableBilingualEntry.builder(); entryBuilder.setEntryType(detectEntryType(words.get(0))); entryBuilder.setInputObject(processSingleNode(words.get(0), sourceLanguage, queryString)); entryBuilder.setOutputObject(processSingleNode(words.get(1), targetLanguage, queryString)); resultBuilder.addBilingualEntry(entryBuilder.build()); }
From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java
private void extractDescription(@NotNull Element element, String queryString, DictionaryObjectBuilder objectBuilder) { Element descriptionNode = element.getElementsByClass(CLASS_DESCRIPTION).first(); if (descriptionNode == null) { // Try to detect the description node with an alternative class (necessary for synonyms) descriptionNode = element.getElementsByClass(CLASS_EXTRA_INFO).first(); }/*from w w w .ja v a 2 s .c om*/ if (descriptionNode != null) { String description = descriptionNode.text(); description = StringUtils.removeStart(description, DESCRIPTION_BEGIN); description = StringUtils.removeEnd(description, DESCRIPTION_END); if (!StringUtils.equalsIgnoreCase(description, queryString)) // Set description only if it is different from request string objectBuilder.setDescription(StringUtils.strip(description)); } }
From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java
private void extractGender(@NotNull Element element, DictionaryObjectBuilder objectBuilder) { Element genderNode = element.getElementsByClass(CLASS_GENDER).first(); if (genderNode != null) { String gender = genderNode.text(); if (GENDER_MAP.containsKey(gender)) objectBuilder.setGrammaticalGender(GENDER_MAP.get(gender)); }//from w w w . j ava2 s . c o m }
From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java
private EntryType detectEntryType(@NotNull Element element) { Elements wordTypeNodes = element.getElementsByClass(CLASS_WORDTYPE); if (wordTypeNodes.size() < 1) { LOGGER.debug("No wordType node found - defaulting to {}", EntryType.UNKNOWN); return EntryType.UNKNOWN; }//ww w . j a v a2s . c o m EntryType entryType = ENTRY_TYPE_MAP.getOrDefault(wordTypeNodes.first().text(), EntryType.UNKNOWN); if (entryType == EntryType.UNKNOWN) LOGGER.debug("Unable to resolve entry type \"{}\"", entryType); return entryType; }
From source file:ru.redcraft.pinterest4j.core.api.PinAPI.java
public List<Comment> getComments(Pin pin) { LOG.debug("Getting comments for pin = " + pin); List<Comment> comments = new ArrayList<Comment>(); Document doc = null;//w w w .j a va2s .co m String axajResponse = null; try { axajResponse = new APIRequestBuilder(pin.getURL()).setErrorMessage(PIN_API_ERROR).build().getResponse() .getEntity(String.class); doc = Jsoup.parse(new JSONObject(axajResponse).getString("footer")); } catch (JSONException e) { throw new PinterestRuntimeException(PIN_API_ERROR + axajResponse, e); } for (Element comment : doc.select("div.comment")) { long id = Long.valueOf(comment.getElementsByClass("DeleteComment").first().attr("data")); Element contentMeta = comment.getElementsByClass("CommenterMeta").first(); User user = new LazyUser(contentMeta.getElementsByTag("a").first().attr("href").replace("/", ""), getApiManager()); contentMeta.getElementsByTag("a").remove(); String text = contentMeta.text(); comments.add(new CommentImpl(id, text, user, pin)); } LOG.debug("Comments extracted: " + comments); return comments; }
From source file:wo.trade.SearchPageScraper.java
public List<TradeItem> parse() { List<TradeItem> tradeItems = new LinkedList<>(); Document doc = Jsoup.parse(page, "UTF-8"); Element content = doc.getElementById("content"); Elements items = null;//from w w w .j ava 2 s . co m if (content == null) { items = doc.getElementsByClass("item"); } else { items = content.getElementsByClass("item"); } for (Element element : items) { TradeItem item = new TradeItem(); item.id = element.attr("id"); item.id = StringUtils.remove(item.id, "item-container-"); item.seller = element.attr("data-seller"); item.thread = element.attr("data-thread"); item.sellerid = element.attr("data-sellerid"); item.buyout = element.attr("data-buyout"); item.ign = element.attr("data-ign"); item.league = element.attr("data-league"); item.name = element.attr("data-name"); item.corrupted = element.getElementsByClass("corrupted").size() > 0; item.identified = element.getElementsByClass("item-unid").size() == 0; // System.out.println(String.format("Now parsing item id %s name %s", item.id, item.name)); Element sockElem = element.getElementsByClass("sockets-raw").get(0); item.socketsRaw = sockElem.text(); Elements accntAgeElement = element.getElementsByAttributeValue("title", "account age and highest level"); if (accntAgeElement != null && !accntAgeElement.isEmpty()) { item.ageAndHighLvl = accntAgeElement.get(0).text(); } // ----- Requirements ----- // Element reqElem = element.getElementsByClass("requirements").get(0); List<TextNode> reqNodes = reqElem.textNodes(); for (TextNode reqNode : reqNodes) { // sample [ Level: 37 , Strength: 42 , Intelligence: 42 ] String req = StringUtils.trimToEmpty(reqNode.getWholeText()); req = req.replaceAll(regex_horizontal_whitespace, ""); req = Util.removeThoseDamnWhiteSpace(req); String separator = ":"; String reqType = trim(substringBefore(req, separator)); switch (reqType) { case "Level": item.reqLvl = trim(substringAfter(req, separator)); break; case "Strength": item.reqStr = trim(substringAfter(req, separator)); break; 
case "Intelligence": item.reqInt = trim(substringAfter(req, separator)); break; case "Dexterity": item.reqDex = trim(substringAfter(req, separator)); break; } } item.mapQuantity = element.getElementsByAttributeValue("data-name", "mapq").stream().findFirst() .map(n -> n.text()).map(s -> substringAfter(s, "Item quantity:")) .map(s -> StringUtils.removePattern(s, "[^\\d]")).orElse("") .replaceAll(regex_horizontal_whitespace, "").trim(); // ----- Rarity by checking the item name link class ----- // // itemframe0 - normal // itemframe1 - magic // itemframe2 - rare // itemframe3 - unique // itemframe4 - gems // itemframe5 - currency // itemframe6 - divination card String itemframeStr = element.getElementsByClass("title").stream().findFirst().map(n -> n.attr("class")) .orElse(null); itemframeStr = Util.regexMatch("itemframe(\\d)", itemframeStr, 1); if (itemframeStr != null) { int frame = Integer.parseInt(itemframeStr); item.rarity = Rarity.valueOf(frame); } else { item.rarity = Rarity.unknown; } // ----- Verify ----- // item.dataHash = element.getElementsByAttributeValue("onclick", "verify_modern(this)").stream() .findFirst().map(n -> n.attr("data-hash")).orElse("").trim(); // ----- Mods ----- // Elements itemModsElements = element.getElementsByClass("item-mods"); if (itemModsElements != null && itemModsElements.size() > 0) { Element itemMods = itemModsElements.get(0); if (itemMods.getElementsByClass("bullet-item").size() != 0) { Element bulletItem = itemMods.getElementsByClass("bullet-item").get(0); Elements ulMods = bulletItem.getElementsByTag("ul"); if (ulMods.size() == 2) { // implicit mod Elements implicitLIs = ulMods.get(0).getElementsByTag("li"); Element implicitLi = implicitLIs.last(); Mod impMod = new Mod(implicitLi.attr("data-name"), implicitLi.attr("data-value")); item.implicitMod = impMod; } int indexOfExplicitMods = ulMods.size() - 1; Elements modsLi = ulMods.get(indexOfExplicitMods).getElementsByTag("li"); for (Element modLi : modsLi) { // explicit mods Mod 
mod = new Mod(modLi.attr("data-name"), modLi.attr("data-value")); item.explicitMods.add(mod); } } } // ----- Properties ----- // // this is the third column data (the first col is the image, second is the mods, reqs) item.quality = element.getElementsByAttributeValue("data-name", "q").get(0).text() .replaceAll(regex_horizontal_whitespace, "").trim(); item.physDmgRangeAtMaxQuality = element.getElementsByAttributeValue("data-name", "quality_pd").get(0) .text().replaceAll(regex_horizontal_whitespace, "").trim(); item.eleDmgRange = element.getElementsByAttributeValue("data-name", "ed").get(0).text() .replaceAll(regex_horizontal_whitespace, "").trim(); item.attackSpeed = element.getElementsByAttributeValue("data-name", "aps").get(0).text() .replaceAll(regex_horizontal_whitespace, "").trim(); item.dmgAtMaxQuality = element.getElementsByAttributeValue("data-name", "quality_dps").get(0).text() .replaceAll(regex_horizontal_whitespace, "").trim(); item.physDmgAtMaxQuality = element.getElementsByAttributeValue("data-name", "quality_pdps").get(0) .text().replaceAll(regex_horizontal_whitespace, "").trim(); item.eleDmg = element.getElementsByAttributeValue("data-name", "edps").get(0).text() .replaceAll(regex_horizontal_whitespace, "").trim(); item.armourAtMaxQuality = element.getElementsByAttributeValue("data-name", "quality_armour").get(0) .text().replaceAll(regex_horizontal_whitespace, "").trim(); item.evasionAtMaxQuality = element.getElementsByAttributeValue("data-name", "quality_evasion").get(0) .text().replaceAll(regex_horizontal_whitespace, "").trim(); item.energyShieldAtMaxQuality = element.getElementsByAttributeValue("data-name", "quality_shield") .get(0).text().replaceAll(regex_horizontal_whitespace, "").trim(); item.block = element.getElementsByAttributeValue("data-name", "block").get(0).text() .replaceAll(regex_horizontal_whitespace, "").trim(); item.crit = element.getElementsByAttributeValue("data-name", "crit").get(0).text() .replaceAll(regex_horizontal_whitespace, 
"").trim(); item.level = element.getElementsByAttributeValue("data-name", "level").get(0).text() .replaceAll(regex_horizontal_whitespace, "").trim(); item.imageUrl = element.getElementsByAttributeValue("alt", "Item icon").get(0).attr("src"); item.stackSize = asList(split(trimToEmpty(item.imageUrl), '&')).stream() .filter(t -> t.startsWith("stackSize=")).findFirst().map(s -> substringAfter(s, "=")) .orElse(null); Elements onlineSpans = element.getElementsMatchingText("online"); if (!onlineSpans.isEmpty()) { item.online = "Online"; } else { item.online = ""; } tradeItems.add(item); } // System.out.println("DONE --- Items"); return tradeItems; }