List of usage examples for org.jsoup.select Elements first
public Element first()
From source file:io.seldon.importer.articles.AttributesImporterUtils.java
/**
 * Builds the set of tags for an article document.
 *
 * <p>When {@code tagsCssSelector} is blank an empty set is returned. Otherwise the
 * selector is applied to {@code articleDoc}: if the first match carries a non-blank
 * {@code content} attribute the tag parts are read from that single element, else
 * they are read from all matched elements. Extra parts derived from {@code title}
 * are added on top.
 *
 * @param articleDoc      parsed article document to search
 * @param tagsCssSelector CSS selector locating the tag element(s); may be blank
 * @param title           article title handed to the extra-tags helper
 * @return a mutable set with the collected tags, never {@code null}
 */
public static Set<String> getTags(Document articleDoc, String tagsCssSelector, String title) {
    Set<String> tagSet = new HashSet<String>();
    if (!StringUtils.isNotBlank(tagsCssSelector)) {
        return tagSet;
    }

    Elements matches = articleDoc.select(tagsCssSelector);
    Element firstMatch = matches.first();
    boolean firstHasContent = (firstMatch != null)
            && (firstMatch.attr("content") != null)
            && StringUtils.isNotBlank(firstMatch.attr("content"));

    List<String> parts = firstHasContent
            ? AttributesImporterUtils.getTagsPartsFromSingleElement(firstMatch)
            : AttributesImporterUtils.getTagsPartsFromMultipleElement(matches);
    List<String> titleParts = AttributesImporterUtils.createExtraTagsPartsFromTitle(title, parts);

    tagSet.addAll(parts);
    tagSet.addAll(titleParts);
    return tagSet;
}
From source file:org.shareok.data.plosdata.PlosUtil.java
public static String getPlosAck(String html) { String ack = ""; Document doc = Jsoup.parse(html.toString()); Elements ackLinks = doc.select("a[id=ack]"); if (!ackLinks.isEmpty()) { Element ackDiv = ackLinks.first().parent(); if (null != ackDiv) { Elements ackParagraphs = ackDiv.select("p"); if (!ackParagraphs.isEmpty()) { for (Element element : ackParagraphs) { if (element.hasText()) ack += element.text(); }// ww w .j av a 2 s . c om } //System.out.println("the ack = "+ack+"\n\n"); } } return ack; }
From source file:org.shareok.data.plosdata.PlosUtil.java
public static String getPlosCitation(String html) { String citation = ""; Document doc = Jsoup.parse(html.toString()); Elements articleInfoDiv = doc.select("div[class=articleinfo]"); if (!articleInfoDiv.isEmpty()) { Element citationParagraph = articleInfoDiv.first().child(0); if (null != citationParagraph) { citation = citationParagraph.text().replace("Citation:", ""); //System.out.println("the citation = "+citation+"\n\n"); }/*from w w w.j a va 2 s . com*/ } return citation; }
From source file:org.shareok.data.plosdata.PlosUtil.java
/** * //from w ww . j a v a 2 s. c om * @param html : The string of the web page source * @return author contribution statement */ public static String getAuthorContributions(String html) { String contributions = ""; Document doc = Jsoup.parse(html.toString()); Elements articleInfoDiv = doc.select("div[class=contributions]"); if (!articleInfoDiv.isEmpty()) { Element contributionsParagraph = articleInfoDiv.first().child(2); if (null != contributionsParagraph) { contributions = contributionsParagraph.text(); //System.out.println("the contributions = "+contributions+"\n\n");System.exit(0); } } return contributions; }
From source file:org.shareok.data.plosdata.PlosUtil.java
/**
 * Extracts the subject names listed on a page.
 *
 * <p>Takes the first {@code div[class=subject-areas-container]} element, looks at its
 * second child element, and collects the text of the first child of every
 * {@code <li>} found there.
 *
 * @param html the string of the web page source; {@code null} yields {@code null}
 * @return the subject names in document order, or {@code null} when none were found
 *         (the {@code null} return is kept for compatibility with existing callers)
 */
public static String[] getSubjects(String html) {
    if (html == null) {
        return null;
    }
    Document doc = Jsoup.parse(html);
    Element container = doc.select("div[class=subject-areas-container]").first();
    // child(1) throws IndexOutOfBoundsException unless at least two children exist.
    if (container == null || container.children().size() < 2) {
        return null;
    }

    List<String> subjectsList = new ArrayList<>();
    for (Element li : container.child(1).select("li")) {
        // A list item without child elements has nothing to read; skip it instead of
        // letting child(0) throw.
        if (li.children().isEmpty()) {
            continue;
        }
        subjectsList.add(li.child(0).text());
    }

    if (subjectsList.isEmpty()) {
        return null;
    }
    return subjectsList.toArray(new String[subjectsList.size()]);
}
From source file:org.keycloak.testsuite.util.SamlClient.java
/** * Extracts and parses value of SAMLResponse input field of a form present in the given page. * * @param responsePage HTML code of the page * @return/*from w ww. j av a2 s . c om*/ */ public static SAMLDocumentHolder extractSamlResponseFromForm(String responsePage) { org.jsoup.nodes.Document theResponsePage = Jsoup.parse(responsePage); Elements samlResponses = theResponsePage.select("input[name=SAMLResponse]"); Elements samlRequests = theResponsePage.select("input[name=SAMLRequest]"); int size = samlResponses.size() + samlRequests.size(); assertThat("Checking uniqueness of SAMLResponse/SAMLRequest input field in the page", size, is(1)); Element respElement = samlResponses.isEmpty() ? samlRequests.first() : samlResponses.first(); return SAMLRequestParser.parseResponsePostBinding(respElement.val()); }
From source file:com.entertailion.android.slideshow.utils.Utils.java
/** * Determine if there is a high resolution icon available for the web site. * //from w w w . ja v a 2s. c o m * @param context * @param url * @return */ public static final String getWebSiteIcon(Context context, String url) { String icon = null; if (url != null) { String data = Utils.getCachedData(context, url, true); if (data != null) { Document doc = Jsoup.parse(data); if (doc != null) { String href = null; Elements metas = doc.select("meta[itemprop=image]"); if (metas.size() > 0) { Element meta = metas.first(); href = meta.attr("abs:content"); // weird jsoup bug: abs doesn't always work if (href == null || href.trim().length() == 0) { href = url + meta.attr("content"); } } if (href == null || href.trim().length() == 0) { // Find the Microsoft tile icon metas = doc.select("meta[name=msapplication-TileImage]"); if (metas.size() > 0) { Element meta = metas.first(); href = meta.attr("abs:content"); // weird jsoup bug: abs doesn't always work if (href == null || href.trim().length() == 0) { href = url + meta.attr("content"); } } } if (href == null || href.trim().length() == 0) { // Find the Apple touch icon Elements links = doc.select("link[rel=apple-touch-icon]"); if (links.size() > 0) { Element link = links.first(); href = link.attr("abs:href"); // weird jsoup bug: abs doesn't always work if (href == null || href.trim().length() == 0) { href = url + link.attr("href"); } } } if (href == null || href.trim().length() == 0) { // Find the Facebook open graph icon metas = doc.select("meta[property=og:image]"); if (metas.size() > 0) { Element link = metas.first(); href = link.attr("abs:content"); // weird jsoup bug: abs doesn't always work if (href == null || href.trim().length() == 0) { href = url + link.attr("content"); } } } if (href != null && href.trim().length() > 0) { try { Bitmap bitmap = Utils.getBitmapFromURL(href); if (bitmap != null) { icon = "web_site_icon_" + Utils.clean(href) + ".png"; Utils.saveToFile(context, bitmap, bitmap.getWidth(), 
bitmap.getHeight(), icon); bitmap.recycle(); } } catch (Exception e) { Log.d(LOG_TAG, "getWebSiteIcon", e); } } } } } return icon; }
From source file:com.astamuse.asta4d.render.RenderUtil.java
private final static boolean isBlockedByParentSnippet(Document doc, Element elem) { boolean isBlocked; String blockingId = elem.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_BLOCK); if (blockingId.isEmpty()) { // empty block id means there is no parent snippet that need to be // aware. if the original block is from a embed template, it means // that all of the parent snippets have been finished or this // element would not be imported now. isBlocked = false;//from ww w. j av a 2 s. c o m } else { String parentSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR, ExtNodeConstants.ATTR_SNIPPET_REF, blockingId); Elements parentSnippetSearch = elem.parents().select(parentSelector); if (parentSnippetSearch.isEmpty()) { isBlocked = false; } else { Element parentSnippet = parentSnippetSearch.first(); if (parentSnippet.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS) .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED)) { isBlocked = false; } else { isBlocked = true; } } } return isBlocked; }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
private static Element getSingleElementByQuery(Element root, String query) { Elements elementsByQuery = root.select(query); if (elementsByQuery.size() > 1) { String error = "Found " + elementsByQuery.size() + " elements matching query \"" + query + "\" inside of " + root.tagName() + "-" + root.className(); throw new RuntimeException(error + root.html()); }//ww w . j av a2 s. com return elementsByQuery.first(); }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
private static Element getSingleElementByClass(Element root, String className) { Elements elementsByClass = root.getElementsByClass(className); if (elementsByClass.size() != 1) { String error = "Found " + elementsByClass.size() + " elements with class " + className + " inside of " + root.tagName() + "-" + root.className(); throw new RuntimeException(error + root.html()); }// w ww . j av a2 s . c om return elementsByClass.first(); }