List of usage examples for org.jsoup.nodes Element attr
public String attr(String attributeKey)
From source file:io.andyc.papercut.api.PrintApi.java
/** * Extracts the form data required to submit to the print service in order * to set the printer to print to/* w ww.ja v a 2 s . c o m*/ * * @param printerType {Document} - the Document to parse and extract the * printer type form * @param printJob {PrintJob} - the print job to be worked on * * @return {Map<String, String>} - the resulting form data to submit to the * print service */ static Map<String, String> buildPrinterTypeData(Document printerType, PrintJob printJob) { Elements printerTypeElements = printerType.select("form#form").select("input"); Map<String, String> result = new HashMap<>(); for (Element element : printerTypeElements) { String key = element.attr("name"); String value = element.attr("value"); if (Objects.equals(key, "$Submit$0")) { continue; } if (key.equals("$RadioGroup")) { value = printJob.getPrinterOption().getValue(); } result.put(key, value); } return result; }
From source file:io.andyc.papercut.api.PrintApi.java
/** * Parses the set number of copies page and builds the data required to * submit the form/*from w w w .j a va 2s .c om*/ * * @param printJob {PrintJon} - the print job in question * @param prevDoc {Document} - the HTML page containing the form to set the * number of copies to be printed * * @return {Map<String, String>} - a HashMap containing the form data */ static Map<String, String> buildSetNumberOfCopiesData(Document prevDoc, PrintJob printJob) { Map<String, String> result = new HashMap<>(); for (Element element : prevDoc.select("form").select("input")) { String name = element.attr("name"); String value = element.attr("value"); if (Objects.equals(name, "$Submit$0")) { continue; } if (Objects.equals(name, "copies")) { value = String.valueOf(printJob.getCopies()); } if (Objects.equals(value, "")) { continue; } result.put(name, value); } return result; }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
private static Map<String, String> getInheritedMethods(Element summaryAnchor) { Map<String, String> inherited = new HashMap<>(); if (summaryAnchor == null) return inherited; summaryAnchor = summaryAnchor.parent(); Elements inheritAnchors = summaryAnchor.select("a[name^=\"methods.inherited.from.class\"]"); for (Element inheritAnchor : inheritAnchors) { if (inheritAnchor.siblingElements().size() != 2) throw new RuntimeException("Got unexpected html while parsing inherited methods from class " + inheritAnchor.attr("name")); Element next = inheritAnchor.nextElementSibling(); if (!next.tagName().equals("h3")) throw new RuntimeException("Got unexpected html while parsing inherited methods from class " + inheritAnchor.attr("name")); Element sub = next.children().last(); if (sub == null || !sub.tagName().equals("a")) continue; String parent = sub.text().toLowerCase(); next = next.nextElementSibling(); if (!next.tagName().equals("code")) throw new RuntimeException("Got unexpected html while parsing inherited methods from class " + inheritAnchor.attr("name")); for (sub = next.children().first(); sub != null; sub = sub.nextElementSibling()) { if (sub.tagName().equals("a")) { inherited.putIfAbsent(sub.text().toLowerCase(), parent); }//from w ww .ja va2 s . c om } } return inherited; }
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
/**
 * Rewrites generated XHTML so that it can be stored as Confluence markup.
 *
 * The input is parsed with the XML parser, every {@code a} element whose
 * {@code href} has an entry in {@code confluenceLinkMap} is replaced by that
 * entry's Confluence link markup, and the heading, break and spacing
 * helpers are then applied before the document is serialised again.
 *
 * @param inputXhtml        the XHTML to convert
 * @param confluenceLinkMap link markup keyed by the original {@code href}
 * @return the serialised, reformatted document
 */
private static String reformatXHtml(final String inputXhtml,
        final Map<String, ConfluenceLink> confluenceLinkMap) {
    final Document doc = Jsoup.parse(inputXhtml, "utf-8", Parser.xmlParser());
    doc.outputSettings()
            .prettyPrint(false)
            .escapeMode(xhtml)
            .charset("UTF-8");

    for (final Element anchor : doc.select("a")) {
        final String href = anchor.attr("href");
        final ConfluenceLink mapping = confluenceLinkMap.get(href);

        if (mapping != null) {
            final String markup = mapping.getConfluenceLinkMarkup();
            LOG.debug("LINK CONVERSION: {} -> {}", href, markup);

            // Put the Confluence markup in front of the anchor, then drop the
            // anchor together with its former content.
            anchor.before(markup);
            anchor.html("");
            anchor.unwrap();
        } else {
            LOG.debug("NO LINK MAPPING FOUND TO COVERT LINK: {}", href);
        }
    }

    for (final String headingSelector : new String[] {"h2", "h3", "#toctitle"}) {
        reformatXHtmlHeadings(doc, headingSelector);
    }

    final SwaggerConfluenceConfig config = SWAGGER_CONFLUENCE_CONFIG.get();
    final boolean singlePage = config.getPaginationMode() == PaginationMode.SINGLE_PAGE;

    if (singlePage) {
        if (config.isIncludeTableOfContentsOnSinglePage()) {
            reformatXHtmlBreakAfterElements(doc, "#toc");
        }
        reformatXHtmlBreakAfterElements(doc, ".sect1");
    }

    for (final String sectionSelector : new String[] {".sect2", ".sect3"}) {
        reformatXHtmlSpacing(doc.select(sectionSelector));
    }

    return doc.html();
}
From source file:com.screenslicer.core.util.BrowserUtil.java
/**
 * Picks the element under {@code body} that best matches the description in
 * {@code htmlNode} and hands it to the
 * {@code toElement(browser, element, htmlNode, recurse)} overload.
 *
 * A unique match on {@code id} is tried first. Otherwise each populated
 * field of {@code htmlNode} contributes a list of candidate elements, and
 * every occurrence of an element in any list counts as a vote (2 points,
 * plus 1 if {@code NodeUtil.isHidden} reports it as not hidden). The
 * element with the strictly highest score wins.
 *
 * @param browser  browser used to open the document and read the current URL
 * @param htmlNode description of the element to find
 * @param body     root to search in; when {@code null} it is obtained from
 *                 {@code BrowserUtil.openElement}
 * @param recurse  passed through unchanged to the overload
 * @return whatever the overload returns for the winning element
 * @throws ActionFailed declared by this method; presumably raised by the
 *                      helpers it calls (not visible here)
 */
private static WebElement toElement(Browser browser, HtmlNode htmlNode, Element body, boolean recurse)
        throws ActionFailed {
    if (body == null) {
        body = BrowserUtil.openElement(browser, true, null, null, null, null);
    }
    // Fast path: an id that matches exactly one element is tried directly.
    if (!CommonUtil.isEmpty(htmlNode.id)) {
        Elements elements = body.getElementsByAttributeValue("id", htmlNode.id);
        if (elements.size() == 1) {
            WebElement element = toElement(browser, elements.get(0), htmlNode, recurse);
            if (element != null) {
                return element;
            }
        }
    }
    // Each entry of 'selected' is one list of candidates; lists are tallied below.
    List<Elements> selected = new ArrayList<Elements>();
    if (!CommonUtil.isEmpty(htmlNode.tagName)) {
        selected.add(body.getElementsByTag(htmlNode.tagName));
    } else if (!CommonUtil.isEmpty(htmlNode.href)) {
        // No tag name given but an href is: consider all anchors.
        selected.add(body.getElementsByTag("a"));
    }
    if (!CommonUtil.isEmpty(htmlNode.id)) {
        selected.add(body.getElementsByAttributeValue("id", htmlNode.id));
    }
    if (!CommonUtil.isEmpty(htmlNode.name)) {
        selected.add(body.getElementsByAttributeValue("name", htmlNode.name));
    }
    if (!CommonUtil.isEmpty(htmlNode.type)) {
        selected.add(body.getElementsByAttributeValue("type", htmlNode.type));
    }
    if (!CommonUtil.isEmpty(htmlNode.value)) {
        selected.add(body.getElementsByAttributeValue("value", htmlNode.value));
    }
    if (!CommonUtil.isEmpty(htmlNode.title)) {
        selected.add(body.getElementsByAttributeValue("title", htmlNode.title));
    }
    if (!CommonUtil.isEmpty(htmlNode.role)) {
        selected.add(body.getElementsByAttributeValue("role", htmlNode.role));
    }
    if (!CommonUtil.isEmpty(htmlNode.alt)) {
        selected.add(body.getElementsByAttributeValue("alt", htmlNode.alt));
    }
    if (htmlNode.classes != null && htmlNode.classes.length > 0) {
        // Count, per element, how many of the requested classes it carries.
        Map<Element, Integer> found = new HashMap<Element, Integer>();
        for (int i = 0; i < htmlNode.classes.length; i++) {
            Elements elements = body.getElementsByClass(htmlNode.classes[i]);
            for (Element element : elements) {
                if (!found.containsKey(element)) {
                    found.put(element, 0);
                }
                found.put(element, found.get(element) + 1);
            }
        }
        // Keep only the elements with the highest class-match count: walk the
        // possible counts downwards and stop at the first non-empty group.
        Elements elements = new Elements();
        for (int i = htmlNode.classes.length; i > 0; i--) {
            for (Map.Entry<Element, Integer> entry : found.entrySet()) {
                if (entry.getValue() == i) {
                    elements.add(entry.getKey());
                }
            }
            if (!elements.isEmpty()) {
                break;
            }
        }
        selected.add(elements);
    }
    if (!CommonUtil.isEmpty(htmlNode.href)) {
        Elements hrefs = body.getElementsByAttribute("href");
        Elements toAdd = new Elements();
        String currentUrl = browser.getCurrentUrl();
        String hrefGiven = htmlNode.href;
        for (Element href : hrefs) {
            String hrefFound = href.attr("href");
            // The same element is added several times on purpose: each
            // occurrence is tallied separately below, so repeats act as weight.
            if (hrefGiven.equalsIgnoreCase(hrefFound)) {
                // Case-insensitive exact match: weight 3.
                toAdd.add(href);
                toAdd.add(href);
                toAdd.add(href);
            } else if (htmlNode.fuzzy && hrefFound != null && hrefFound.endsWith(hrefGiven)) {
                // Fuzzy suffix match: weight 2.
                toAdd.add(href);
                toAdd.add(href);
            } else if (htmlNode.fuzzy && hrefFound != null && hrefFound.contains(hrefGiven)) {
                // Fuzzy substring match: weight 1.
                toAdd.add(href);
            } else {
                // Fall back to comparing both hrefs after canonicalising them
                // against the current URL: weight 1.
                // NOTE(review): assumes UrlUtil.toCanonicalUri never returns null
                // for hrefGiven - confirm, otherwise this dereference can fail.
                String uriGiven = UrlUtil.toCanonicalUri(currentUrl, hrefGiven);
                String uriFound = UrlUtil.toCanonicalUri(currentUrl, hrefFound);
                if (uriGiven.equalsIgnoreCase(uriFound)) {
                    toAdd.add(href);
                }
            }
        }
        selected.add(toAdd);
    }
    if (!CommonUtil.isEmpty(htmlNode.innerText)) {
        // Two lists: text found anywhere, and text that is the entire content
        // apart from surrounding whitespace. An element matching both is
        // tallied twice.
        selected.add(body.getElementsMatchingText(Pattern.quote(htmlNode.innerText)));
        selected.add(body.getElementsMatchingText("^\\s*" + Pattern.quote(htmlNode.innerText) + "\\s*$"));
    }
    if (htmlNode.multiple != null) {
        selected.add(body.getElementsByAttribute("multiple"));
    }
    // Tally: 2 points per occurrence, plus 1 when the element is not hidden.
    Map<Element, Integer> votes = new HashMap<Element, Integer>();
    for (Elements elements : selected) {
        for (Element element : elements) {
            if (!votes.containsKey(element)) {
                votes.put(element, 0);
            }
            votes.put(element, votes.get(element) + 2);
            if (!NodeUtil.isHidden(element)) {
                votes.put(element, votes.get(element) + 1);
            }
        }
    }
    // Strictly-greater comparison: on a tie the element met first in the
    // map's iteration order is kept.
    int maxVote = 0;
    Element maxElement = null;
    for (Map.Entry<Element, Integer> entry : votes.entrySet()) {
        if (entry.getValue() > maxVote) {
            maxVote = entry.getValue();
            maxElement = entry.getKey();
        }
    }
    // maxElement is still null here when no candidate was collected.
    return toElement(browser, maxElement, htmlNode, recurse);
}
From source file:io.seldon.importer.articles.FileItemAttributesImporter.java
public static Map<String, String> getAttributes(String url, String existingCategory) { ItemProcessResult itemProcessResult = new ItemProcessResult(); itemProcessResult.client_item_id = url; itemProcessResult.extraction_status = "EXTRACTION_FAILED"; logger.info("Trying to get attributes for " + url); Map<String, String> attributes = null; String title = ""; String category = ""; String subCategory = ""; String img_url = ""; String description = ""; String tags = ""; String leadtext = ""; String link = ""; String publishDate = ""; String domain = ""; try {/*w w w . ja v a 2 s .com*/ long now = System.currentTimeMillis(); long timeSinceLastRequest = now - lastUrlFetchTime; if (timeSinceLastRequest < minFetchGapMsecs) { long timeToSleep = minFetchGapMsecs - timeSinceLastRequest; logger.info( "Sleeping " + timeToSleep + "msecs as time since last fetch is " + timeSinceLastRequest); Thread.sleep(timeToSleep); } Document articleDoc = Jsoup.connect(url).userAgent("SeldonBot/1.0").timeout(httpGetTimeout).get(); lastUrlFetchTime = System.currentTimeMillis(); //get IMAGE URL if (StringUtils.isNotBlank(imageCssSelector)) { Element imageElement = articleDoc.select(imageCssSelector).first(); if (imageElement != null) { if (imageElement.attr("content") != null) { img_url = imageElement.attr("content"); } if (StringUtils.isBlank(img_url) && imageElement.attr("src") != null) { img_url = imageElement.attr("src"); } if (StringUtils.isBlank(img_url) && imageElement.attr("href") != null) { img_url = imageElement.attr("href"); } } } if (StringUtils.isBlank(img_url) && StringUtils.isNotBlank(defImageUrl)) { logger.info("Setting image to default: " + defImageUrl); img_url = defImageUrl; } img_url = StringUtils.strip(img_url); //get TITLE if (StringUtils.isNotBlank(titleCssSelector)) { Element titleElement = articleDoc.select(titleCssSelector).first(); if (titleElement != null && titleElement.attr("content") != null) { title = titleElement.attr("content"); } } //get Lead Text if 
(StringUtils.isNotBlank(leadTextCssSelector)) { Element leadElement = articleDoc.select(leadTextCssSelector).first(); if (leadElement != null && leadElement.attr("content") != null) { leadtext = leadElement.attr("content"); } } //get publish date if (StringUtils.isNotBlank(publishDateCssSelector)) { //2013-01-21T10:40:55Z Element pubElement = articleDoc.select(publishDateCssSelector).first(); if (pubElement != null && pubElement.attr("content") != null) { String pubtext = pubElement.attr("content"); SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH); Date result = null; try { result = df.parse(pubtext); } catch (ParseException e) { logger.info("Failed to parse date withUTC format " + pubtext); } //try a simpler format df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH); try { result = df.parse(pubtext); } catch (ParseException e) { logger.info("Failed to parse date " + pubtext); } if (result != null) publishDate = dateFormatter.format(result); else logger.error("Failed to parse date " + pubtext); } } //get Link if (StringUtils.isNotBlank(linkCssSelector)) { Element linkElement = articleDoc.select(linkCssSelector).first(); if (linkElement != null && linkElement.attr("content") != null) { link = linkElement.attr("content"); } } //get CONTENT if (StringUtils.isNotBlank(textCssSelector)) { Element descriptionElement = articleDoc.select(textCssSelector).first(); if (descriptionElement != null) description = Jsoup.parse(descriptionElement.html()).text(); } //get TAGS Set<String> tagSet = AttributesImporterUtils.getTags(articleDoc, tagsCssSelector, title); if (tagSet.size() > 0) tags = CollectionTools.join(tagSet, ","); //get CATEGORY - client specific if (StringUtils.isNotBlank(categoryCssSelector)) { Element categoryElement = articleDoc.select(categoryCssSelector).first(); if (categoryElement != null && categoryElement.attr("content") != null) { category 
= categoryElement.attr("content"); if (StringUtils.isNotBlank(category)) category = category.toUpperCase(); } } else if (StringUtils.isNotBlank(categoryClassPrefix)) { String className = "io.seldon.importer.articles.category." + categoryClassPrefix + "CategoryExtractor"; Class<?> clazz = Class.forName(className); Constructor<?> ctor = clazz.getConstructor(); CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance(); category = extractor.getCategory(url, articleDoc); } //get Sub CATEGORY - client specific if (StringUtils.isNotBlank(subCategoryCssSelector)) { Element subCategoryElement = articleDoc.select(subCategoryCssSelector).first(); if (subCategoryElement != null && subCategoryElement.attr("content") != null) { subCategory = subCategoryElement.attr("content"); if (StringUtils.isNotBlank(subCategory)) subCategory = category.toUpperCase(); } } else if (StringUtils.isNotBlank(subCategoryClassPrefix)) { String className = "io.seldon.importer.articles.category." + subCategoryClassPrefix + "SubCategoryExtractor"; Class<?> clazz = Class.forName(className); Constructor<?> ctor = clazz.getConstructor(); CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance(); subCategory = extractor.getCategory(url, articleDoc); } // Get domain if (domainIsNeeded) { domain = getDomain(url); } if (StringUtils.isNotBlank(title) && (imageNotNeeded || StringUtils.isNotBlank(img_url)) && (categoryNotNeeded || StringUtils.isNotBlank(category)) && (!domainIsNeeded || StringUtils.isNotBlank(domain))) { attributes = new HashMap<String, String>(); attributes.put(TITLE, title); if (StringUtils.isNotBlank(category)) attributes.put(CATEGORY, category); if (StringUtils.isNotBlank(subCategory)) attributes.put(SUBCATEGORY, subCategory); if (StringUtils.isNotBlank(link)) attributes.put(LINK, link); if (StringUtils.isNotBlank(leadtext)) attributes.put(LEAD_TEXT, leadtext); if (StringUtils.isNotBlank(img_url)) attributes.put(IMG_URL, img_url); if (StringUtils.isNotBlank(tags)) 
attributes.put(TAGS, tags); attributes.put(CONTENT_TYPE, VERIFIED_CONTENT_TYPE); if (StringUtils.isNotBlank(description)) attributes.put(DESCRIPTION, description); if (StringUtils.isNotBlank(publishDate)) attributes.put(PUBLISH_DATE, publishDate); if (StringUtils.isNotBlank(domain)) attributes.put(DOMAIN, domain); System.out.println("Item: " + url + "; Category: " + category); itemProcessResult.extraction_status = "EXTRACTION_SUCCEEDED"; } else { logger.warn("Failed to get title for article " + url); logger.warn("[title=" + title + ", img_url=" + img_url + ", category=" + category + ", domain=" + domain + "]"); } { // check for failures for the log result if (StringUtils.isBlank(title)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "title"; } if (!imageNotNeeded && StringUtils.isBlank(img_url)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "img_url"; } if (!categoryNotNeeded && StringUtils.isBlank(category)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "category"; } } } catch (Exception e) { logger.warn("Article: " + url + ". Attributes import FAILED", e); itemProcessResult.error = e.toString(); } AttributesImporterUtils.logResult(logger, itemProcessResult); return attributes; }
From source file:io.seldon.importer.articles.ItemAttributesImporter.java
public static Map<String, String> getAttributes(String url, String existingCategory) { ItemProcessResult itemProcessResult = new ItemProcessResult(); itemProcessResult.client_item_id = url; itemProcessResult.extraction_status = "EXTRACTION_FAILED"; logger.info("Trying to get attributes for " + url); Map<String, String> attributes = null; String title = ""; String category = ""; String subCategory = ""; String img_url = ""; String description = ""; String tags = ""; String leadtext = ""; String link = ""; String publishDate = ""; String domain = ""; try {//from ww w.j a v a 2s . co m long now = System.currentTimeMillis(); long timeSinceLastRequest = now - lastUrlFetchTime; if (timeSinceLastRequest < minFetchGapMsecs) { long timeToSleep = minFetchGapMsecs - timeSinceLastRequest; logger.info( "Sleeping " + timeToSleep + "msecs as time since last fetch is " + timeSinceLastRequest); Thread.sleep(timeToSleep); } Document articleDoc = Jsoup.connect(url).userAgent("SeldonBot/1.0").timeout(httpGetTimeout).get(); lastUrlFetchTime = System.currentTimeMillis(); //get IMAGE URL if (StringUtils.isNotBlank(imageCssSelector)) { Element imageElement = articleDoc.select(imageCssSelector).first(); if (imageElement != null && imageElement.attr("content") != null) { img_url = imageElement.attr("content"); } if (imageElement != null && StringUtils.isBlank(img_url)) { img_url = imageElement.attr("src"); } if (imageElement != null && StringUtils.isBlank(img_url)) { img_url = imageElement.attr("href"); } } if (StringUtils.isBlank(img_url) && StringUtils.isNotBlank(defImageUrl)) { logger.info("Setting image to default: " + defImageUrl); img_url = defImageUrl; } img_url = StringUtils.strip(img_url); //get TITLE if (StringUtils.isNotBlank(titleCssSelector)) { Element titleElement = articleDoc.select(titleCssSelector).first(); if ((titleElement != null) && (titleElement.attr("content") != null)) { title = titleElement.attr("content"); } // if still blank get from text instead if 
(StringUtils.isBlank(title) && (titleElement != null)) { title = titleElement.text(); } } //get LEAD TEXT if (StringUtils.isNotBlank(leadTextCssSelector)) { Element leadElement = articleDoc.select(leadTextCssSelector).first(); if (leadElement != null && leadElement.attr("content") != null) { leadtext = leadElement.attr("content"); } } //get publish date if (StringUtils.isNotBlank(publishDateCssSelector)) { //2013-01-21T10:40:55Z Element pubElement = articleDoc.select(publishDateCssSelector).first(); if (pubElement != null && pubElement.attr("content") != null) { String pubtext = pubElement.attr("content"); SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH); Date result = null; try { result = df.parse(pubtext); } catch (ParseException e) { logger.info("Failed to parse date withUTC format " + pubtext); } //try a simpler format df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH); try { result = df.parse(pubtext); } catch (ParseException e) { logger.info("Failed to parse date " + pubtext); } if (result != null) publishDate = dateFormatter.format(result); else logger.error("Failed to parse date " + pubtext); } } //get Link if (StringUtils.isNotBlank(linkCssSelector)) { Element linkElement = articleDoc.select(linkCssSelector).first(); if (linkElement != null && linkElement.attr("content") != null) { link = linkElement.attr("content"); } } //get CONTENT if (StringUtils.isNotBlank(textCssSelector)) { Element descriptionElement = articleDoc.select(textCssSelector).first(); if (descriptionElement != null) description = Jsoup.parse(descriptionElement.html()).text(); } //get TAGS Set<String> tagSet = AttributesImporterUtils.getTags(articleDoc, tagsCssSelector, title); if (tagSet.size() > 0) tags = CollectionTools.join(tagSet, ","); //get CATEGORY - client specific if (StringUtils.isNotBlank(categoryCssSelector)) { Element categoryElement = 
articleDoc.select(categoryCssSelector).first(); if (categoryElement != null && categoryElement.attr("content") != null) { category = categoryElement.attr("content"); if (StringUtils.isNotBlank(category)) category = category.toUpperCase(); } } else if (StringUtils.isNotBlank(categoryClassPrefix)) { String className = "io.seldon.importer.articles.category." + categoryClassPrefix + "CategoryExtractor"; Class<?> clazz = Class.forName(className); Constructor<?> ctor = clazz.getConstructor(); CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance(); category = extractor.getCategory(url, articleDoc); } //get Sub CATEGORY - client specific if (StringUtils.isNotBlank(subCategoryCssSelector)) { Element subCategoryElement = articleDoc.select(subCategoryCssSelector).first(); if (subCategoryElement != null && subCategoryElement.attr("content") != null) { subCategory = subCategoryElement.attr("content"); if (StringUtils.isNotBlank(subCategory)) subCategory = category.toUpperCase(); } } else if (StringUtils.isNotBlank(subCategoryClassPrefix)) { String className = "io.seldon.importer.articles.category." 
+ subCategoryClassPrefix + "SubCategoryExtractor"; Class<?> clazz = Class.forName(className); Constructor<?> ctor = clazz.getConstructor(); CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance(); subCategory = extractor.getCategory(url, articleDoc); } // Get domain if (domainIsNeeded) { domain = getDomain(url); } if ((StringUtils.isNotBlank(title) && (imageNotNeeded || StringUtils.isNotBlank(img_url)) && (categoryNotNeeded || StringUtils.isNotBlank(category)) && (!domainIsNeeded || StringUtils.isNotBlank(domain)))) { attributes = new HashMap<String, String>(); attributes.put(TITLE, title); if (StringUtils.isNotBlank(category)) attributes.put(CATEGORY, category); if (StringUtils.isNotBlank(subCategory)) attributes.put(SUBCATEGORY, subCategory); if (StringUtils.isNotBlank(link)) attributes.put(LINK, link); if (StringUtils.isNotBlank(leadtext)) attributes.put(LEAD_TEXT, leadtext); if (StringUtils.isNotBlank(img_url)) attributes.put(IMG_URL, img_url); if (StringUtils.isNotBlank(tags)) attributes.put(TAGS, tags); attributes.put(CONTENT_TYPE, VERIFIED_CONTENT_TYPE); if (StringUtils.isNotBlank(description)) attributes.put(DESCRIPTION, description); if (StringUtils.isNotBlank(publishDate)) attributes.put(PUBLISH_DATE, publishDate); if (StringUtils.isNotBlank(domain)) attributes.put(DOMAIN, domain); System.out.println("Item: " + url + "; Category: " + category + " SubCategory: " + subCategory); itemProcessResult.extraction_status = "EXTRACTION_SUCCEEDED"; } else { logger.warn("Failed to get needed attributes for article " + url); logger.warn("[title=" + title + ", img_url=" + img_url + ", category=" + category + ", domain=" + domain + "]"); } { // check for failures for the log result if (StringUtils.isBlank(title)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? 
"" : ",") + "title"; } if (!imageNotNeeded && StringUtils.isBlank(img_url)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "img_url"; } if (!categoryNotNeeded && StringUtils.isBlank(category)) { itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "category"; } } } catch (Exception e) { logger.error("Article: " + url + ". Attributes import FAILED", e); itemProcessResult.error = e.toString(); } AttributesImporterUtils.logResult(logger, itemProcessResult); return attributes; }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
private static List<DocBlock> getDocBlock(String jdocBase, Element elem, ClassDocumentation reference) { if (elem != null) { String baseLink = JDocUtil.getLink(jdocBase, reference); List<DocBlock> blocks = new ArrayList<>(10); String hashLink = null;/*from w w w . j a va2 s . c om*/ for (elem = elem.nextElementSibling(); elem != null; elem = elem.nextElementSibling()) { if (elem.tagName().equals("a")) { hashLink = '#' + elem.attr("name"); } else if (elem.tagName().equals("ul")) { Element tmp = elem.getElementsByTag("h4").first(); String title = JDocUtil.fixSpaces(tmp.text().trim()); String description = "", signature = ""; OrderedMap<String, List<String>> fields = new ListOrderedMap<>(); for (; tmp != null; tmp = tmp.nextElementSibling()) { if (tmp.tagName().equals("pre")) { //contains full signature signature = JDocUtil.fixSpaces(tmp.text().trim()); } else if (tmp.tagName().equals("div") && tmp.className().equals("block")) { //main block of content (description or deprecation) Element deprecationElem = tmp.getElementsByClass("deprecationComment").first(); if (deprecationElem != null) { //deprecation block fields.put("Deprecated:", Collections .singletonList(JDocUtil.formatText(deprecationElem.html(), baseLink))); } else { //description block description = JDocUtil.formatText(tmp.html(), baseLink); } } else if (tmp.tagName().equals("dl")) { //a field String fieldName = null; List<String> fieldValues = new ArrayList<>(); for (Element element : tmp.children()) { if (element.tagName().equals("dt")) { if (fieldName != null) { fields.put(fieldName, fieldValues); fieldValues = new ArrayList<>(); } fieldName = JDocUtil.fixSpaces(element.text().trim()); } else if (element.tagName().equals("dd")) { fieldValues.add(JDocUtil.formatText(element.html(), baseLink)); } } if (fieldName != null) { fields.put(fieldName, fieldValues); } } } blocks.add(new DocBlock(title, hashLink, signature, description, fields)); } } return blocks; } return null; }
From source file:com.astamuse.asta4d.render.RenderUtil.java
/** * Find out all the snippet in the passed Document and execute them. The Containing embed tag of the passed Document will be exactly * mixed in here too. <br>//from w ww. j a va 2 s.c o m * Recursively contained snippets will be executed from outside to inside, thus the inner snippets will not be executed until all of * their outer snippets are finished. Also, the dynamically created snippets and embed tags will comply with this rule too. * * @param doc * the Document to apply snippets * @throws SnippetNotResovlableException * @throws SnippetInvokeException * @throws TemplateException */ public final static void applySnippets(Document doc) throws SnippetNotResovlableException, SnippetInvokeException, TemplateException, TemplateNotFoundException { if (doc == null) { return; } applyClearAction(doc, false); // retrieve ready snippets String selector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_READY); List<Element> snippetList = new ArrayList<>(doc.select(selector)); int readySnippetCount = snippetList.size(); int blockedSnippetCount = 0; for (int i = readySnippetCount - 1; i >= 0; i--) { // if parent snippet has not been executed, the current snippet will // not be executed too. if (isBlockedByParentSnippet(doc, snippetList.get(i))) { snippetList.remove(i); blockedSnippetCount++; } } readySnippetCount = readySnippetCount - blockedSnippetCount; String renderDeclaration; Renderer renderer; Context context = Context.getCurrentThreadContext(); Configuration conf = Configuration.getConfiguration(); final SnippetInvoker invoker = conf.getSnippetInvoker(); String refId; String currentTemplatePath; Element renderTarget; for (Element element : snippetList) { if (!conf.isSkipSnippetExecution()) { // for a faked snippet node which is created by template // analyzing process, the render target element should be its // child. 
if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE) .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) { renderTarget = element.children().first(); // the hosting element of this faked snippet has been removed by outer a snippet if (renderTarget == null) { element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED); continue; } } else { renderTarget = element; } // we have to reset the ref of current snippet at every time to make sure the ref is always unique(duplicated snippet ref // could be created by list rendering) TemplateUtil.resetSnippetRefs(element); context.setCurrentRenderingElement(renderTarget); renderDeclaration = element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_RENDER); refId = element.attr(ExtNodeConstants.ATTR_SNIPPET_REF); currentTemplatePath = element.attr(ExtNodeConstants.ATTR_TEMPLATE_PATH); context.setCurrentRenderingElement(renderTarget); context.setData(TRACE_VAR_TEMPLATE_PATH, currentTemplatePath); try { if (element.hasAttr(ExtNodeConstants.SNIPPET_NODE_ATTR_PARALLEL)) { ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc); final Context newContext = context.clone(); final String declaration = renderDeclaration; crHelper.submitWithContext(newContext, declaration, refId, new Callable<Renderer>() { @Override public Renderer call() throws Exception { return invoker.invoke(declaration); } }); element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_WAITING); } else { renderer = invoker.invoke(renderDeclaration); applySnippetResultToElement(doc, refId, element, renderTarget, renderer); } } catch (SnippetNotResovlableException | SnippetInvokeException e) { throw e; } catch (Exception e) { SnippetInvokeException se = new SnippetInvokeException( "Error occured when executing rendering on [" + renderDeclaration + "]:" + e.getMessage(), e); throw se; } context.setData(TRACE_VAR_TEMPLATE_PATH, null); 
context.setCurrentRenderingElement(null); } else {// if skip snippet element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED); } } // load embed nodes which blocking parents has finished List<Element> embedNodeList = doc.select(ExtNodeConstants.EMBED_NODE_TAG_SELECTOR); int embedNodeListCount = embedNodeList.size(); Iterator<Element> embedNodeIterator = embedNodeList.iterator(); Element embed; Element embedContent; while (embedNodeIterator.hasNext()) { embed = embedNodeIterator.next(); if (isBlockedByParentSnippet(doc, embed)) { embedNodeListCount--; continue; } embedContent = TemplateUtil.getEmbedNodeContent(embed); TemplateUtil.mergeBlock(doc, embedContent); embed.before(embedContent); embed.remove(); } if ((readySnippetCount + embedNodeListCount) > 0) { TemplateUtil.regulateElement(null, doc); applySnippets(doc); } else { ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc); String delcaration = null; if (crHelper.hasUnCompletedTask()) { delcaration = null; try { FutureRendererHolder holder = crHelper.take(); delcaration = holder.getRenderDeclaration(); String ref = holder.getSnippetRefId(); String reSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR, ExtNodeConstants.ATTR_SNIPPET_REF, ref); Element element = doc.select(reSelector).get(0);// must have Element target; if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE) .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) { target = element.children().first(); } else { target = element; } applySnippetResultToElement(doc, ref, element, target, holder.getRenderer()); applySnippets(doc); } catch (InterruptedException | ExecutionException e) { throw new SnippetInvokeException("Concurrent snippet invocation failed" + (delcaration == null ? "" : " on [" + delcaration + "]"), e); } } } }
From source file:com.nineash.hutsync.client.NetworkUtilities.java
/** * Perform 2-way sync with the server-side contacts. We send a request that * includes all the locally-dirty contacts so that the server can process * those changes, and we receive (and return) a list of contacts that were * updated on the server-side that need to be updated locally. * * @param account The account being synced * @param authtoken The authtoken stored in the AccountManager for this * account/*ww w . j a v a 2 s .c o m*/ * @param serverSyncState A token returned from the server on the last sync * @param dirtyContacts A list of the contacts to send to the server * @return A list of contacts that we need to update locally */ public static void syncCalendar(Context context, Account account, String authtoken, long serverSyncState) throws JSONException, ParseException, IOException, AuthenticationException { ArrayList<SerializableCookie> myCookies; CookieStore cookieStore = new BasicCookieStore(); DefaultHttpClient hClient = getHttpClient(context); mContentResolver = context.getContentResolver(); final String[] weeknames = { "rota_this_week", "rota_next_week" }; long calendar_id = getCalendar(account); if (calendar_id == -1) { Log.e("CalendarSyncAdapter", "Unable to create HutSync event calendar"); return; } try { myCookies = (ArrayList<SerializableCookie>) fromString(authtoken); } catch (final IOException e) { Log.e(TAG, "IOException when expanding authtoken", e); return; } catch (final ClassNotFoundException e) { Log.e(TAG, "ClassNotFoundException when expanding authtoken", e); return; } for (SerializableCookie cur_cookie : myCookies) { cookieStore.addCookie(cur_cookie.getCookie()); } hClient.setCookieStore(cookieStore); Log.i(TAG, "Syncing to: " + SYNC_CONTACTS_URI); HttpGet httpget = new HttpGet(SYNC_CONTACTS_URI); final HttpResponse resp = hClient.execute(httpget); final String response = EntityUtils.toString(resp.getEntity()); HashMap<Long, SyncEntry> localEvents = new HashMap<Long, SyncEntry>(); ArrayList<Event> events = new ArrayList<Event>(); 
Pattern p = Pattern.compile("background-color:(#[[a-f][A-F][0-9]]{6})"); Pattern ps = Pattern .compile(".calendar-key span.(\\S+) \\{ background-color:(#[[a-f][A-F][0-9]]{6}); color:#fff; \\}"); if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { //check we are still logged in //if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_UNAUTHORIZED) { // Log.e(TAG, "Authentication exception in sending dirty contacts"); // throw new AuthenticationException(); //} //if we are logged in Map<String, String> shift_types = new HashMap<String, String>(); int length = weeknames.length; Document doc = Jsoup.parse(response); String full_name = doc.select("a[href*=" + account.name + "/profile]").first().text(); AccountManager mAccountManager = AccountManager.get(context); Account[] the_accounts = mAccountManager.getAccountsByType(Constants.ACCOUNT_TYPE); boolean multiple_accounts = (the_accounts.length > 1); Elements the_styles = doc.select("style"); for (Element the_style : the_styles) { String st_txt = the_style.html(); Matcher ms = ps.matcher(st_txt); while (ms.find()) { // Find each match in turn; String can't do this. String cname = ms.group(1); // Access a submatch group; String can't do this. String ccol = ms.group(2); String rname = doc.select("span." + cname).first().text(); Log.i(TAG, "LOOK: " + cname + ", " + ccol + ", " + rname); shift_types.put(ccol, rname); } } for (int w = 0; w < weeknames.length; w++) { Elements the_dates = doc.select("div.homepage div.accord-content table[id=" + weeknames[w] + "] tr.heading th:not(.skipStyles)"); //for (Element hidden : the_dates) { //0 is Mon, 6 is Sun Element the_date = the_dates.first(); //figure out the year for the Monday. 
String str_v = the_date.text(); String[] str_sub = str_v.split(" "); str_sub[1] = str_sub[1].trim(); String[] date_split = str_sub[1].split("/"); Calendar c = Calendar.getInstance(); int this_month = c.get(Calendar.MONTH) + 1; int monday_month = Integer.parseInt(date_split[1]); int this_year = c.get(Calendar.YEAR); int monday_year = this_year; if (this_month > monday_month) { monday_year++; } else if (this_month < monday_month) { monday_year--; } SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy"); Date date = new Date(); if (str_v != null && !str_v.isEmpty()) { String this_date = str_sub[1] + "/" + monday_year; //we need to figure out the year - sometimes its next year try { date = format.parse(this_date); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } Log.i(TAG, "Dates: " + this_date + " - " + date); } //} for (int i = 1; i < 8; ++i) { //1 is monday, 7 is sunday Elements hiddens = doc.select("div.homepage div.accord-content table[id=" + weeknames[w] + "] td:eq(" + Integer.toString(i) + "):not(.skipStyles) div.timeElem"); int add_days = i - 1; for (Element hidden : hiddens) { String str = hidden.text(); if (str != null && !str.isEmpty()) { String style = hidden.attr("style"); String bg_col = ""; Matcher m = p.matcher(style); if (m.find()) { bg_col = m.group(1); // Access a submatch group; String can't do this. } Log.i(TAG, "Time: " + str + "(" + bg_col + ")"); String ev_description = ""; //Location too? 
if (multiple_accounts) ev_description += full_name + "\n\n"; String[] times = str.split(" - "); String[] start_time = times[0].split(":"); String[] end_time = times[1].split(":"); int add_start_hours = Integer.parseInt(start_time[0]); int add_start_minutes = Integer.parseInt(start_time[1]); int add_finish_hours = Integer.parseInt(end_time[0]); int add_finish_minutes = Integer.parseInt(end_time[1]); String ev_shiftType = ""; if (bg_col != null && !bg_col.isEmpty()) { ev_shiftType = (String) shift_types.get(bg_col); } else { ev_shiftType = "Other"; } String ev_title = ev_shiftType + " Shift"; c.setTime(date); c.add(Calendar.DATE, add_days); c.add(Calendar.HOUR_OF_DAY, add_start_hours); c.add(Calendar.MINUTE, add_start_minutes); Date startDate = c.getTime(); long ev_id = startDate.getTime(); c.setTime(date); c.add(Calendar.DATE, add_days); if (add_finish_hours < add_start_hours) { //shift rolls to next day c.add(Calendar.HOUR_OF_DAY, 24); ev_description += "Shift finishes at " + times[1] + " on the next day\n\n"; } else { c.add(Calendar.HOUR_OF_DAY, add_finish_hours); c.add(Calendar.MINUTE, add_finish_minutes); } Date endDate = c.getTime(); Event ev = new Event(ev_id, ev_title, startDate, endDate, ev_description, ev_shiftType); events.add(ev); Log.i(TAG, "Event: " + ev); } } } } //next merge adjacent shifts SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm"); Event prev_event = null; for (Iterator<Event> it = events.iterator(); it.hasNext();) { Event cur_event = it.next(); if (prev_event != null) { if (prev_event.getEndDate().compareTo(cur_event.getStartDate()) == 0) { prev_event.setDescription(prev_event.getDescription() + "Merged consecutive shifts:\n" + timeFormat.format(prev_event.getStartDate()) + " to " + timeFormat.format(prev_event.getEndDate()) + " (" + prev_event.getShiftType() + ")\n" + timeFormat.format(cur_event.getStartDate()) + " to " + timeFormat.format(cur_event.getEndDate()) + " (" + cur_event.getShiftType() + ")\n\n"); 
prev_event.setEndDate(cur_event.getEndDate()); //TODO: only merge if other + FOH/BOH, note times in new description it.remove(); } } prev_event = cur_event; } //next, load local events Cursor c1 = mContentResolver.query( Events.CONTENT_URI.buildUpon().appendQueryParameter(Events.ACCOUNT_NAME, account.name) .appendQueryParameter(Events.ACCOUNT_TYPE, account.type).build(), new String[] { Events._ID, Events._SYNC_ID }, Events.CALENDAR_ID + "=?", new String[] { String.valueOf(calendar_id) }, null); while (c1 != null && c1.moveToNext()) { //if(is_full_sync) { // deleteEvent(context, account, c1.getLong(0)); //} else { SyncEntry entry = new SyncEntry(); entry.raw_id = c1.getLong(0); localEvents.put(c1.getLong(1), entry); //} } c1.close(); try { ArrayList<ContentProviderOperation> operationList = new ArrayList<ContentProviderOperation>(); for (Event event : events) { if (localEvents.containsKey(Long.valueOf(event.getId()))) { SyncEntry entry = localEvents.get(Long.valueOf(event.getId())); operationList.add(updateEvent(calendar_id, account, event, entry.raw_id)); } else { operationList.add(updateEvent(calendar_id, account, event, -1)); } if (operationList.size() >= 50) { try { mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList); } catch (Exception e) { e.printStackTrace(); } operationList.clear(); } } if (operationList.size() > 0) { try { mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList); } catch (Exception e) { e.printStackTrace(); } } } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); return; } } else { Log.e(TAG, "Server error in sending dirty contacts: " + resp.getStatusLine()); throw new IOException(); } }