Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes Element attr

Introduction

In this page you can find the example usage for org.jsoup.nodes Element attr.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:io.andyc.papercut.api.PrintApi.java

/**
 * Extracts the form data required to submit to the print service in order
 * to set the printer to print to/* w  ww.ja  v a 2  s . c o  m*/
 *
 * @param printerType {Document} - the Document to parse and extract the
 *                    printer type form
 * @param printJob    {PrintJob} - the print job to be worked on
 *
 * @return {Map<String, String>} - the resulting form data to submit to the
 * print service
 */
static Map<String, String> buildPrinterTypeData(Document printerType, PrintJob printJob) {
    Elements printerTypeElements = printerType.select("form#form").select("input");
    Map<String, String> result = new HashMap<>();
    for (Element element : printerTypeElements) {
        String key = element.attr("name");
        String value = element.attr("value");
        if (Objects.equals(key, "$Submit$0")) {
            continue;
        }
        if (key.equals("$RadioGroup")) {
            value = printJob.getPrinterOption().getValue();

        }
        result.put(key, value);
    }

    return result;
}

From source file:io.andyc.papercut.api.PrintApi.java

/**
 * Parses the set number of copies page and builds the data required to
 * submit the form/*from w  w  w .j  a  va 2s  .c  om*/
 *
 * @param printJob {PrintJon} - the print job in question
 * @param prevDoc  {Document} - the HTML page containing the form to set the
 *                 number of copies to be printed
 *
 * @return {Map<String, String>} - a HashMap containing the form data
 */
static Map<String, String> buildSetNumberOfCopiesData(Document prevDoc, PrintJob printJob) {

    Map<String, String> result = new HashMap<>();
    for (Element element : prevDoc.select("form").select("input")) {
        String name = element.attr("name");
        String value = element.attr("value");
        if (Objects.equals(name, "$Submit$0")) {
            continue;
        }
        if (Objects.equals(name, "copies")) {
            value = String.valueOf(printJob.getCopies());
        }
        if (Objects.equals(value, "")) {
            continue;
        }
        result.put(name, value);
    }
    return result;
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

private static Map<String, String> getInheritedMethods(Element summaryAnchor) {
    Map<String, String> inherited = new HashMap<>();
    if (summaryAnchor == null)
        return inherited;
    summaryAnchor = summaryAnchor.parent();
    Elements inheritAnchors = summaryAnchor.select("a[name^=\"methods.inherited.from.class\"]");
    for (Element inheritAnchor : inheritAnchors) {
        if (inheritAnchor.siblingElements().size() != 2)
            throw new RuntimeException("Got unexpected html while parsing inherited methods from class "
                    + inheritAnchor.attr("name"));
        Element next = inheritAnchor.nextElementSibling();
        if (!next.tagName().equals("h3"))
            throw new RuntimeException("Got unexpected html while parsing inherited methods from class "
                    + inheritAnchor.attr("name"));
        Element sub = next.children().last();
        if (sub == null || !sub.tagName().equals("a"))
            continue;
        String parent = sub.text().toLowerCase();
        next = next.nextElementSibling();
        if (!next.tagName().equals("code"))
            throw new RuntimeException("Got unexpected html while parsing inherited methods from class "
                    + inheritAnchor.attr("name"));
        for (sub = next.children().first(); sub != null; sub = sub.nextElementSibling()) {
            if (sub.tagName().equals("a")) {
                inherited.putIfAbsent(sub.text().toLowerCase(), parent);
            }//from   w ww .ja  va2  s  .  c  om
        }
    }
    return inherited;
}

From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java

private static String reformatXHtml(final String inputXhtml,
        final Map<String, ConfluenceLink> confluenceLinkMap) {
    final Document document = Jsoup.parse(inputXhtml, "utf-8", Parser.xmlParser());
    document.outputSettings().prettyPrint(false);
    document.outputSettings().escapeMode(xhtml);
    document.outputSettings().charset("UTF-8");

    final Elements linkElements = document.select("a");

    for (final Element linkElement : linkElements) {
        final String originalHref = linkElement.attr("href");
        final ConfluenceLink confluenceLink = confluenceLinkMap.get(originalHref);

        if (confluenceLink == null) {
            LOG.debug("NO LINK MAPPING FOUND TO COVERT LINK: {}", originalHref);
            continue;
        }/*w ww .  j  av a  2  s  .co m*/

        final String confluenceLinkMarkup = confluenceLink.getConfluenceLinkMarkup();

        LOG.debug("LINK CONVERSION: {} -> {}", originalHref, confluenceLinkMarkup);

        linkElement.before(confluenceLinkMarkup);

        linkElement.html("");
        linkElement.unwrap();
    }

    reformatXHtmlHeadings(document, "h2");
    reformatXHtmlHeadings(document, "h3");
    reformatXHtmlHeadings(document, "#toctitle");

    final SwaggerConfluenceConfig swaggerConfluenceConfig = SWAGGER_CONFLUENCE_CONFIG.get();

    if (swaggerConfluenceConfig.getPaginationMode() == PaginationMode.SINGLE_PAGE) {
        if (swaggerConfluenceConfig.isIncludeTableOfContentsOnSinglePage()) {
            reformatXHtmlBreakAfterElements(document, "#toc");
        }

        reformatXHtmlBreakAfterElements(document, ".sect1");
    }

    reformatXHtmlSpacing(document.select(".sect2"));
    reformatXHtmlSpacing(document.select(".sect3"));

    return document.html();
}

From source file:com.screenslicer.core.util.BrowserUtil.java

private static WebElement toElement(Browser browser, HtmlNode htmlNode, Element body, boolean recurse)
        throws ActionFailed {
    if (body == null) {
        body = BrowserUtil.openElement(browser, true, null, null, null, null);
    }/*  w ww.  ja  v  a  2 s.  c om*/
    if (!CommonUtil.isEmpty(htmlNode.id)) {
        Elements elements = body.getElementsByAttributeValue("id", htmlNode.id);
        if (elements.size() == 1) {
            WebElement element = toElement(browser, elements.get(0), htmlNode, recurse);
            if (element != null) {
                return element;
            }
        }
    }
    List<Elements> selected = new ArrayList<Elements>();
    if (!CommonUtil.isEmpty(htmlNode.tagName)) {
        selected.add(body.getElementsByTag(htmlNode.tagName));
    } else if (!CommonUtil.isEmpty(htmlNode.href)) {
        selected.add(body.getElementsByTag("a"));
    }
    if (!CommonUtil.isEmpty(htmlNode.id)) {
        selected.add(body.getElementsByAttributeValue("id", htmlNode.id));
    }
    if (!CommonUtil.isEmpty(htmlNode.name)) {
        selected.add(body.getElementsByAttributeValue("name", htmlNode.name));
    }
    if (!CommonUtil.isEmpty(htmlNode.type)) {
        selected.add(body.getElementsByAttributeValue("type", htmlNode.type));
    }
    if (!CommonUtil.isEmpty(htmlNode.value)) {
        selected.add(body.getElementsByAttributeValue("value", htmlNode.value));
    }
    if (!CommonUtil.isEmpty(htmlNode.title)) {
        selected.add(body.getElementsByAttributeValue("title", htmlNode.title));
    }
    if (!CommonUtil.isEmpty(htmlNode.role)) {
        selected.add(body.getElementsByAttributeValue("role", htmlNode.role));
    }
    if (!CommonUtil.isEmpty(htmlNode.alt)) {
        selected.add(body.getElementsByAttributeValue("alt", htmlNode.alt));
    }
    if (htmlNode.classes != null && htmlNode.classes.length > 0) {
        Map<Element, Integer> found = new HashMap<Element, Integer>();
        for (int i = 0; i < htmlNode.classes.length; i++) {
            Elements elements = body.getElementsByClass(htmlNode.classes[i]);
            for (Element element : elements) {
                if (!found.containsKey(element)) {
                    found.put(element, 0);
                }
                found.put(element, found.get(element) + 1);
            }
        }
        Elements elements = new Elements();
        for (int i = htmlNode.classes.length; i > 0; i--) {
            for (Map.Entry<Element, Integer> entry : found.entrySet()) {
                if (entry.getValue() == i) {
                    elements.add(entry.getKey());
                }
            }
            if (!elements.isEmpty()) {
                break;
            }
        }
        selected.add(elements);
    }
    if (!CommonUtil.isEmpty(htmlNode.href)) {
        Elements hrefs = body.getElementsByAttribute("href");
        Elements toAdd = new Elements();
        String currentUrl = browser.getCurrentUrl();
        String hrefGiven = htmlNode.href;
        for (Element href : hrefs) {
            String hrefFound = href.attr("href");
            if (hrefGiven.equalsIgnoreCase(hrefFound)) {
                toAdd.add(href);
                toAdd.add(href);
                toAdd.add(href);
            } else if (htmlNode.fuzzy && hrefFound != null && hrefFound.endsWith(hrefGiven)) {
                toAdd.add(href);
                toAdd.add(href);
            } else if (htmlNode.fuzzy && hrefFound != null && hrefFound.contains(hrefGiven)) {
                toAdd.add(href);
            } else {
                String uriGiven = UrlUtil.toCanonicalUri(currentUrl, hrefGiven);
                String uriFound = UrlUtil.toCanonicalUri(currentUrl, hrefFound);
                if (uriGiven.equalsIgnoreCase(uriFound)) {
                    toAdd.add(href);
                }
            }
        }
        selected.add(toAdd);
    }
    if (!CommonUtil.isEmpty(htmlNode.innerText)) {
        selected.add(body.getElementsMatchingText(Pattern.quote(htmlNode.innerText)));
        selected.add(body.getElementsMatchingText("^\\s*" + Pattern.quote(htmlNode.innerText) + "\\s*$"));
    }
    if (htmlNode.multiple != null) {
        selected.add(body.getElementsByAttribute("multiple"));
    }
    Map<Element, Integer> votes = new HashMap<Element, Integer>();
    for (Elements elements : selected) {
        for (Element element : elements) {
            if (!votes.containsKey(element)) {
                votes.put(element, 0);
            }
            votes.put(element, votes.get(element) + 2);
            if (!NodeUtil.isHidden(element)) {
                votes.put(element, votes.get(element) + 1);
            }
        }
    }
    int maxVote = 0;
    Element maxElement = null;
    for (Map.Entry<Element, Integer> entry : votes.entrySet()) {
        if (entry.getValue() > maxVote) {
            maxVote = entry.getValue();
            maxElement = entry.getKey();
        }
    }
    return toElement(browser, maxElement, htmlNode, recurse);
}

From source file:io.seldon.importer.articles.FileItemAttributesImporter.java

public static Map<String, String> getAttributes(String url, String existingCategory) {
    ItemProcessResult itemProcessResult = new ItemProcessResult();
    itemProcessResult.client_item_id = url;
    itemProcessResult.extraction_status = "EXTRACTION_FAILED";

    logger.info("Trying to get attributes for " + url);
    Map<String, String> attributes = null;
    String title = "";
    String category = "";
    String subCategory = "";
    String img_url = "";
    String description = "";
    String tags = "";
    String leadtext = "";
    String link = "";
    String publishDate = "";
    String domain = "";
    try {/*w  w w . ja v a 2 s  .com*/
        long now = System.currentTimeMillis();
        long timeSinceLastRequest = now - lastUrlFetchTime;
        if (timeSinceLastRequest < minFetchGapMsecs) {
            long timeToSleep = minFetchGapMsecs - timeSinceLastRequest;
            logger.info(
                    "Sleeping " + timeToSleep + "msecs as time since last fetch is " + timeSinceLastRequest);
            Thread.sleep(timeToSleep);
        }
        Document articleDoc = Jsoup.connect(url).userAgent("SeldonBot/1.0").timeout(httpGetTimeout).get();
        lastUrlFetchTime = System.currentTimeMillis();
        //get IMAGE URL
        if (StringUtils.isNotBlank(imageCssSelector)) {
            Element imageElement = articleDoc.select(imageCssSelector).first();
            if (imageElement != null) {
                if (imageElement.attr("content") != null) {
                    img_url = imageElement.attr("content");
                }
                if (StringUtils.isBlank(img_url) && imageElement.attr("src") != null) {
                    img_url = imageElement.attr("src");
                }
                if (StringUtils.isBlank(img_url) && imageElement.attr("href") != null) {
                    img_url = imageElement.attr("href");
                }

            }
        }
        if (StringUtils.isBlank(img_url) && StringUtils.isNotBlank(defImageUrl)) {
            logger.info("Setting image to default: " + defImageUrl);
            img_url = defImageUrl;
        }
        img_url = StringUtils.strip(img_url);

        //get TITLE
        if (StringUtils.isNotBlank(titleCssSelector)) {
            Element titleElement = articleDoc.select(titleCssSelector).first();
            if (titleElement != null && titleElement.attr("content") != null) {
                title = titleElement.attr("content");
            }
        }

        //get Lead Text
        if (StringUtils.isNotBlank(leadTextCssSelector)) {
            Element leadElement = articleDoc.select(leadTextCssSelector).first();
            if (leadElement != null && leadElement.attr("content") != null) {
                leadtext = leadElement.attr("content");
            }
        }

        //get publish date
        if (StringUtils.isNotBlank(publishDateCssSelector)) {
            //2013-01-21T10:40:55Z
            Element pubElement = articleDoc.select(publishDateCssSelector).first();
            if (pubElement != null && pubElement.attr("content") != null) {
                String pubtext = pubElement.attr("content");
                SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
                Date result = null;
                try {
                    result = df.parse(pubtext);
                } catch (ParseException e) {
                    logger.info("Failed to parse date withUTC format " + pubtext);
                }
                //try a simpler format
                df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH);
                try {
                    result = df.parse(pubtext);
                } catch (ParseException e) {
                    logger.info("Failed to parse date " + pubtext);
                }

                if (result != null)
                    publishDate = dateFormatter.format(result);
                else
                    logger.error("Failed to parse date " + pubtext);
            }
        }

        //get Link
        if (StringUtils.isNotBlank(linkCssSelector)) {
            Element linkElement = articleDoc.select(linkCssSelector).first();
            if (linkElement != null && linkElement.attr("content") != null) {
                link = linkElement.attr("content");
            }
        }

        //get CONTENT
        if (StringUtils.isNotBlank(textCssSelector)) {
            Element descriptionElement = articleDoc.select(textCssSelector).first();
            if (descriptionElement != null)
                description = Jsoup.parse(descriptionElement.html()).text();
        }

        //get TAGS
        Set<String> tagSet = AttributesImporterUtils.getTags(articleDoc, tagsCssSelector, title);

        if (tagSet.size() > 0)
            tags = CollectionTools.join(tagSet, ",");

        //get CATEGORY - client specific
        if (StringUtils.isNotBlank(categoryCssSelector)) {
            Element categoryElement = articleDoc.select(categoryCssSelector).first();
            if (categoryElement != null && categoryElement.attr("content") != null) {
                category = categoryElement.attr("content");
                if (StringUtils.isNotBlank(category))
                    category = category.toUpperCase();
            }
        } else if (StringUtils.isNotBlank(categoryClassPrefix)) {
            String className = "io.seldon.importer.articles.category." + categoryClassPrefix
                    + "CategoryExtractor";
            Class<?> clazz = Class.forName(className);
            Constructor<?> ctor = clazz.getConstructor();
            CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance();
            category = extractor.getCategory(url, articleDoc);
        }

        //get Sub CATEGORY - client specific
        if (StringUtils.isNotBlank(subCategoryCssSelector)) {
            Element subCategoryElement = articleDoc.select(subCategoryCssSelector).first();
            if (subCategoryElement != null && subCategoryElement.attr("content") != null) {
                subCategory = subCategoryElement.attr("content");
                if (StringUtils.isNotBlank(subCategory))
                    subCategory = category.toUpperCase();
            }
        } else if (StringUtils.isNotBlank(subCategoryClassPrefix)) {
            String className = "io.seldon.importer.articles.category." + subCategoryClassPrefix
                    + "SubCategoryExtractor";
            Class<?> clazz = Class.forName(className);
            Constructor<?> ctor = clazz.getConstructor();
            CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance();
            subCategory = extractor.getCategory(url, articleDoc);
        }

        // Get domain
        if (domainIsNeeded) {
            domain = getDomain(url);
        }

        if (StringUtils.isNotBlank(title) && (imageNotNeeded || StringUtils.isNotBlank(img_url))
                && (categoryNotNeeded || StringUtils.isNotBlank(category))
                && (!domainIsNeeded || StringUtils.isNotBlank(domain))) {
            attributes = new HashMap<String, String>();
            attributes.put(TITLE, title);
            if (StringUtils.isNotBlank(category))
                attributes.put(CATEGORY, category);
            if (StringUtils.isNotBlank(subCategory))
                attributes.put(SUBCATEGORY, subCategory);
            if (StringUtils.isNotBlank(link))
                attributes.put(LINK, link);
            if (StringUtils.isNotBlank(leadtext))
                attributes.put(LEAD_TEXT, leadtext);
            if (StringUtils.isNotBlank(img_url))
                attributes.put(IMG_URL, img_url);
            if (StringUtils.isNotBlank(tags))
                attributes.put(TAGS, tags);
            attributes.put(CONTENT_TYPE, VERIFIED_CONTENT_TYPE);
            if (StringUtils.isNotBlank(description))
                attributes.put(DESCRIPTION, description);
            if (StringUtils.isNotBlank(publishDate))
                attributes.put(PUBLISH_DATE, publishDate);
            if (StringUtils.isNotBlank(domain))
                attributes.put(DOMAIN, domain);
            System.out.println("Item: " + url + "; Category: " + category);
            itemProcessResult.extraction_status = "EXTRACTION_SUCCEEDED";
        } else {
            logger.warn("Failed to get title for article " + url);
            logger.warn("[title=" + title + ", img_url=" + img_url + ", category=" + category + ", domain="
                    + domain + "]");
        }

        { // check for failures for the log result
            if (StringUtils.isBlank(title)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "title";
            }
            if (!imageNotNeeded && StringUtils.isBlank(img_url)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "img_url";
            }
            if (!categoryNotNeeded && StringUtils.isBlank(category)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",")
                        + "category";
            }
        }
    } catch (Exception e) {
        logger.warn("Article: " + url + ". Attributes import FAILED", e);
        itemProcessResult.error = e.toString();
    }

    AttributesImporterUtils.logResult(logger, itemProcessResult);

    return attributes;
}

From source file:io.seldon.importer.articles.ItemAttributesImporter.java

public static Map<String, String> getAttributes(String url, String existingCategory) {
    ItemProcessResult itemProcessResult = new ItemProcessResult();
    itemProcessResult.client_item_id = url;
    itemProcessResult.extraction_status = "EXTRACTION_FAILED";

    logger.info("Trying to get attributes for " + url);
    Map<String, String> attributes = null;
    String title = "";
    String category = "";
    String subCategory = "";
    String img_url = "";
    String description = "";
    String tags = "";
    String leadtext = "";
    String link = "";
    String publishDate = "";
    String domain = "";
    try {//from ww  w.j  a v a  2s  .  co  m
        long now = System.currentTimeMillis();
        long timeSinceLastRequest = now - lastUrlFetchTime;
        if (timeSinceLastRequest < minFetchGapMsecs) {
            long timeToSleep = minFetchGapMsecs - timeSinceLastRequest;
            logger.info(
                    "Sleeping " + timeToSleep + "msecs as time since last fetch is " + timeSinceLastRequest);
            Thread.sleep(timeToSleep);
        }
        Document articleDoc = Jsoup.connect(url).userAgent("SeldonBot/1.0").timeout(httpGetTimeout).get();
        lastUrlFetchTime = System.currentTimeMillis();
        //get IMAGE URL
        if (StringUtils.isNotBlank(imageCssSelector)) {
            Element imageElement = articleDoc.select(imageCssSelector).first();
            if (imageElement != null && imageElement.attr("content") != null) {
                img_url = imageElement.attr("content");
            }
            if (imageElement != null && StringUtils.isBlank(img_url)) {
                img_url = imageElement.attr("src");
            }
            if (imageElement != null && StringUtils.isBlank(img_url)) {
                img_url = imageElement.attr("href");
            }

        }

        if (StringUtils.isBlank(img_url) && StringUtils.isNotBlank(defImageUrl)) {
            logger.info("Setting image to default: " + defImageUrl);
            img_url = defImageUrl;
        }
        img_url = StringUtils.strip(img_url);

        //get TITLE
        if (StringUtils.isNotBlank(titleCssSelector)) {
            Element titleElement = articleDoc.select(titleCssSelector).first();
            if ((titleElement != null) && (titleElement.attr("content") != null)) {
                title = titleElement.attr("content");
            }

            // if still blank get from text instead
            if (StringUtils.isBlank(title) && (titleElement != null)) {
                title = titleElement.text();
            }
        }

        //get LEAD TEXT
        if (StringUtils.isNotBlank(leadTextCssSelector)) {
            Element leadElement = articleDoc.select(leadTextCssSelector).first();
            if (leadElement != null && leadElement.attr("content") != null) {
                leadtext = leadElement.attr("content");
            }
        }

        //get publish date
        if (StringUtils.isNotBlank(publishDateCssSelector)) {
            //2013-01-21T10:40:55Z
            Element pubElement = articleDoc.select(publishDateCssSelector).first();
            if (pubElement != null && pubElement.attr("content") != null) {
                String pubtext = pubElement.attr("content");
                SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
                Date result = null;
                try {
                    result = df.parse(pubtext);
                } catch (ParseException e) {
                    logger.info("Failed to parse date withUTC format " + pubtext);
                }
                //try a simpler format
                df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH);
                try {
                    result = df.parse(pubtext);
                } catch (ParseException e) {
                    logger.info("Failed to parse date " + pubtext);
                }

                if (result != null)
                    publishDate = dateFormatter.format(result);
                else
                    logger.error("Failed to parse date " + pubtext);
            }
        }

        //get Link
        if (StringUtils.isNotBlank(linkCssSelector)) {
            Element linkElement = articleDoc.select(linkCssSelector).first();
            if (linkElement != null && linkElement.attr("content") != null) {
                link = linkElement.attr("content");
            }
        }

        //get CONTENT
        if (StringUtils.isNotBlank(textCssSelector)) {
            Element descriptionElement = articleDoc.select(textCssSelector).first();
            if (descriptionElement != null)
                description = Jsoup.parse(descriptionElement.html()).text();
        }

        //get TAGS
        Set<String> tagSet = AttributesImporterUtils.getTags(articleDoc, tagsCssSelector, title);

        if (tagSet.size() > 0)
            tags = CollectionTools.join(tagSet, ",");

        //get CATEGORY - client specific
        if (StringUtils.isNotBlank(categoryCssSelector)) {
            Element categoryElement = articleDoc.select(categoryCssSelector).first();
            if (categoryElement != null && categoryElement.attr("content") != null) {
                category = categoryElement.attr("content");
                if (StringUtils.isNotBlank(category))
                    category = category.toUpperCase();
            }
        } else if (StringUtils.isNotBlank(categoryClassPrefix)) {
            String className = "io.seldon.importer.articles.category." + categoryClassPrefix
                    + "CategoryExtractor";
            Class<?> clazz = Class.forName(className);
            Constructor<?> ctor = clazz.getConstructor();
            CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance();
            category = extractor.getCategory(url, articleDoc);
        }

        //get Sub CATEGORY - client specific
        if (StringUtils.isNotBlank(subCategoryCssSelector)) {
            Element subCategoryElement = articleDoc.select(subCategoryCssSelector).first();
            if (subCategoryElement != null && subCategoryElement.attr("content") != null) {
                subCategory = subCategoryElement.attr("content");
                if (StringUtils.isNotBlank(subCategory))
                    subCategory = category.toUpperCase();
            }
        } else if (StringUtils.isNotBlank(subCategoryClassPrefix)) {
            String className = "io.seldon.importer.articles.category." + subCategoryClassPrefix
                    + "SubCategoryExtractor";
            Class<?> clazz = Class.forName(className);
            Constructor<?> ctor = clazz.getConstructor();
            CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance();
            subCategory = extractor.getCategory(url, articleDoc);
        }

        // Get domain
        if (domainIsNeeded) {
            domain = getDomain(url);
        }

        if ((StringUtils.isNotBlank(title) && (imageNotNeeded || StringUtils.isNotBlank(img_url))
                && (categoryNotNeeded || StringUtils.isNotBlank(category))
                && (!domainIsNeeded || StringUtils.isNotBlank(domain)))) {
            attributes = new HashMap<String, String>();
            attributes.put(TITLE, title);
            if (StringUtils.isNotBlank(category))
                attributes.put(CATEGORY, category);
            if (StringUtils.isNotBlank(subCategory))
                attributes.put(SUBCATEGORY, subCategory);
            if (StringUtils.isNotBlank(link))
                attributes.put(LINK, link);
            if (StringUtils.isNotBlank(leadtext))
                attributes.put(LEAD_TEXT, leadtext);
            if (StringUtils.isNotBlank(img_url))
                attributes.put(IMG_URL, img_url);
            if (StringUtils.isNotBlank(tags))
                attributes.put(TAGS, tags);
            attributes.put(CONTENT_TYPE, VERIFIED_CONTENT_TYPE);
            if (StringUtils.isNotBlank(description))
                attributes.put(DESCRIPTION, description);
            if (StringUtils.isNotBlank(publishDate))
                attributes.put(PUBLISH_DATE, publishDate);
            if (StringUtils.isNotBlank(domain))
                attributes.put(DOMAIN, domain);
            System.out.println("Item: " + url + "; Category: " + category + " SubCategory: " + subCategory);
            itemProcessResult.extraction_status = "EXTRACTION_SUCCEEDED";
        } else {
            logger.warn("Failed to get needed attributes for article " + url);
            logger.warn("[title=" + title + ", img_url=" + img_url + ", category=" + category + ", domain="
                    + domain + "]");
        }

        { // check for failures for the log result
            if (StringUtils.isBlank(title)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "title";
            }
            if (!imageNotNeeded && StringUtils.isBlank(img_url)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "img_url";
            }
            if (!categoryNotNeeded && StringUtils.isBlank(category)) {
                itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                        + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",")
                        + "category";
            }
        }
    } catch (Exception e) {
        logger.error("Article: " + url + ". Attributes import FAILED", e);
        itemProcessResult.error = e.toString();
    }

    AttributesImporterUtils.logResult(logger, itemProcessResult);

    return attributes;
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

private static List<DocBlock> getDocBlock(String jdocBase, Element elem, ClassDocumentation reference) {
    if (elem != null) {
        String baseLink = JDocUtil.getLink(jdocBase, reference);
        List<DocBlock> blocks = new ArrayList<>(10);
        String hashLink = null;/*from  w  w w  .  j  a va2 s  . c  om*/
        for (elem = elem.nextElementSibling(); elem != null; elem = elem.nextElementSibling()) {
            if (elem.tagName().equals("a")) {
                hashLink = '#' + elem.attr("name");
            } else if (elem.tagName().equals("ul")) {
                Element tmp = elem.getElementsByTag("h4").first();
                String title = JDocUtil.fixSpaces(tmp.text().trim());
                String description = "", signature = "";
                OrderedMap<String, List<String>> fields = new ListOrderedMap<>();
                for (; tmp != null; tmp = tmp.nextElementSibling()) {
                    if (tmp.tagName().equals("pre")) {
                        //contains full signature
                        signature = JDocUtil.fixSpaces(tmp.text().trim());
                    } else if (tmp.tagName().equals("div") && tmp.className().equals("block")) {
                        //main block of content (description or deprecation)
                        Element deprecationElem = tmp.getElementsByClass("deprecationComment").first();
                        if (deprecationElem != null) {
                            //deprecation block
                            fields.put("Deprecated:", Collections
                                    .singletonList(JDocUtil.formatText(deprecationElem.html(), baseLink)));
                        } else {
                            //description block
                            description = JDocUtil.formatText(tmp.html(), baseLink);
                        }
                    } else if (tmp.tagName().equals("dl")) {
                        //a field
                        String fieldName = null;
                        List<String> fieldValues = new ArrayList<>();
                        for (Element element : tmp.children()) {
                            if (element.tagName().equals("dt")) {
                                if (fieldName != null) {
                                    fields.put(fieldName, fieldValues);
                                    fieldValues = new ArrayList<>();
                                }
                                fieldName = JDocUtil.fixSpaces(element.text().trim());
                            } else if (element.tagName().equals("dd")) {
                                fieldValues.add(JDocUtil.formatText(element.html(), baseLink));
                            }
                        }
                        if (fieldName != null) {
                            fields.put(fieldName, fieldValues);
                        }
                    }
                }
                blocks.add(new DocBlock(title, hashLink, signature, description, fields));
            }
        }
        return blocks;
    }
    return null;
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Find out all the snippet in the passed Document and execute them. The Containing embed tag of the passed Document will be exactly
 * mixed in here too. <br>//from  w ww.  j a  va 2 s.c o  m
 * Recursively contained snippets will be executed from outside to inside, thus the inner snippets will not be executed until all of
 * their outer snippets are finished. Also, the dynamically created snippets and embed tags will comply with this rule too.
 * 
 * @param doc
 *            the Document to apply snippets
 * @throws SnippetNotResovlableException
 * @throws SnippetInvokeException
 * @throws TemplateException
 */
public final static void applySnippets(Document doc) throws SnippetNotResovlableException,
        SnippetInvokeException, TemplateException, TemplateNotFoundException {
    if (doc == null) {
        return;
    }

    applyClearAction(doc, false);

    // retrieve ready snippets
    String selector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR,
            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_READY);
    List<Element> snippetList = new ArrayList<>(doc.select(selector));
    int readySnippetCount = snippetList.size();
    int blockedSnippetCount = 0;
    for (int i = readySnippetCount - 1; i >= 0; i--) {
        // if parent snippet has not been executed, the current snippet will
        // not be executed too.
        if (isBlockedByParentSnippet(doc, snippetList.get(i))) {
            snippetList.remove(i);
            blockedSnippetCount++;
        }
    }
    readySnippetCount = readySnippetCount - blockedSnippetCount;

    String renderDeclaration;
    Renderer renderer;
    Context context = Context.getCurrentThreadContext();
    Configuration conf = Configuration.getConfiguration();
    final SnippetInvoker invoker = conf.getSnippetInvoker();

    String refId;
    String currentTemplatePath;
    Element renderTarget;
    for (Element element : snippetList) {
        if (!conf.isSkipSnippetExecution()) {
            // for a faked snippet node which is created by template
            // analyzing process, the render target element should be its
            // child.
            if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE)
                    .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) {
                renderTarget = element.children().first();
                // the hosting element of this faked snippet has been removed by outer a snippet
                if (renderTarget == null) {
                    element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED);
                    continue;
                }
            } else {
                renderTarget = element;
            }

            // we have to reset the ref of current snippet at every time to make sure the ref is always unique(duplicated snippet ref
            // could be created by list rendering)
            TemplateUtil.resetSnippetRefs(element);

            context.setCurrentRenderingElement(renderTarget);
            renderDeclaration = element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_RENDER);

            refId = element.attr(ExtNodeConstants.ATTR_SNIPPET_REF);
            currentTemplatePath = element.attr(ExtNodeConstants.ATTR_TEMPLATE_PATH);

            context.setCurrentRenderingElement(renderTarget);
            context.setData(TRACE_VAR_TEMPLATE_PATH, currentTemplatePath);

            try {
                if (element.hasAttr(ExtNodeConstants.SNIPPET_NODE_ATTR_PARALLEL)) {
                    ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc);
                    final Context newContext = context.clone();
                    final String declaration = renderDeclaration;
                    crHelper.submitWithContext(newContext, declaration, refId, new Callable<Renderer>() {
                        @Override
                        public Renderer call() throws Exception {
                            return invoker.invoke(declaration);
                        }
                    });
                    element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_WAITING);
                } else {
                    renderer = invoker.invoke(renderDeclaration);
                    applySnippetResultToElement(doc, refId, element, renderTarget, renderer);
                }
            } catch (SnippetNotResovlableException | SnippetInvokeException e) {
                throw e;
            } catch (Exception e) {
                SnippetInvokeException se = new SnippetInvokeException(
                        "Error occured when executing rendering on [" + renderDeclaration + "]:"
                                + e.getMessage(),
                        e);
                throw se;
            }

            context.setData(TRACE_VAR_TEMPLATE_PATH, null);
            context.setCurrentRenderingElement(null);
        } else {// if skip snippet
            element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                    ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED);
        }
    }

    // load embed nodes which blocking parents has finished
    List<Element> embedNodeList = doc.select(ExtNodeConstants.EMBED_NODE_TAG_SELECTOR);
    int embedNodeListCount = embedNodeList.size();
    Iterator<Element> embedNodeIterator = embedNodeList.iterator();
    Element embed;
    Element embedContent;
    while (embedNodeIterator.hasNext()) {
        embed = embedNodeIterator.next();
        if (isBlockedByParentSnippet(doc, embed)) {
            embedNodeListCount--;
            continue;
        }
        embedContent = TemplateUtil.getEmbedNodeContent(embed);
        TemplateUtil.mergeBlock(doc, embedContent);
        embed.before(embedContent);
        embed.remove();
    }

    if ((readySnippetCount + embedNodeListCount) > 0) {
        TemplateUtil.regulateElement(null, doc);
        applySnippets(doc);
    } else {
        ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc);
        String delcaration = null;
        if (crHelper.hasUnCompletedTask()) {
            delcaration = null;
            try {
                FutureRendererHolder holder = crHelper.take();
                delcaration = holder.getRenderDeclaration();
                String ref = holder.getSnippetRefId();
                String reSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR,
                        ExtNodeConstants.ATTR_SNIPPET_REF, ref);
                Element element = doc.select(reSelector).get(0);// must have
                Element target;
                if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE)
                        .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) {
                    target = element.children().first();
                } else {
                    target = element;
                }
                applySnippetResultToElement(doc, ref, element, target, holder.getRenderer());
                applySnippets(doc);
            } catch (InterruptedException | ExecutionException e) {
                throw new SnippetInvokeException("Concurrent snippet invocation failed"
                        + (delcaration == null ? "" : " on [" + delcaration + "]"), e);
            }
        }
    }
}

From source file:com.nineash.hutsync.client.NetworkUtilities.java

/**
 * Perform 2-way sync with the server-side contacts. We send a request that
 * includes all the locally-dirty contacts so that the server can process
 * those changes, and we receive (and return) a list of contacts that were
 * updated on the server-side that need to be updated locally.
 *
 * @param account The account being synced
 * @param authtoken The authtoken stored in the AccountManager for this
 *            account/*ww  w  .  j  a  v a 2 s  .c o m*/
 * @param serverSyncState A token returned from the server on the last sync
 * @param dirtyContacts A list of the contacts to send to the server
 * @return A list of contacts that we need to update locally
 */
public static void syncCalendar(Context context, Account account, String authtoken, long serverSyncState)
        throws JSONException, ParseException, IOException, AuthenticationException {
    ArrayList<SerializableCookie> myCookies;
    CookieStore cookieStore = new BasicCookieStore();
    DefaultHttpClient hClient = getHttpClient(context);
    mContentResolver = context.getContentResolver();
    final String[] weeknames = { "rota_this_week", "rota_next_week" };

    long calendar_id = getCalendar(account);
    if (calendar_id == -1) {
        Log.e("CalendarSyncAdapter", "Unable to create HutSync event calendar");
        return;
    }

    try {
        myCookies = (ArrayList<SerializableCookie>) fromString(authtoken);
    } catch (final IOException e) {
        Log.e(TAG, "IOException when expanding authtoken", e);
        return;
    } catch (final ClassNotFoundException e) {
        Log.e(TAG, "ClassNotFoundException when expanding authtoken", e);
        return;
    }

    for (SerializableCookie cur_cookie : myCookies) {
        cookieStore.addCookie(cur_cookie.getCookie());
    }

    hClient.setCookieStore(cookieStore);
    Log.i(TAG, "Syncing to: " + SYNC_CONTACTS_URI);
    HttpGet httpget = new HttpGet(SYNC_CONTACTS_URI);
    final HttpResponse resp = hClient.execute(httpget);
    final String response = EntityUtils.toString(resp.getEntity());
    HashMap<Long, SyncEntry> localEvents = new HashMap<Long, SyncEntry>();
    ArrayList<Event> events = new ArrayList<Event>();
    Pattern p = Pattern.compile("background-color:(#[[a-f][A-F][0-9]]{6})");
    Pattern ps = Pattern
            .compile(".calendar-key span.(\\S+) \\{ background-color:(#[[a-f][A-F][0-9]]{6}); color:#fff; \\}");

    if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
        //check we are still logged in
        //if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_UNAUTHORIZED) {
        //    Log.e(TAG, "Authentication exception in sending dirty contacts");
        //    throw new AuthenticationException();
        //}

        //if we are logged in
        Map<String, String> shift_types = new HashMap<String, String>();
        int length = weeknames.length;
        Document doc = Jsoup.parse(response);
        String full_name = doc.select("a[href*=" + account.name + "/profile]").first().text();

        AccountManager mAccountManager = AccountManager.get(context);
        Account[] the_accounts = mAccountManager.getAccountsByType(Constants.ACCOUNT_TYPE);
        boolean multiple_accounts = (the_accounts.length > 1);

        Elements the_styles = doc.select("style");
        for (Element the_style : the_styles) {
            String st_txt = the_style.html();
            Matcher ms = ps.matcher(st_txt);
            while (ms.find()) { // Find each match in turn; String can't do this.
                String cname = ms.group(1); // Access a submatch group; String can't do this.
                String ccol = ms.group(2);
                String rname = doc.select("span." + cname).first().text();
                Log.i(TAG, "LOOK: " + cname + ", " + ccol + ", " + rname);
                shift_types.put(ccol, rname);
            }
        }

        for (int w = 0; w < weeknames.length; w++) {

            Elements the_dates = doc.select("div.homepage div.accord-content table[id=" + weeknames[w]
                    + "] tr.heading th:not(.skipStyles)");
            //for (Element hidden : the_dates) { //0 is Mon, 6 is Sun
            Element the_date = the_dates.first(); //figure out the year for the Monday.
            String str_v = the_date.text();
            String[] str_sub = str_v.split(" ");
            str_sub[1] = str_sub[1].trim();
            String[] date_split = str_sub[1].split("/");
            Calendar c = Calendar.getInstance();
            int this_month = c.get(Calendar.MONTH) + 1;
            int monday_month = Integer.parseInt(date_split[1]);
            int this_year = c.get(Calendar.YEAR);
            int monday_year = this_year;
            if (this_month > monday_month) {
                monday_year++;
            } else if (this_month < monday_month) {
                monday_year--;
            }

            SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy");
            Date date = new Date();
            if (str_v != null && !str_v.isEmpty()) {
                String this_date = str_sub[1] + "/" + monday_year; //we need to figure out the year - sometimes its next year

                try {
                    date = format.parse(this_date);
                } catch (Exception e) {
                    // TODO Auto-generated catch block  
                    e.printStackTrace();
                }
                Log.i(TAG, "Dates: " + this_date + " - " + date);
            }
            //}

            for (int i = 1; i < 8; ++i) { //1 is monday, 7 is sunday
                Elements hiddens = doc.select("div.homepage div.accord-content table[id=" + weeknames[w]
                        + "] td:eq(" + Integer.toString(i) + "):not(.skipStyles) div.timeElem");
                int add_days = i - 1;
                for (Element hidden : hiddens) {
                    String str = hidden.text();
                    if (str != null && !str.isEmpty()) {
                        String style = hidden.attr("style");
                        String bg_col = "";
                        Matcher m = p.matcher(style);
                        if (m.find()) {
                            bg_col = m.group(1); // Access a submatch group; String can't do this.
                        }

                        Log.i(TAG, "Time: " + str + "(" + bg_col + ")");
                        String ev_description = ""; //Location too?
                        if (multiple_accounts)
                            ev_description += full_name + "\n\n";
                        String[] times = str.split(" - ");
                        String[] start_time = times[0].split(":");
                        String[] end_time = times[1].split(":");
                        int add_start_hours = Integer.parseInt(start_time[0]);
                        int add_start_minutes = Integer.parseInt(start_time[1]);
                        int add_finish_hours = Integer.parseInt(end_time[0]);
                        int add_finish_minutes = Integer.parseInt(end_time[1]);
                        String ev_shiftType = "";
                        if (bg_col != null && !bg_col.isEmpty()) {
                            ev_shiftType = (String) shift_types.get(bg_col);
                        } else {
                            ev_shiftType = "Other";
                        }
                        String ev_title = ev_shiftType + " Shift";

                        c.setTime(date);
                        c.add(Calendar.DATE, add_days);
                        c.add(Calendar.HOUR_OF_DAY, add_start_hours);
                        c.add(Calendar.MINUTE, add_start_minutes);
                        Date startDate = c.getTime();
                        long ev_id = startDate.getTime();

                        c.setTime(date);
                        c.add(Calendar.DATE, add_days);
                        if (add_finish_hours < add_start_hours) { //shift rolls to next day
                            c.add(Calendar.HOUR_OF_DAY, 24);
                            ev_description += "Shift finishes at " + times[1] + " on the next day\n\n";
                        } else {
                            c.add(Calendar.HOUR_OF_DAY, add_finish_hours);
                            c.add(Calendar.MINUTE, add_finish_minutes);
                        }
                        Date endDate = c.getTime();

                        Event ev = new Event(ev_id, ev_title, startDate, endDate, ev_description, ev_shiftType);
                        events.add(ev);
                        Log.i(TAG, "Event: " + ev);
                    }
                }
            }
        }

        //next merge adjacent shifts
        SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm");
        Event prev_event = null;
        for (Iterator<Event> it = events.iterator(); it.hasNext();) {
            Event cur_event = it.next();
            if (prev_event != null) {
                if (prev_event.getEndDate().compareTo(cur_event.getStartDate()) == 0) {
                    prev_event.setDescription(prev_event.getDescription() + "Merged consecutive shifts:\n"
                            + timeFormat.format(prev_event.getStartDate()) + " to "
                            + timeFormat.format(prev_event.getEndDate()) + " (" + prev_event.getShiftType()
                            + ")\n" + timeFormat.format(cur_event.getStartDate()) + " to "
                            + timeFormat.format(cur_event.getEndDate()) + " (" + cur_event.getShiftType()
                            + ")\n\n");
                    prev_event.setEndDate(cur_event.getEndDate()); //TODO: only merge if other + FOH/BOH, note times in new description
                    it.remove();
                }
            }
            prev_event = cur_event;
        }

        //next, load local events
        Cursor c1 = mContentResolver.query(
                Events.CONTENT_URI.buildUpon().appendQueryParameter(Events.ACCOUNT_NAME, account.name)
                        .appendQueryParameter(Events.ACCOUNT_TYPE, account.type).build(),
                new String[] { Events._ID, Events._SYNC_ID }, Events.CALENDAR_ID + "=?",
                new String[] { String.valueOf(calendar_id) }, null);
        while (c1 != null && c1.moveToNext()) {
            //if(is_full_sync) {
            //   deleteEvent(context, account, c1.getLong(0));
            //} else {
            SyncEntry entry = new SyncEntry();
            entry.raw_id = c1.getLong(0);
            localEvents.put(c1.getLong(1), entry);
            //}
        }
        c1.close();
        try {
            ArrayList<ContentProviderOperation> operationList = new ArrayList<ContentProviderOperation>();
            for (Event event : events) {

                if (localEvents.containsKey(Long.valueOf(event.getId()))) {
                    SyncEntry entry = localEvents.get(Long.valueOf(event.getId()));
                    operationList.add(updateEvent(calendar_id, account, event, entry.raw_id));
                } else {
                    operationList.add(updateEvent(calendar_id, account, event, -1));
                }

                if (operationList.size() >= 50) {
                    try {
                        mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                    operationList.clear();
                }
            }

            if (operationList.size() > 0) {
                try {
                    mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        } catch (Exception e1) {
            // TODO Auto-generated catch block
            e1.printStackTrace();
            return;
        }

    } else {
        Log.e(TAG, "Server error in sending dirty contacts: " + resp.getStatusLine());
        throw new IOException();
    }
}