Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes Element attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.attr.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrValueDynamicExtractor.java

/**
 * Extracts an attribute value from the first element matched by a CSS selector.
 *
 * <p>{@code attributeDetail.extractor_args} must hold at least two entries: the CSS selector
 * at index 0, followed by one or more attribute names. The attribute names are tried in
 * order and the first non-blank value wins.
 *
 * @param attributeDetail supplies the selector and the candidate attribute names
 * @param url not used by this extractor
 * @param articleDoc the parsed document to query
 * @return the first non-blank attribute value; the last (blank) value read when every
 *         candidate attribute is blank; or {@code null} when the arguments are missing,
 *         the selector is blank, or no element matches the selector
 * @throws Exception if the underlying selector evaluation fails
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {

    if ((attributeDetail.extractor_args == null) || (attributeDetail.extractor_args.size() < 2)) {
        return null;
    }

    // Validate the selector BEFORE handing it to jsoup: select() rejects an empty query,
    // so checking afterwards (as the code used to) could never protect the call.
    String cssSelector = attributeDetail.extractor_args.get(0);
    if (StringUtils.isBlank(cssSelector)) {
        return null;
    }

    Element element = articleDoc.select(cssSelector).first();
    if (element == null) {
        return null;
    }

    String attribValue = null;
    // Index 0 is the cssSelector; the attribute names start at index 1.
    for (int i = 1; i < attributeDetail.extractor_args.size(); i++) {
        // Element.attr returns an empty string (never null) for a missing attribute.
        attribValue = element.attr(attributeDetail.extractor_args.get(i));
        if (StringUtils.isNotBlank(attribValue)) {
            break;
        }
    }

    return attribValue;
}

From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrListValueDynamicExtractor.java

/**
 * Extracts a comma-separated attribute value from the first element matched by a CSS
 * selector and normalises each list entry (trimmed, lower-cased).
 *
 * <p>{@code attributeDetail.extractor_args} must hold at least two entries: the CSS selector
 * at index 0, followed by one or more attribute names. The attribute names are tried in
 * order and the first one with a non-blank value is used.
 *
 * @param attributeDetail supplies the selector and the candidate attribute names
 * @param url not used by this extractor
 * @param articleDoc the parsed document to query
 * @return the normalised list joined with {@code ','}, or {@code null} when the arguments
 *         are missing, the selector is blank, nothing matches the selector, or every
 *         candidate attribute is blank
 * @throws Exception if the underlying selector evaluation fails
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {

    if ((attributeDetail.extractor_args == null) || (attributeDetail.extractor_args.size() < 2)) {
        return null;
    }

    // Validate the selector BEFORE handing it to jsoup: select() rejects an empty query,
    // so checking afterwards (as the code used to) could never protect the call.
    String cssSelector = attributeDetail.extractor_args.get(0);
    if (StringUtils.isBlank(cssSelector)) {
        return null;
    }

    Element element = articleDoc.select(cssSelector).first();
    if (element == null) {
        return null;
    }

    // Index 0 is the cssSelector; the attribute names start at index 1.
    for (int i = 1; i < attributeDetail.extractor_args.size(); i++) {
        // Element.attr returns an empty string (never null) for a missing attribute.
        String rawList = element.attr(attributeDetail.extractor_args.get(i));
        if (StringUtils.isNotBlank(rawList)) {
            String[] parts = rawList.split(",");
            for (int p = 0; p < parts.length; p++) {
                // Locale.ROOT keeps the result independent of the JVM default locale
                // (e.g. the Turkish dotless-i mapping).
                parts[p] = parts[p].trim().toLowerCase(java.util.Locale.ROOT);
            }
            return StringUtils.join(parts, ',');
        }
    }

    return null;
}

From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrUppercaseValueDynamicExtractor.java

/**
 * Extracts an attribute value from the first element matched by a CSS selector and
 * returns it upper-cased.
 *
 * <p>{@code attributeDetail.extractor_args} must hold at least two entries: the CSS selector
 * at index 0, followed by one or more attribute names. The attribute names are tried in
 * order and the first non-blank value wins.
 *
 * @param attributeDetail supplies the selector and the candidate attribute names
 * @param url not used by this extractor
 * @param articleDoc the parsed document to query
 * @return the upper-cased first non-blank attribute value; the upper-cased last (blank)
 *         value read when every candidate attribute is blank; or {@code null} when the
 *         arguments are missing, the selector is blank, or nothing matches the selector
 * @throws Exception if the underlying selector evaluation fails
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {

    if ((attributeDetail.extractor_args == null) || (attributeDetail.extractor_args.size() < 2)) {
        return null;
    }

    // Validate the selector BEFORE handing it to jsoup: select() rejects an empty query,
    // so checking afterwards (as the code used to) could never protect the call.
    String cssSelector = attributeDetail.extractor_args.get(0);
    if (StringUtils.isBlank(cssSelector)) {
        return null;
    }

    Element element = articleDoc.select(cssSelector).first();
    if (element == null) {
        return null;
    }

    String attribValue = null;
    // Index 0 is the cssSelector; the attribute names start at index 1.
    for (int i = 1; i < attributeDetail.extractor_args.size(); i++) {
        // Element.attr returns an empty string (never null) for a missing attribute.
        attribValue = element.attr(attributeDetail.extractor_args.get(i));
        if (StringUtils.isNotBlank(attribValue)) {
            break;
        }
    }

    // Locale.ROOT keeps the result independent of the JVM default locale.
    return (attribValue != null) ? attribValue.toUpperCase(java.util.Locale.ROOT) : null;
}

From source file:org.jresponder.message.MessageRefImpl.java

/**
 * Re-reads the backing file and rebuilds the cached state derived from it: the modification
 * timestamp, the raw contents, the parsed document and the property map built from the
 * document's {@code meta} tags ({@code name} attribute to {@code content} attribute).
 *
 * @throws InvalidMessageException if the file cannot be read or contains no {@code meta} tags
 */
@Override
public synchronized void refresh() throws InvalidMessageException {

    try {

        logger().debug("MessageRef - Starting refresh for: {}", file.getCanonicalPath());

        // set timestamp
        fileContentsTimestamp = file.lastModified();

        StringBuilder contents = new StringBuilder();
        char[] buf = new char[4096];
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            int len;
            while ((len = reader.read(buf)) > 0) {
                contents.append(buf, 0, len);
            }
        } finally {
            // Close even when read() fails so the file handle is not leaked.
            reader.close();
        }

        fileContents = contents.toString();

        // The text is already decoded. The second argument of Jsoup.parse(String, String)
        // is a base URI, not a charset, so the single-argument overload is the right one.
        document = Jsoup.parse(fileContents);

        propMap = new HashMap<String, String>();

        Elements metaTags = document.select("meta");
        if (metaTags == null || metaTags.isEmpty()) {
            throw new InvalidMessageException("No meta tags found in file: " + file.getCanonicalPath());
        }

        for (Element metaTag : metaTags) {
            propMap.put(metaTag.attr("name"), metaTag.attr("content"));
        }

        // bodies are not read at all until message generation time

    } catch (IOException e) {
        throw new InvalidMessageException(e);
    }

    // debug dump
    if (logger().isDebugEnabled()) {
        for (String key : propMap.keySet()) {
            logger().debug("  property -- {}: {}", key, (propMap.get(key)));
        }
    }
}

From source file:com.isomorphic.maven.packaging.Downloads.java

/**
 * Interrogates the remote server for a list of hyperlinks matching the given distribution's {@link Distribution#getRemoteIndexFilter() filter}.
 * /*w  ww. ja  v  a  2  s.c  o  m*/
 * @param dist the build in which some files should exist
 * @return a String array of html href attributes
 * @throws MojoExecutionException
 */
private String[] list(Distribution dist) throws MojoExecutionException {

    HttpGet request = new HttpGet(dist.getRemoteIndex());
    HttpResponse response;

    try {

        LOGGER.debug("Requesting list of files from {}{}", DOMAIN, dist.getRemoteIndex());
        response = httpClient.execute(host, request);

    } catch (Exception e) {
        throw new MojoExecutionException("Error issuing GET request for bundle at '" + request + "'", e);
    }

    Document doc;

    try {

        String html = EntityUtils.toString(response.getEntity());
        doc = Jsoup.parse(html);
        doc.outputSettings().prettyPrint(true);

    } catch (Exception e) {
        throw new MojoExecutionException("Error processing response from '" + request + "'", e);
    }

    List<String> result = new ArrayList<String>();

    Elements links = doc.select(dist.getRemoteIndexFilter());

    for (Element element : links) {
        String href = element.attr("href");
        result.add(href);
    }

    if (result.isEmpty()) {
        String msg = String.format("No downloads found at '%s%s'.  Response from server: \n\n%s\n", DOMAIN,
                dist.getRemoteIndex(), doc.html());
        LOGGER.warn(msg);
    }

    return result.toArray(new String[0]);
}

From source file:me.vertretungsplan.parser.DSBLightParser.java

/**
 * Fetches the page referenced by the iframe's {@code src} attribute, looks for a
 * {@code location.href="..."} assignment in the response and hands the captured URL to
 * {@code parseProgram}.
 *
 * @param v the schedule passed through to {@code parseProgram}
 * @param referer passed to both the HTTP request and {@code parseProgram}
 * @param iframe the element whose {@code src} attribute is fetched
 * @throws IOException if the response contains no {@code location.href} assignment
 */
private void parsePreProgram(SubstitutionSchedule v, Map<String, String> referer, Element iframe)
        throws IOException, CredentialInvalidException, JSONException {
    // PreProgram.aspx
    final String preProgramHtml = httpGet(iframe.attr("src"), ENCODING, referer);

    final Matcher redirect = Pattern.compile("location\\.href=\"([^\"]*)\"").matcher(preProgramHtml);
    if (!redirect.find()) {
        throw new IOException("URL nicht gefunden");
    }

    // Program.aspx
    parseProgram(redirect.group(1), v, referer);
}

From source file:com.astamuse.asta4d.web.form.flow.base.BasicFormFlowSnippetTrait.java

/**
 * Builds a renderer that appends {@code display:none} to the inline {@code style} attribute
 * of the {@code :root} element, keeping any style declarations already present.
 *
 * @param subFormType not referenced by this default implementation
 * @return a renderer targeting {@code :root}
 */
default Renderer hideCascadeFormTemplateDOM(Class<?> subFormType) {
    return Renderer.create(":root", new ElementSetter() {
        @Override
        public void set(Element elem) {
            final String existing = elem.attr("style");

            // Work out what has to precede the new declaration: nothing, the existing
            // style as-is, or the existing style plus the missing ';' separator.
            final String prefix;
            if (StringUtils.isEmpty(existing)) {
                prefix = "";
            } else if (existing.endsWith(";")) {
                prefix = existing;
            } else {
                prefix = existing + ";";
            }

            elem.attr("style", prefix + "display:none");
        }
    });
}

From source file:com.astamuse.asta4d.web.form.field.impl.AbstractRadioAndCheckboxRenderer.java

/**
 * Builds a renderer that first records information about every element matching
 * {@code editTargetSelector} (its duplicator reference, if any, and its id) and then, on
 * {@code :root}, replaces the last recorded element with a group consisting of a clone of
 * that element followed by one alternative display element per entry of {@code valueList}.
 *
 * @param editTargetSelector selector of the edit target elements
 * @param valueList values for which alternative display elements are created
 * @return the assembled renderer
 * @throws IllegalArgumentException at render time, if an edit target was visited but
 *         neither a duplicator reference nor an id was recorded
 */
protected Renderer addDefaultAlternativeDom(final String editTargetSelector, final List<String> valueList) {
    final List<String> duplicatorRefs = new LinkedList<>();
    final List<String> ids = new LinkedList<>();
    ClosureVarRef<Boolean> targetFound = new ClosureVarRef<Boolean>(false);

    // First pass: collect what is needed to locate the attach target later on.
    Renderer renderer = Renderer.create(editTargetSelector, new ElementSetter() {
        @Override
        public void set(Element elem) {
            String ref = elem.attr(RadioPrepareRenderer.DUPLICATOR_REF_ATTR);
            if (StringUtils.isNotEmpty(ref)) {
                duplicatorRefs.add(ref);
            }
            ids.add(elem.id());
            targetFound.set(true);
        }
    });

    /*
    renderer.add(":root", () -> {
    return Renderer.create().addDebugger("current root for addDefaultAlternativeDom");
    });
    */

    renderer.add(":root", new Renderable() {
        @Override
        public Renderer render() {
            // skip create display alternative DOM if edit target does not exist.
            if (!targetFound.get()) {
                return Renderer.create();
            }

            // Prefer the last duplicator reference; fall back to the last recorded id.
            final String attachTargetSelector;
            if (!duplicatorRefs.isEmpty()) {
                String lastRef = duplicatorRefs.get(duplicatorRefs.size() - 1);
                attachTargetSelector = SelectorUtil.attr(RadioPrepareRenderer.DUPLICATOR_REF_ID_ATTR,
                        lastRef);
            } else if (ids.isEmpty()) {
                String msg = "The target item[%s] must have id specified.";
                throw new IllegalArgumentException(String.format(msg, editTargetSelector));
            } else {
                String lastId = ids.get(ids.size() - 1);
                attachTargetSelector = SelectorUtil.id(lastId);
            }

            return new Renderer(attachTargetSelector, new ElementTransformer(null) {
                @Override
                public Element invoke(Element elem) {
                    GroupNode group = new GroupNode();

                    // Keep a copy of the edit element at the head of the group.
                    group.appendChild(elem.clone());

                    for (String storedValue : valueList) {
                        String display = retrieveDisplayStringFromStoredOptionValueMap(editTargetSelector,
                                storedValue);
                        group.appendChild(createAlternativeDisplayElement(display));
                    }
                    return group;
                }// invoke
            });// new renderer
        }// render()
    });// renderable

    return renderer;
}

From source file:org.sbs.goodcrawler.extractor.selector.PageElementSelector.java

/**
 * Follows the links selected from the current document and merges the content that the
 * configured sub-selectors extract from each linked page.
 *
 * <p>Steps, as implemented below:
 * <ol>
 * <li>If {@code newDoc} is false, the cached {@code content} is returned unchanged.</li>
 * <li>{@code value} is used as a selector on {@code super.document}; each matched element
 * contributes either its text or its {@code attr} attribute (depending on {@code $Attr}) as a URL.</li>
 * <li>Each URL is fetched and parsed; every selector in {@code selectors} is applied to the
 * resulting document and its map is merged into {@code content}.</li>
 * </ol>
 *
 * @return the merged content map, or {@code null} when nothing matches, a page cannot be
 *         fetched or parsed, a required selector yields nothing, or no URL was collected
 * @throws ExtractException if fetching a page raises an {@link IOException}
 */
@SuppressWarnings("unchecked")
@Override
public HashMap<String, Object> getContent() throws ExtractException {
    if (null != content && !newDoc) {
        return content;
    }
    // No new document has been set: return the cached value (which may be null).
    // NOTE(review): this check subsumes the one above, which looks redundant.
    if (!newDoc) {
        return content;
    }
    // Collect the target URLs from the current document using the selector in `value`.
    List<String> urls = Lists.newArrayList();
    if (super.document != null) {
        Elements elements = super.document.select(value);
        if (elements.isEmpty())
            return null;
        switch ($Attr) {
        case text:
            for (Element e : elements) {
                urls.add(e.text());
            }
            break;
        default:
            for (Element e : elements) {
                urls.add(e.attr(attr));
            }
            break;
        }
    }
    if (urls.size() > 0) {
        content = Maps.newHashMap();
        for (String url : urls) {
            Document doc = null;
            PageFetchResult result = null;
            try {
                WebURL webUrl = new WebURL();
                webUrl.setURL(url);
                result = FetchForeman.fetcher.fetchHeader(webUrl);
                // Check the fetch status; anything other than SC_OK aborts the whole extraction.
                int statusCode = result.getStatusCode();
                if (statusCode == CustomFetchStatus.PageTooBig) {
                    return null;
                }
                if (statusCode != HttpStatus.SC_OK) {
                    return null;
                } else {
                    Page page = new Page(webUrl);
                    if (!result.fetchContent(page)) {
                        return null;
                    }
                    if (!parser.parse(page, webUrl.getURL())) {
                        return null;
                    }
                    doc = Jsoup.parse(new String(page.getContentData(), page.getContentCharset()),
                            urlUtils.getBaseUrl(page.getWebURL().getURL()));
                }
            } catch (IOException e) {
                // NOTE(review): only the message is forwarded; the original cause and stack
                // trace are lost to callers — consider passing `e` as the cause if
                // ExtractException supports it.
                e.printStackTrace();
                throw new ExtractException(e.getMessage());
            } finally {
                // Runs on every exit path of the try block, including the early returns above.
                if (result != null)
                    result.discardContentIfNotConsumed();
            }

            // Apply every configured selector to the fetched page and merge its result.
            if (selectors != null)
                for (AbstractElementCssSelector<?> selector : selectors) {
                    if (selector instanceof FileElementCssSelector) {
                        // File selectors additionally receive the content collected so far.
                        Map<String, Object> m = ((FileElementCssSelector) selector).setResult(content)
                                .setDocument(doc).getContentMap();
                        if ((null == m || m.size() == 0) && selector.isRequired()) {
                            return null;
                        } else {
                            if (null != m && m.size() > 0)
                                content = MapUtils.mager(content, (HashMap<String, Object>) m);
                        }
                    } else {
                        Map<String, Object> m = selector.setDocument(doc).getContentMap();
                        if ((null == m || m.size() == 0) && selector.isRequired()) {
                            return null;
                        } else {
                            if (null != m && m.size() > 0)
                                content = MapUtils.mager(content, (HashMap<String, Object>) m);
                        }
                    }
                }
        }
        return content;
    }
    // NOTE(review): newDoc is only reset on this path (no URLs collected); the successful
    // return above and the early `return null` exits leave it untouched, so the cache check
    // at the top never short-circuits after a successful extraction — confirm this is intended.
    newDoc = false;
    return null;
}