Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes Element attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.attr.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Applies the renderers of {@code rendererList}, beginning at {@code startIndex}, to {@code target}.
 * <p>
 * Each call handles exactly one renderer and then recurses with {@code startIndex + 1}; the recursion ends once
 * {@code startIndex >= count}. For a renderer with a selector, every matched element is replaced by the nodes its
 * transformers produce. When {@code target} itself is matched, the remaining renderers are applied to the
 * replacement node(s) instead of the original {@code target}.
 *
 * @param target       the element the current renderer is applied to
 * @param rendererList the renderers to apply
 * @param renderAction passed to {@code RENDER_ACTION} renderers and queried for the missing-selector warning switch
 * @param startIndex   index in {@code rendererList} of the renderer handled by this call
 * @param count        exclusive upper bound for {@code startIndex}
 */
private final static void apply(Element target, List<Renderer> rendererList, RenderAction renderAction,
        int startIndex, int count) {

    // The renderer list have to be applied recursively because the
    // transformer will always return a new Element clone.

    // end of recursion: every renderer in range has been handled
    if (startIndex >= count) {
        return;
    }

    final Renderer currentRenderer = rendererList.get(startIndex);

    RendererType rendererType = currentRenderer.getRendererType();

    // renderer types that do not transform anything are handled here and
    // the recursion continues directly with the next renderer
    switch (rendererType) {
    case GO_THROUGH:
        apply(target, rendererList, renderAction, startIndex + 1, count);
        return;
    /*
    case DEBUG:
    currentRenderer.getTransformerList().get(0).invoke(target);
    apply(target, rendererList, renderAction, startIndex + 1, count);
    return;
    */
    case RENDER_ACTION:
        // lets the renderer's style adjust the shared render action
        ((RenderActionRenderer) currentRenderer).getStyle().apply(renderAction);
        apply(target, rendererList, renderAction, startIndex + 1, count);
        return;
    default:
        // do nothing
        break;
    }

    String selector = currentRenderer.getSelector();
    List<Transformer<?>> transformerList = currentRenderer.getTransformerList();

    // collect the elements to transform; the pseudo root selector stands
    // for the target itself
    List<Element> elemList;
    if (PSEUDO_ROOT_SELECTOR.equals(selector)) {
        elemList = new LinkedList<Element>();
        elemList.add(target);
    } else {
        // copied into a new list so that it can be extended and reversed below
        elemList = new ArrayList<>(target.select(selector));
    }

    if (elemList.isEmpty()) {
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER) {
            // nothing matched: render the target with the handler's
            // alternative renderer instead.
            // NOTE(review): this clears and refills the list returned by
            // getTransformerList(); whether that is the renderer's own
            // internal list is not visible here - confirm.
            elemList.add(target);
            transformerList.clear();
            transformerList.add(
                    new RendererTransformer(((ElementNotFoundHandler) currentRenderer).alternativeRenderer()));
        } else if (renderAction.isOutputMissingSelectorWarning()) {
            String creationInfo = currentRenderer.getCreationSiteInfo();
            if (creationInfo == null) {
                creationInfo = "";
            } else {
                creationInfo = " at [ " + creationInfo + " ]";
            }
            logger.warn(
                    "There is no element found for selector [{}]{}, if it is deserved, try Renderer#disableMissingSelectorWarning() "
                            + "to disable this message and Renderer#enableMissingSelectorWarning could enable this warning again in "
                            + "your renderer chain",
                    selector, creationInfo);
            apply(target, rendererList, renderAction, startIndex + 1, count);
            return;
        }
        // when the warning is disabled the code falls through with an empty
        // list: the loop below does nothing and the next renderer is applied

    } else {
        // the selector matched something, so a not-found handler is skipped
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER) {
            apply(target, rendererList, renderAction, startIndex + 1, count);
            return;
        }
    }

    // set when the target itself is among the matched elements; it is then
    // transformed after all other elements
    Element delayedElement = null;
    Element resultNode;
    // TODO we suppose that the element is listed as the order from parent
    // to children, so we reverse it. Perhaps we need a real order process
    // to ensure the wanted order.
    Collections.reverse(elemList);
    boolean renderForRoot;
    for (Element elem : elemList) {
        renderForRoot = PSEUDO_ROOT_SELECTOR.equals(selector)
                || rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER;
        if (!renderForRoot) {
            // faked group node will be not applied by renderers(only when the current selector is not the pseudo :root)
            if (elem.tagName().equals(ExtNodeConstants.GROUP_NODE_TAG)
                    && ExtNodeConstants.GROUP_NODE_ATTR_TYPE_FAKE
                            .equals(elem.attr(ExtNodeConstants.GROUP_NODE_ATTR_TYPE))) {
                continue;
            }
        }

        if (elem == target) {
            delayedElement = elem;
            continue;
        }
        // insert every transformer result in front of the original element,
        // then drop the original
        for (Transformer<?> transformer : transformerList) {
            resultNode = transformer.invoke(elem);
            elem.before(resultNode);
        } // for transformer
        elem.remove();
    } // for element

    // if the root element is one of the process targets, we can not apply
    // the left renderers to original element because it will be replaced by
    // a new element even it is not necessary (that is how Transformer
    // works).
    if (delayedElement == null) {
        apply(target, rendererList, renderAction, startIndex + 1, count);
    } else {
        // for a not-found handler on a Document, work on its first child
        // instead of the Document itself
        if (rendererType == RendererType.ELEMENT_NOT_FOUND_HANDLER && delayedElement instanceof Document) {
            delayedElement = delayedElement.child(0);
        }
        // the remaining renderers are applied to each replacement node
        for (Transformer<?> transformer : transformerList) {
            resultNode = transformer.invoke(delayedElement);
            delayedElement.before(resultNode);
            apply(resultNode, rendererList, renderAction, startIndex + 1, count);
        } // for transformer
        delayedElement.remove();
    }

}

From source file:jobhunter.infoempleo.Client.java

/**
 * Reads the company from the {@code title} attribute of the logo element.
 *
 * @param doc the parsed page to inspect
 * @return the {@code title} attribute of the element with id {@code ctl00_CPH_Body_Logo_Empresa}, or an empty
 *         string when the page has no such element
 */
private String getCompany(final Document doc) {
    final Element logo = doc.getElementById("ctl00_CPH_Body_Logo_Empresa");
    if (logo == null) {
        return "";
    }
    return logo.attr("title");
}

From source file:com.astamuse.asta4d.web.form.field.SimpleFormFieldValueRenderer.java

/**
 * Builds a renderer that appends {@code display:none} to the inline {@code style} attribute of the elements
 * matched by {@code targetSelector}.
 * <p>
 * The missing-selector warning is disabled before the setter is added and enabled again on the returned renderer.
 *
 * @param targetSelector selector of the elements to hide
 * @return the renderer carrying the hiding setter
 */
protected Renderer hideTarget(final String targetSelector) {
    final Renderer quietRenderer = Renderer.create().disableMissingSelectorWarning();
    final ElementSetter hider = new ElementSetter() {
        @Override
        public void set(Element elem) {
            String current = elem.attr("style");
            if (current != null) {
                current = current.trim();
            }

            // append the declaration, adding a separator only when the
            // existing style does not already end with one
            final String hidden;
            if (StringUtils.isEmpty(current)) {
                hidden = "display:none";
            } else if (current.endsWith(";")) {
                hidden = current + "display:none";
            } else {
                hidden = current + ";display:none";
            }

            elem.attr("style", hidden);
        }
    };
    return quietRenderer.add(targetSelector, hider).enableMissingSelectorWarning();
}

From source file:ch.admin.hermes.etl.load.HermesOnlineCrawler.java

/**
 * Liefert die URL's zu den Vorlagen /*from  w w w  .j  a v a 2  s  .  c o m*/
 * @param scenario Szenario
 * @return
 * @throws Exception Allgemeiner I/O Fehler
 */
public String[] getTemplatesURL(String scenario) throws Exception {
    ArrayList<String> s = new ArrayList<String>();
    HttpGet get = new HttpGet(url + scenario_prefix + scenario + templates);

    HttpResponse response = httpClient.execute(get);

    HttpEntity entity = response.getEntity();
    String pageHTML = EntityUtils.toString(entity);
    EntityUtils.consume(entity);

    Document document = Jsoup.parse(pageHTML);
    Elements elements = document.getElementsByAttribute("href");
    for (Element e : elements) {
        String attr = e.attr("href");
        if (attr.endsWith(".docx") || attr.endsWith(".xlsx") || attr.endsWith(".pptx"))
            s.add(url + scenario_prefix + scenario + templates + attr);
    }
    return (s.toArray(new String[s.size()]));
}

From source file:com.crosstreelabs.cognitio.gumshoe.format.HtmlFormatHandler.java

/**
 * Parses the visited content as HTML and records the absolute target of every usable anchor in
 * {@code visit.discoveredLinks}.
 * <p>
 * Anchors are skipped when their raw or absolute {@code href} is empty after stripping the fragment, or when the
 * raw {@code href} contains {@code javascript:}, {@code mailto:} or {@code @}. The content stream is reset after
 * the anchors have been processed.
 *
 * @param visit the visit whose content is scanned and whose discovered links are extended
 * @throws RuntimeException wrapping a {@code GalimatiasParseException} or {@code IOException}
 */
@Override
public void processLinks(final Visit visit) {
    try {
        String charset = StringUtils.defaultIfBlank(visit.contentCharset, "UTF-8");

        Document doc = Jsoup.parse(visit.contentStream, charset, visit.result.location);
        Elements anchors = doc.getElementsByTag("a");

        for (Element e : anchors) {
            String url = stripURLFragmentIdentifier(e.attr("abs:href"));
            // Locale.ROOT keeps the scheme checks below locale independent: with the default locale a
            // Turkish JVM lower-cases "MAILTO:" to "ma\u0131lto:", which would slip past the filter.
            String uri = stripURLFragmentIdentifier(e.attr("href").toLowerCase(java.util.Locale.ROOT));
            if (uri.isEmpty() || url.isEmpty() || uri.contains("javascript:") || uri.contains("mailto:")
                    || uri.contains("@")) {
                continue;
            }

            visit.discoveredLinks.add(URL.parse(url).toString()); // TODO Need to add the link text as the title
        }
        visit.contentStream.reset();
    } catch (GalimatiasParseException | IOException ex) {
        throw new RuntimeException(ex);
    }
}

From source file:ch.admin.hermes.etl.load.HermesOnlineCrawler.java

/**
 * Liefert alle Szenarion URL's /*www.ja v  a2  s . co  m*/
 * @return 
 * @throws Exception Allgemeiner I/O Fehler
 */
public String[] getScenarios() throws Exception {
    ArrayList<String> s = new ArrayList<String>();
    HttpGet get = new HttpGet(url + scenarios);

    try {
        HttpResponse response = httpClient.execute(get);

        HttpEntity entity = response.getEntity();
        String pageHTML = EntityUtils.toString(entity);
        EntityUtils.consume(entity);

        Document document = Jsoup.parse(pageHTML);
        Elements elements = document.getElementsByAttribute("href");
        for (Element e : elements) {
            if (e.attr("href").startsWith("/szenarien")) {
                String attr = e.attr("href").substring(scenario_prefix.length());
                attr = attr.substring(0, attr.lastIndexOf('/'));
                s.add(attr);
            }
        }
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null,
                "Keine Online Verbindung mglich. Bitte Szenario manuell downloaden, entpacken und bei XMl Model eintragen.",
                "Keine Verbindung zu http://www.hermes.admin.ch", JOptionPane.WARNING_MESSAGE);

    }
    return (s.toArray(new String[s.size()]));
}

From source file:org.javiermoreno.torrentscratcher.Runner.java

/**
 * Downloads one page of the listing and collects the {@code href} of every {@code a.nombre} link on it.
 *
 * @param page the page number substituted for {@code {page}} in the listing URL
 * @return the {@code href} values in document order
 * @throws IOException if the page cannot be fetched
 */
public List<String> getRecordsUrl(int page) throws IOException {
    final String template = "http://www.elitetorrent.net/categoria/13/peliculas-hdrip/modo:listado/orden:valoracion/pag:{page}";
    final String listingUrl = template.replace("{page}", String.valueOf(page));

    final Elements anchors = Jsoup.connect(listingUrl).get().select("a.nombre");

    final List<String> hrefs = new ArrayList<>();
    for (Element anchor : anchors) {
        hrefs.add(anchor.attr("href"));
    }
    return hrefs;
}

From source file:com.webcrawler.MailCrawlerService.java

/**
 * Gets the absolute mail urls.
 * <p>
 * Every given link is fetched, the anchors of the fetched page are filtered with the mail URL pattern built from
 * {@code searchToken}, and the absolute {@code href} of each matching anchor is returned. Links whose
 * {@code abs:href} is empty (jsoup could not resolve an absolute URL) are skipped, because connecting to an empty
 * URL would throw and abort the whole run.
 *
 * @param linkElements the link elements
 * @param searchToken the search token
 * @return the absolute mail urls
 * @throws IOException Signals that an I/O exception has occurred.
 */
private List<String> getAbsoluteMailUrls(Elements linkElements, String searchToken) throws IOException {
    List<String> absoluteURLList = new ArrayList<String>();
    List<Element> relativeURLList = new ArrayList<Element>();
    for (Element linkElement : linkElements) {
        String absoluteUrl = linkElement.attr("abs:href");
        if (absoluteUrl.isEmpty()) {
            // no href, or no base URI to resolve it against: nothing to fetch
            continue;
        }
        Elements anchorElements = getLinkElements(Jsoup.connect(absoluteUrl).get(), "a");
        CollectionUtils.select(anchorElements, getLinkFilterPredicate(getRegexMailUrlPattern(searchToken)),
                relativeURLList);
    }
    for (Element element : relativeURLList) {
        absoluteURLList.add(element.attr("abs:href"));
    }
    if (log.isDebugEnabled()) {
        log.debug("Absolute URL List: " + absoluteURLList.toString());
    }
    return absoluteURLList;
}

From source file:com.abixen.platform.core.service.impl.LayoutServiceImpl.java

/**
 * Converts an HTML layout into a JSON object of the form {@code {"rows":[...]}}.
 * <p>
 * Each element with class {@code row} becomes one row. The row's markup is parsed again on its own and each
 * element with class {@code column} found in it becomes one column, described by its {@code class} attribute
 * with everything up to and including the first space removed.
 *
 * @param htmlString the HTML layout
 * @return the JSON representation of the rows and columns
 */
@Override
public String htmlLayoutToJson(String htmlString) {

    log.debug("htmlLayoutToJson() - htmlString: " + htmlString);

    final List<LayoutRowUtil> rows = new ArrayList<>();
    for (Element rowElement : Jsoup.parse(htmlString).getElementsByClass("row")) {
        final Elements columnElements = Jsoup.parse(rowElement.toString()).getElementsByClass("column");

        final List<LayoutColumnUtil> columns = new ArrayList<>();
        for (Element columnElement : columnElements) {
            final String classAttr = columnElement.attr("class");
            // without a space indexOf yields -1, so the whole attribute is kept
            final int firstSpace = classAttr.indexOf(" ");
            columns.add(new LayoutColumnUtil(classAttr.substring(firstSpace + 1)));
        }

        rows.add(new LayoutRowUtil(columns));
    }

    return "{\"rows\":" + new Gson().toJson(rows) + "}";
}

From source file:com.webcrawler.MailCrawlerService.java

/**
 * Gets the link filter predicate./*from w  w  w .j a  v  a2s . co  m*/
 *
 * @param shouldVisitPattern the should visit pattern
 * @return the link filter predicate
 */
private Predicate getLinkFilterPredicate(final String shouldVisitPattern) {
    return new Predicate() {
        public boolean evaluate(Object arg0) {
            Pattern pattern = Pattern.compile(shouldVisitPattern);
            Element linkElement = (Element) arg0;
            String absoluteUrl = linkElement.attr("abs:href");
            Matcher matcher = pattern.matcher(absoluteUrl);
            if (matcher.find()) {
                if (MailCrawlerService.log.isDebugEnabled()) {
                    MailCrawlerService.log.debug("Should be visited: " + absoluteUrl);
                }
                return true;
            }
            if (MailCrawlerService.log.isDebugEnabled()) {
                MailCrawlerService.log.debug("Should not be visited: " + absoluteUrl);
            }
            return false;
        }
    };
}