Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:lolthx.autohome.buy.AutohomePriceListFetch.java

/**
 * Parses a price-list page and saves one {@link AutohomePriceInfoBean} per
 * {@code li.price-item} entry found in it.
 *
 * <p>A fresh bean is created for every entry so that fields an entry does not provide
 * are never inherited from the previously processed entry.
 *
 * <p>NOTE(review): several string literals below are empty ({@code ""}) or a lone
 * {@code "?"}. They look like label prefixes whose non-ASCII characters were lost in
 * transcoding - confirm against the upstream source before relying on this method. They are
 * reproduced unchanged here.
 *
 * @param result raw HTML of the fetched page; blank input is ignored
 * @param task   crawl task supplying the start/end dates, the URL, the project name and the
 *               {@code extra} string (text before the first ':' is used as forum id, text
 *               after it as keyword)
 * @throws Exception declared by the overridden method; a failure while handling a single
 *                   entry is caught and printed, and the remaining entries are still processed
 */
@Override
public void parse(String result, Task task) throws Exception {
    if (StringUtils.isBlank(result)) {
        return;
    }

    Date start = task.getStartDate();
    Date end = task.getEndDate();

    Document doc = Jsoup.parse(result);
    Elements lis = doc.select("li.price-item");

    for (Element li : lis) {

        try {
            // One bean per entry: reusing a single instance would carry stale values over.
            AutohomePriceInfoBean bean = new AutohomePriceInfoBean();

            Elements postTimeEl = li.select("div.user-name span");
            String postTime = "";
            if (!postTimeEl.isEmpty()) {
                postTime = StringUtils.trim(
                        StringUtils.substringBefore(postTimeEl.first().text(), "?").replaceAll("", ""));

                // Skip entries that isTime(...) rejects for the task's start/end dates.
                if (!isTime(postTime, start, end)) {
                    continue;
                }
            }
            bean.setPostTime(postTime);
            bean.setUrl(task.getUrl());
            bean.setForumId(StringUtils.substringBefore(task.getExtra(), ":"));
            bean.setProjectName(task.getProjectName());
            bean.setKeyword(StringUtils.substringAfter(task.getExtra(), ":"));

            // Post id: the part of the share link's data-target after the last '_'.
            Elements id = li.select("div.price-share a.share");
            if (!id.isEmpty()) {
                String idStr = id.first().attr("data-target");
                idStr = StringUtils.substringAfterLast(idStr, "_");
                if (StringUtils.isBlank(idStr)) {
                    continue;
                }

                bean.setId(idStr);
            }

            // Posting user: the id is the last path segment of the profile link.
            Elements user = li.select("div.user-name a");
            if (!user.isEmpty()) {
                String userUrl = user.first().absUrl("href");
                String userId = StringUtils.substringAfterLast(userUrl, "/");
                String userName = user.first().text();

                bean.setUserId(userId);
                bean.setUserUrl(userUrl);
                bean.setUserName(userName);
            }

            // Each detail row is matched on the prefix of its text and the remainder is stored.
            Elements dataLis = li.select("div.price-item-bd li");
            for (Element dataLi : dataLis) {
                String data = dataLi.text();

                if (StringUtils.startsWith(data, "")) {
                    bean.setCar(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "")) {
                    bean.setPrice(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "")) {
                    bean.setGuidePrice(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "?")) {
                    bean.setTotalPrice(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "")) {
                    bean.setPurchaseTax(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "?")) {
                    bean.setCommercialInsurance(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "")) {
                    bean.setVehicleUseTax(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "")) {
                    bean.setCompulsoryInsurance(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "")) {
                    bean.setLicenseFee(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "?")) {
                    bean.setPromotion(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "")) {
                    bean.setBuyTime(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "")) {
                    String area = StringUtils.trim(StringUtils.substringAfter(data, ""));
                    String[] pAndC = StringUtils.splitByWholeSeparator(area, ",", 2);

                    // A single value is used for both province and city.
                    if (pAndC.length == 1) {
                        bean.setBuyProvince(pAndC[0]);
                        bean.setBuyCity(pAndC[0]);
                    }

                    if (pAndC.length == 2) {
                        bean.setBuyProvince(pAndC[0]);
                        bean.setBuyCity(pAndC[1]);
                    }

                }
                if (StringUtils.startsWith(data, "")) {
                    // Text of the row's span.level element, stored as the seller comment.
                    Elements level = dataLi.select("span.level");
                    if (!level.isEmpty()) {
                        bean.setSellerComment(level.first().text());
                    }

                    // Seller link: the id is the last path segment of its URL.
                    Elements seller = dataLi.select("a.title");
                    if (!seller.isEmpty()) {
                        String sellerUrl = seller.first().absUrl("href");
                        String sellerName = seller.first().text();
                        String sellerId = StringUtils.substringAfterLast(sellerUrl, "/");

                        bean.setSellerId(sellerId);
                        bean.setSellerName(sellerName);
                        bean.setSellerUrl(sellerUrl);
                    }

                    // Seller phone number.
                    Elements sellerPhone = dataLi.select("em.phone-num");
                    if (!sellerPhone.isEmpty()) {
                        bean.setSellerPhone(sellerPhone.first().text());
                    }

                    // The seller address is not extracted.
                }
                if (StringUtils.startsWith(data, "?")) {
                    bean.setBuyFeeling(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
            }
            bean.saveOnNotExist();
        } catch (Exception e) {
            // Best effort: report the failure and carry on with the next entry.
            e.printStackTrace();
        }
    }
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Builds the notification message for an "IdentityRecommendation" event document.
 *
 * <p>The pattern id is the text of the first {@code p|patternId} element. If
 * {@code patternDAO} knows that pattern, the message content lists the tag name and own text
 * of every element under the first {@code ns1|eventData} element.
 *
 * @param doc the parsed event document
 * @return the message; {@code null} when the pattern is unknown or when an exception occurs
 *         before the message is created. Any exception is caught and only its message is
 *         printed, so a failure after creation returns the partially filled message.
 */
private Message parseIdentityRecommendation(Document doc) {
    Message message = null;
    try {
        String patternId = doc.select("p|patternId").get(0).text();
        Pattern pattern = patternDAO.findById(patternId);

        if (pattern == null) {
            System.out.println("can't find patternID of the complex event:" + patternId);
        } else {
            message = new Message();
            message.setPatternId(pattern);

            StringBuilder body = new StringBuilder("EventName: IdentityRecommendation\n");
            org.jsoup.nodes.Element eventData = doc.select("ns1|eventData").get(0);
            for (org.jsoup.nodes.Element node : eventData.getAllElements()) {
                body.append(node.tagName()).append(" : ").append(node.ownText()).append("\n");
            }

            message.setSubject("Identity Recommendation");
            message.setSummary("default summary");
            message.setContent(body.toString());
            message.setMsgDate(new Date());
            message.setMsgID(1);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    return message;
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Builds the notification message for an "IdentityVerification" event document.
 *
 * <p>The pattern id is the text of the first {@code p|patternId} element. If
 * {@code patternDAO} knows that pattern, the message content lists the tag name and own text
 * of every element under the first {@code ns1|eventData} element.
 *
 * @param doc the parsed event document
 * @return the message; {@code null} when the pattern is unknown or when an exception occurs
 *         before the message is created. Any exception is caught and only its message is
 *         printed, so a failure after creation returns the partially filled message.
 */
public Message parseIdentityVerification(Document doc) {
    Message message = null;
    try {
        String patternId = doc.select("p|patternId").get(0).text();
        Pattern pattern = patternDAO.findById(patternId);

        if (pattern == null) {
            System.out.println("can't find patternID of the complex event:" + patternId);
        } else {
            message = new Message();
            message.setPatternId(pattern);

            StringBuilder body = new StringBuilder("EventName: IdentityVerification\n");
            org.jsoup.nodes.Element eventData = doc.select("ns1|eventData").get(0);
            for (org.jsoup.nodes.Element node : eventData.getAllElements()) {
                body.append(node.tagName()).append(" : ").append(node.ownText()).append("\n");
            }

            message.setSubject("Identity Verification");
            message.setSummary("default summary");
            message.setContent(body.toString());
            message.setMsgDate(new Date());
            message.setMsgID(1);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    return message;
}

From source file:cd.go.contrib.elasticagents.dockerswarm.elasticagent.executors.AgentStatusReportExecutorTest.java

/**
 * Checks whether the rendered document shows an environment variable with the given value.
 *
 * <p>Looks for a label containing {@code name} under
 * {@code .environments .name-value .name-value_pair}, then compares {@code value} with the
 * stripped text of the {@code span} elements under the first matching label's parent.
 *
 * @param document the parsed report page
 * @param name     text the label must contain
 * @param value    expected value; compared null-safely
 * @return {@code true} if a matching label exists and its value text equals {@code value}
 */
private boolean hasEnvironmentVariable(Document document, String name, String value) {
    final String labelSelector = MessageFormat
            .format(".environments .name-value .name-value_pair label:contains({0})", name);
    final Elements matchingLabels = document.select(labelSelector);

    if (!matchingLabels.isEmpty()) {
        final Elements valueSpans = matchingLabels.get(0).parent().select("span");
        return StringUtils.equals(value, StringUtils.stripToNull(valueSpans.text()));
    }
    return false;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Extracts lent items, reservations, pending fees and the card validity text from an
 * account page.
 *
 * <p>The first {@code .kontozeile_center table} is read as the list of lent items and the
 * second one, if present, as the list of reservations. For each {@code tr.tabKonto} row the
 * column maps in {@code data} ({@code accounttable} / {@code reservationtable}) give the
 * cell index for each field key; keys with a negative or non-integer index are ignored.
 *
 * @param acc  the account whose id is used for the returned {@link AccountData}
 * @param doc  the parsed account page
 * @param data configuration holding the {@code accounttable} and {@code reservationtable}
 *             column maps
 * @return the parsed account data, or {@code null} if the page contains no
 *         {@code .kontozeile_center table} at all
 * @throws JSONException if a column map cannot be read from {@code data}
 */
public static AccountData parse_account(Account acc, Document doc, JSONObject data) throws JSONException {
    JSONObject copymap = data.getJSONObject("accounttable");

    // Select the account tables once; the document is not modified while parsing.
    Elements tables = doc.select(".kontozeile_center table");
    if (tables.isEmpty()) {
        return null;
    }

    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    List<LentItem> media = new ArrayList<>();
    Elements exemplartrs = tables.get(0).select("tr.tabKonto");
    for (Element tr : exemplartrs) {
        LentItem item = new LentItem();

        Iterator<?> keys = copymap.keys();
        while (keys.hasNext()) {
            String key = (String) keys.next();
            int index;
            try {
                index = copymap.has(key) ? copymap.getInt(key) : -1;
            } catch (JSONException e1) {
                index = -1;
            }
            if (index >= 0) {
                if (key.equals("prolongurl")) {
                    if (tr.child(index).children().size() > 0) {
                        String href = tr.child(index).child(0).attr("href");
                        item.setProlongData(href);
                        item.setRenewable(href.contains("vermsg"));
                    }
                } else if (key.equals("returndate")) {
                    try {
                        item.setDeadline(fmt.parseLocalDate(tr.child(index).text()));
                    } catch (IllegalArgumentException e1) {
                        // Unparseable date: report it and leave the deadline unset.
                        e1.printStackTrace();
                    }
                } else {
                    item.set(key, tr.child(index).text());
                }
            }
        }

        media.add(item);
    }
    assert (tables.get(0).select("tr").size() > 0);
    assert (exemplartrs.size() == media.size());

    copymap = data.getJSONObject("reservationtable");

    List<ReservedItem> reservations = new ArrayList<>();
    // Only the first table is guaranteed above; a page without a second table
    // simply yields no reservations instead of an IndexOutOfBoundsException.
    if (tables.size() > 1) {
        exemplartrs = tables.get(1).select("tr.tabKonto");
        for (Element tr : exemplartrs) {
            ReservedItem item = new ReservedItem();

            Iterator<?> keys = copymap.keys();
            while (keys.hasNext()) {
                String key = (String) keys.next();
                int index;
                try {
                    index = copymap.has(key) ? copymap.getInt(key) : -1;
                } catch (JSONException e1) {
                    index = -1;
                }
                if (index >= 0) {
                    if (key.equals("cancelurl")) {
                        if (tr.child(index).children().size() > 0) {
                            item.setCancelData(tr.child(index).child(0).attr("href"));
                        }
                    } else if (key.equals("availability")) {
                        try {
                            item.setReadyDate(fmt.parseLocalDate(tr.child(index).text()));
                        } catch (IllegalArgumentException e1) {
                            // Not a date: keep the cell text as the status instead.
                            item.setStatus(tr.child(index).text());
                        }
                    } else if (key.equals("expirationdate")) {
                        try {
                            item.setExpirationDate(fmt.parseLocalDate(tr.child(index).text()));
                        } catch (IllegalArgumentException e1) {
                            // Not a date: keep the cell text as the status instead.
                            item.setStatus(tr.child(index).text());
                        }
                    } else {
                        item.set(key, tr.child(index).text());
                    }
                }
            }

            reservations.add(item);
        }
        assert (tables.get(1).select("tr").size() > 0);
        assert (exemplartrs.size() == reservations.size());
    }

    AccountData res = new AccountData(acc.getId());

    // Pending fees and card validity are picked out of free-text rows.
    String feePattern = ".*Ausstehende Geb.+hren:[^0-9]+([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr.).*";
    for (Element row : doc.select(".kontozeile_center, div[align=center]")) {
        String text = row.text().trim();
        if (text.matches(feePattern)) {
            text = text.replaceAll(feePattern, "$1 $2");
            res.setPendingFees(text);
        }
        if (text.matches("Ihr Ausweis ist g.ltig bis:.*")) {
            text = text.replaceAll("Ihr Ausweis ist g.ltig bis:[^A-Za-z0-9]+", "");
            res.setValidUntil(text);
        } else if (text.matches("Ausweis g.ltig bis:.*")) {
            text = text.replaceAll("Ausweis g.ltig bis:[^A-Za-z0-9]+", "");
            res.setValidUntil(text);
        }
    }

    res.setLent(media);
    res.setReservations(reservations);
    return res;
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Builds the notification message for an "IssueRecommendation" event document.
 *
 * <p>The pattern id is the text of the first {@code p|patternId} element. If
 * {@code patternDAO} knows that pattern, the message content lists the tag name and own text
 * of every element under the first {@code ns1|eventData} element.
 *
 * @param doc the parsed event document
 * @return the message; {@code null} when the pattern is unknown or when an exception occurs
 *         before the message is created. Any exception is caught and only its message is
 *         printed, so a failure after creation returns the partially filled message.
 */
public Message parseIssueRecommendation(Document doc) {
    Message message = null;
    try {
        String patternId = doc.select("p|patternId").get(0).text();
        Pattern pattern = patternDAO.findById(patternId);

        if (pattern == null) {
            System.out.println("can't find patternID of the complex event:" + patternId);
        } else {
            message = new Message();
            message.setPatternId(pattern);

            StringBuilder body = new StringBuilder("EventName: IssueRecommendation\n");
            org.jsoup.nodes.Element eventData = doc.select("ns1|eventData").get(0);
            for (org.jsoup.nodes.Element node : eventData.getAllElements()) {
                body.append(node.tagName()).append(" : ").append(node.ownText()).append("\n");
            }

            message.setSubject("Issue Recommendation");
            message.setSummary("default summary");
            message.setContent(body.toString());
            message.setMsgDate(new Date());
            message.setMsgID(1);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    return message;
}

From source file:org.keycloak.testsuite.util.saml.ModifySamlResponseStepBuilder.java

/**
 * Reads the SAML document from a POST-binding response page, runs it through the configured
 * transformer and builds the follow-up request carrying the transformed document.
 *
 * <p>The page must contain exactly one form and exactly one {@code SAMLResponse} or
 * {@code SAMLRequest} input; both conditions are asserted.
 *
 * @param currentResponse the HTTP response holding the POST-binding HTML page; asserted to
 *                        have status OK
 * @return the request built by {@code createRequest}, or {@code null} if the transformer
 *         returns {@code null}
 * @throws Exception if reading, decoding or transforming the document fails
 */
private HttpUriRequest handlePostBinding(CloseableHttpResponse currentResponse) throws Exception {
    assertThat(currentResponse, statusCodeIsHC(Status.OK));

    final String htmlBody = EntityUtils.toString(currentResponse.getEntity());
    assertThat(htmlBody, Matchers.containsString("SAML"));
    org.jsoup.nodes.Document theResponsePage = Jsoup.parse(htmlBody);
    Elements samlResponses = theResponsePage.select("input[name=SAMLResponse]");
    Elements samlRequests = theResponsePage.select("input[name=SAMLRequest]");
    Elements forms = theResponsePage.select("form");
    Elements relayStates = theResponsePage.select("input[name=RelayState]");
    int size = samlResponses.size() + samlRequests.size();
    assertThat("Checking uniqueness of SAMLResponse/SAMLRequest input field in the page", size, is(1));
    assertThat("Checking uniqueness of forms in the page", forms, hasSize(1));

    // Exactly one of the two lists is non-empty (asserted above).
    Element respElement = samlResponses.isEmpty() ? samlRequests.first() : samlResponses.first();
    Element form = forms.first();

    String base64EncodedSamlDoc = respElement.val();
    String samlDoc;
    // try-with-resources closes the decoded stream even if reading it throws.
    try (InputStream decoded = PostBindingUtil.base64DecodeAsStream(base64EncodedSamlDoc)) {
        samlDoc = IOUtils.toString(decoded, GeneralConstants.SAML_CHARSET);
    }

    String transformed = getTransformer().transform(samlDoc);
    if (transformed == null) {
        return null;
    }

    // An explicitly configured target attribute / URI overrides what the page provides.
    final String attributeName = this.targetAttribute != null ? this.targetAttribute : respElement.attr("name");
    List<NameValuePair> parameters = new LinkedList<>();

    if (!relayStates.isEmpty()) {
        parameters.add(new BasicNameValuePair(GeneralConstants.RELAY_STATE, relayStates.first().val()));
    }
    URI locationUri = this.targetUri != null ? this.targetUri : URI.create(form.attr("action"));

    return createRequest(locationUri, attributeName, transformed, parameters);
}

From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java

/**
 * Replaces the content of every {@code code} element with syntax-highlighted HTML.
 *
 * <p>The element's class name selects the renderer; a blank class name falls back to
 * {@code "java"}. Elements for which no renderer is available are left untouched. After
 * highlighting, {@code br} elements and comments are removed, the markup is trimmed, and a
 * {@code pre} parent gets the {@code code} class.
 *
 * @param slidesDocument the slides document whose {@code code} elements are rewritten in place
 * @param config         supplies the input encoding used to encode and decode the code text
 */
private void renderSyntaxHighlightingHtml(final Document slidesDocument, final Configuration config) {
    for (Element code : slidesDocument.select("code")) {
        Charset encoding = config.getInputEncoding();
        ByteArrayInputStream input = new ByteArrayInputStream(code.text().getBytes(encoding));
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        String className = code.className();
        if (StringUtils.isBlank(className)) {
            className = "java";
        }
        Renderer renderer = XhtmlRendererFactory.getRenderer(className);
        if (renderer != null) {
            try {
                renderer.highlight("slidery", input, out, encoding.name(), true);
                code.html(new String(out.toByteArray(), encoding));
                code.select("br").remove();
                removeComments(code);
                code.html(code.html().trim());
                Element parent = code.parent();
                // Compare tag names by value; '==' on Strings only compares references.
                if (parent != null && "pre".equals(parent.tagName())) {
                    parent.addClass("code");
                }
            } catch (IOException ignored) {
                // Best effort: both streams are in-memory; if highlighting still fails,
                // skip this snippet and continue with the next one.
            }
        }
    }
}

From source file:de.fzi.ALERT.actor.MessageObserver.NotificationObserver.JMSNotificationParser.java

/**
 * Builds the notification message for a "Similar Issues" event document.
 *
 * <p>The pattern id is the text of the first {@code p|patternId} element. If
 * {@code patternDAO} knows that pattern, the message content lists, for every {@code item}
 * element, the text of its {@code subject}, {@code url} and {@code similarity} tags, one per
 * line.
 *
 * @param doc the parsed event document
 * @return the message; {@code null} when the pattern is unknown or when an exception occurs
 *         before the message is created. Any exception is caught and only its message is
 *         printed, so a failure after creation returns the partially filled message.
 */
public Message parseSimilarIssues(Document doc) {
    Message message = null;
    try {
        String patternId = doc.select("p|patternId").get(0).text();
        Pattern pattern = patternDAO.findById(patternId);

        if (pattern == null) {
            System.out.println("can't find patternID of the complex event:" + patternId);
        } else {
            message = new Message();
            message.setPatternId(pattern);

            StringBuilder body = new StringBuilder("Similar Issues\n");
            for (org.jsoup.nodes.Element entry : doc.select("item")) {
                body.append(entry.getElementsByTag("subject").text()).append("\n");
                body.append(entry.getElementsByTag("url").text()).append("\n");
                body.append(entry.getElementsByTag("similarity").text()).append("\n");
            }

            message.setSubject("Similar Issues");
            message.setSummary("default summary");
            message.setContent(body.toString());
            message.setMsgDate(new Date());
            message.setMsgID(1);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
    return message;
}

From source file:mx.clickfactura.util.TipoCambioUtil.java

/**
 * Looks up the exchange rate published on www.dof.gob.mx for the given date.
 *
 * <p>The date is sent as both the start and end of the queried range (indicator 158). The
 * returned page is scanned table by table: for a table whose {@code txt}-class elements carry
 * a {@code height} attribute and whose combined text splits into exactly six space-separated
 * tokens, the sixth token is accepted if it has the form {@code dd.dddddd}.
 *
 * @param fecha the date in {@code yyyy-MM-dd} format; surrounding whitespace is ignored
 * @return the exchange rate exactly as it appears on the page
 * @throws CustomBadRequestException if {@code fecha} is {@code null} or not in
 *                                   {@code yyyy-MM-dd} format
 * @throws CustomNotFoundException   if the page holds no matching rate for that date
 * @throws Exception                 if the date cannot be parsed or the HTTP request fails
 */
public String getTipoCambio(String fecha) throws CustomBadRequestException, CustomNotFoundException, Exception {

    // Validate and parse the same trimmed value so the two steps cannot disagree.
    String fechaLimpia = (fecha == null) ? null : fecha.trim();
    if (fechaLimpia == null || !Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}$").matcher(fechaLimpia).matches()) {
        throw new CustomBadRequestException("Fecha invalida, el formato debe ser: yyyy-MM-dd");
    }

    Calendar cal = new GregorianCalendar();
    cal.setTime(new SimpleDateFormat("yyyy-MM-dd").parse(fechaLimpia));

    int diaNum = cal.get(Calendar.DATE);
    int mesNum = cal.get(Calendar.MONTH) + 1; // Calendar months are zero-based
    String dia = (diaNum < 10) ? "0" + diaNum : String.valueOf(diaNum);
    String mes = (mesNum < 10) ? "0" + mesNum : String.valueOf(mesNum);
    String anio = String.valueOf(cal.get(Calendar.YEAR));

    // dd/MM/yyyy with the slashes already URL-encoded.
    String fechaInicial = dia + "%2F" + mes + "%2F" + anio;

    HttpGet get = new HttpGet("http://www.dof.gob.mx/indicadores_detalle.php?cod_tipo_indicador=158&dfecha="
            + fechaInicial + "&hfecha=" + fechaInicial);

    CookieStore cookies = new BasicCookieStore();
    HttpContext httpContext = new BasicHttpContext();
    httpContext.setAttribute(HttpClientContext.COOKIE_STORE, cookies);

    Pattern formatoTipoCambio = Pattern.compile("^\\d{2}\\.\\d{6}$");
    String tipoCambio = null;

    // try-with-resources closes the response and then the client, also when an exception is thrown.
    try (CloseableHttpClient client = HttpClients.createDefault();
            CloseableHttpResponse response = client.execute(get, httpContext)) {

        String body = IOUtils.toString(response.getEntity().getContent(), "UTF-8");

        // The body is already a decoded String; Jsoup.parse's second String argument is a
        // base URI, not a charset, so none is passed.
        Document doc = Jsoup.parse(body).normalise();

        for (Element tabla : doc.select("table")) {
            Elements celdas = tabla.getElementsByClass("txt");
            if (!celdas.hasAttr("height")) {
                continue;
            }

            String[] partes = celdas.text().split(" ");
            if (partes.length == 6 && formatoTipoCambio.matcher(partes[5].trim()).matches()) {
                tipoCambio = partes[5];
                break;
            }
        }
    }

    if (tipoCambio == null) {
        // NOTE(review): "da" looks like "día" with a lost accented character - confirm upstream.
        throw new CustomNotFoundException("No hay un tipo de cambio para el da: " + fecha);
    }

    return tipoCambio;
}