Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usage of org.jsoup.nodes.Document.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.amastigote.xdu.query.module.EduSystem.java

/**
 * Checks whether the stored session is still logged in.
 *
 * <p>Requests {@code SYS_HOST + SYS_SUFFIX} with the {@code JSESSIONID} cookie set and
 * redirects disabled, parses the response as gb2312 HTML and inspects the first
 * {@code <title>} element. On a match, {@code ID} is set to {@code username}.
 *
 * <p>NOTE(review): the expected title literal below looks like it lost its original
 * (non-ASCII) characters somewhere upstream - confirm against the real page title.
 *
 * @param username the user name stored in {@code ID} when the session is valid
 * @return {@code true} if the first title's text equals the expected literal, {@code false} otherwise
 * @throws IOException if the request fails or the response cannot be read
 */
@Override
public boolean checkIsLogin(String username) throws IOException {
    URL url = new URL(SYS_HOST + SYS_SUFFIX);
    HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
    httpURLConnection.setInstanceFollowRedirects(false);
    httpURLConnection.setRequestProperty("Cookie", "JSESSIONID=" + SYS_JSESSIONID);
    httpURLConnection.connect();

    Document document;
    // Close the response stream explicitly so the underlying network resources are released
    // even when parsing fails.
    java.io.InputStream responseBody = httpURLConnection.getInputStream();
    try {
        document = Jsoup.parse(responseBody, "gb2312", httpURLConnection.getURL().toString());
    } finally {
        responseBody.close();
    }

    if (document.select("title").size() == 0) {
        return false;
    } else if (document.select("title").get(0).text().equals("?")) {
        ID = username;
        return true;
    }
    return false;
}

From source file:mergedoc.core.APIDocument.java

/**
 * Extracts per-member comments from a parsed API document and stores them in
 * {@code contextTable}, keyed by signature.
 *
 * <p>NOTE(review): the original comments and several string literals in this method appear
 * to have lost their non-ASCII characters. As written, the three
 * {@code dtText.contains(":")} tests are identical, so only the first branch can run.
 * Confirm the intended literals against the upstream source.
 *
 * @param className name of the class whose members are being parsed
 * @param doc parsed API document (presumably a Javadoc HTML page - confirm)
 */
private void parseMethodComment(String className, Document doc) {
    // Each matched <li> is treated as one member detail entry.
    Elements elements = doc.select("body > div.contentContainer > div.details > ul > li > ul > li > ul > li");
    for (Element element : elements) {
        // The first <pre> holds the signature; entries without one are skipped.
        Element sigElm = element.select("pre").first();
        if (sigElm == null) {
            continue;
        }
        String sigStr = sigElm.html();
        Signature sig = createSignature(className, sigStr);
        Comment comment = new Comment(sig);

        // Deprecation text: only captured when the entry has exactly two <div>s (the first one).
        String depre = "";
        Elements divs = element.select("div");
        if (divs.size() == 2) {
            depre = divs.get(0).html();
        }
        // The last <div> is used as the main comment body.
        if (divs.size() > 0) {
            String body = divs.last().html();
            body = formatLinkTag(className, body);
            comment.setDocumentBody(body);
        }

        // Walk the <dt> headings of the tag list; each branch consumes the <dd> siblings that follow.
        Elements dtTags = element.select("dl dt");
        for (Element dtTag : dtTags) {
            String dtText = dtTag.text();
            // Branch 1: adds entries via comment.addParam (presumably the parameters heading - confirm).
            if (dtText.contains(":")) {
                Element dd = dtTag;
                while (true) {
                    // Advance through consecutive <dd> siblings; stop at the first non-<dd> or the end.
                    dd = dd.nextElementSibling();
                    if (dd == null || dd.tagName().equalsIgnoreCase("dd") == false) {
                        break;
                    }
                    String name = dd.select("code").first().text();
                    // NOTE(review): same literal as the enclosing test, so the name is always wrapped
                    // in angle brackets here; likely meant for a different heading - confirm.
                    if (dtText.contains(":")) {
                        name = "<" + name + ">";
                    }
                    String items = dd.html();
                    // Captures the text following "<code>name</code> - " as the description.
                    Pattern p = PatternCache
                            .getPattern("(?si)<CODE>(.+?)</CODE>\\s*-\\s*(.*?)(<DD>|</DD>|</DL>|<DT>|$)");
                    Matcher m = p.matcher(items);
                    if (m.find()) {
                        String desc = formatLinkTag(className, m.group(2));
                        comment.addParam(name, desc);
                    }
                }
                continue;
            }

            // Branch 2: adds the next sibling's HTML via comment.addReturn.
            // NOTE(review): unreachable with the current literals (see method comment).
            if (dtText.contains(":")) {
                Element dd = dtTag.nextElementSibling();
                String str = dd.html();
                str = formatLinkTag(className, str);
                comment.addReturn(str);
                continue;
            }

            // Branch 3: adds "name description" entries via comment.addThrows.
            // NOTE(review): unreachable with the current literals (see method comment).
            if (dtText.contains(":")) {
                Element dd = dtTag;
                while (true) {
                    dd = dd.nextElementSibling();
                    if (dd == null || dd.tagName().equalsIgnoreCase("dd") == false) {
                        break;
                    }
                    String name = dd.select("code").first().text();
                    String items = dd.html();
                    Pattern p = PatternCache
                            .getPattern("(?si)<CODE>(.+?)</CODE>\\s*-\\s*(.*?)(<DD>|</DD>|</DL>|<DT>|$)");
                    Matcher m = p.matcher(items);
                    if (m.find()) {
                        String desc = formatLinkTag(className, m.group(2));
                        String param = name + " " + desc;
                        comment.addThrows(param);
                    }
                }
                continue;
            }

        }
        // Process the deprecation text captured above.
        parseDeprecatedTag(className, depre, comment);

        // Process the remaining tags of this entry via parseCommonTag.
        parseCommonTag(className, element, comment);

        contextTable.put(sig, comment);
    }
}

From source file:gov.medicaid.screening.dao.impl.DentistryLicenseDAOBean.java

/**
 * Parses the Dentistry license details page.
 *
 * @param page the details page/*from ww w  .j  av  a2  s  .  co m*/
 * @return the parsed license details
 * @throws ParsingException if the expected tags were not found
 */
private License parseLicense(Document page) throws ParsingException {
    License license = new License();
    ProviderProfile profile = new ProviderProfile();
    license.setProfile(profile);

    String fullName = page.select("#lblName1").text();
    // remove mr. and ms.
    fullName = fullName.replaceFirst("Mr.", "").replaceFirst("Ms.", "").trim();

    User user = new User();
    profile.setUser(user);
    String[] nameParts = fullName.split(" ");
    user.setLastName(nameParts[nameParts.length - 1]);
    if (nameParts.length > 1) {
        user.setFirstName(nameParts[0]);
    }
    // everything else goes to middle name (per site behavior)
    if (nameParts.length > 2) {
        StringBuffer sb = new StringBuffer();
        for (int i = 1; i < nameParts.length - 1; i++) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(nameParts[i]);
        }
        user.setMiddleName(sb.toString());
    }

    List<Address> addresses = new ArrayList<Address>();
    Address address = new Address();
    addresses.add(address);
    profile.setAddresses(addresses);
    String fullAddress = page.select("#lblMAddress").text();
    String stateAndZip = fullAddress.substring(fullAddress.lastIndexOf(",") + 1, fullAddress.length()).trim();
    String[] szParts = stateAndZip.split(" ");
    address.setState(szParts[0]);
    address.setZipcode(szParts[1]);
    address.setLocation(fullAddress.substring(0, fullAddress.lastIndexOf(",")).trim());

    LicenseType type = new LicenseType();
    license.setType(type);
    type.setName(page.select("#lblLicType").text());
    license.setLicenseNumber(page.select("#lblLicNum").text());

    LicenseStatus status = new LicenseStatus();
    license.setStatus(status);
    status.setName(page.select("#lblLicStatus").text());

    String issueDate = page.select("#lblFirstLicenseDate").text();
    if (Util.isNotBlank(issueDate)) {
        license.setOriginalIssueDate(parseDate(issueDate, DATE_FORMAT));
    }

    String expirationDate = page.select("#lblDateExpire").text();
    if (Util.isNotBlank(expirationDate)) {
        license.setExpireDate(parseDate(expirationDate, DATE_FORMAT));
    }

    Elements additionals = page.select("table#tblAdditional table td");
    license.setCorrectiveAction(Util.defaultString(additionals.get(6).text()).startsWith("[X]"));
    license.setDiscipline(Util.defaultString(additionals.get(7).text()).startsWith("[X]"));

    return license;
}

From source file:mobi.jenkinsci.ci.client.JenkinsClient.java

/**
 * Loads the change set of a build and attaches to each change item the issue found
 * for it on the build's "changes" page.
 *
 * @param jobPath path of the job below {@code /job/}
 * @param jobBuildNumber number of the build to inspect
 * @return the build's change set, with {@code issue} assigned on every item
 * @throws IOException if a page or resource cannot be loaded
 */
public ChangeSet getJobChanges(final String jobPath, final int jobBuildNumber) throws IOException {
    final String buildUrl = config.getUrl() + "/job/" + jobPath + "/" + jobBuildNumber;

    // Issues are scraped from the first "pane" table of the HTML changes page.
    final Document changesPage = loadPage(buildUrl + "/changes", null);
    final Element paneTable = changesPage.select("table[class=pane]").first();
    final HashMap<String, Issue> issuesById = getIssuesFromTable(paneTable);

    final ChangeSet result = load(buildUrl, QUERY_STRING, Build.class, null).changeSet;
    for (final ChangeSetItem item : result.items) {
        // Items without a scraped issue get null, as returned by the map lookup.
        item.issue = issuesById.get(item.getUniqueId());
    }

    return result;
}

From source file:ru.org.linux.user.EditRegisterWebTest.java

/**
 * Loads the profile edit form for user "JB", checks the pre-filled values, posts them
 * back unchanged together with a CSRF token, and verifies the resulting page.
 */
@Test
public void testChange() throws IOException {
    final String authToken = WebHelper.doLogin(resource, "JB", JB_PASS);
    final Cookie authCookie = new Cookie(WebHelper.AUTH_COOKIE, authToken, "/", "127.0.0.1", 1);

    // Step 1: fetch the edit form.
    final ClientResponse formResponse = resource.path("people/JB/edit").cookie(authCookie)
            .get(ClientResponse.class);

    assertEquals(HttpStatus.SC_OK, formResponse.getStatus());

    final Document formPage = Jsoup.parse(formResponse.getEntityInputStream(), "UTF-8",
            resource.getURI().toString());

    assertEquals("/people/JB/edit", formPage.getElementById("editRegForm").attr("action"));

    // Step 2: read the current field values and compare them with the fixture constants.
    final String currentName = formPage.getElementById("name").val();
    final String currentUrl = formPage.getElementById("url").val();
    final String currentEmail = formPage.getElementById("email").val();
    final String currentTown = formPage.getElementById("town").val();
    final String currentInfo = formPage.getElementById("info").val();

    assertEquals(JB_NAME, currentName);
    assertEquals(JB_URL, currentUrl);
    assertEquals(JB_EMAIL, currentEmail);
    assertEquals(JB_TOWN, currentTown);
    assertEquals(JB_INFO, currentInfo);

    // Step 3: post the same values back, with a matching CSRF form field and cookie.
    final MultivaluedMap<String, String> postData = new MultivaluedMapImpl();
    postData.add("name", currentName);
    postData.add("url", currentUrl);
    postData.add("email", currentEmail);
    postData.add("town", currentTown);
    postData.add("info", currentInfo);
    postData.add("csrf", "csrf");

    final ClientResponse postResponse = resource.path("people/JB/edit").cookie(authCookie)
            .cookie(new Cookie(CSRFProtectionService.CSRF_COOKIE, "csrf"))
            .post(ClientResponse.class, postData);

    final Document resultPage = Jsoup.parse(postResponse.getEntityInputStream(), "UTF-8",
            resource.getURI().toString());

    // Step 4: the response is OK, shows the expected error text and still renders the form.
    assertEquals(HttpStatus.SC_OK, postResponse.getStatus());
    assertEquals("? ? ?   ",
            resultPage.select(".error").text());
    assertEquals("/people/JB/edit", resultPage.getElementById("editRegForm").attr("action"));
}

From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java

/**
 * ?/* w ww  .j a  va 2  s. co m*/
 * 
 * @throws Exception
 */
private int getTotalPageNum(Document document) throws Exception {
    // ?table?
    Element pageTable = document.select("table").get(4);

    String pageHtml = pageTable.select("tr td").get(0).html();

    // ??
    String regex = ".+?(.+?).+?";

    Object result = AnalyzeUtil.regex(pageHtml, regex);
    if (null == result) {
        return 0;
    }

    int totalPageNum = Integer.parseInt(result.toString());

    return totalPageNum;
}

From source file:org.jasig.portlet.proxy.service.proxy.document.ContentClippingFilter.java

/**
 * Clips the document down to the first element matching the configured CSS selector.
 *
 * <p>The selector is read from the portlet preference {@code SELECTOR_KEY}. If no selector
 * is configured (null or blank), or nothing matches, the document is left untouched.
 *
 * @param document the proxied document, modified in place
 * @param proxyResponse the proxied content response (not used here)
 * @param request the render request supplying the portlet preferences
 * @param response the render response (not used here)
 */
@Override
public void filter(final Document document, final IContentResponse proxyResponse, final RenderRequest request,
        final RenderResponse response) {

    // get the clipping selector for this portlet configuration
    final PortletPreferences preferences = request.getPreferences();
    final String selector = preferences.getValue(SELECTOR_KEY, null);

    // jsoup rejects a null/empty query with an exception; with no selector configured
    // there is nothing to clip, so keep the whole document.
    if (selector == null || selector.trim().isEmpty()) {
        return;
    }

    // locate the matching element in the document and replace the document
    // with just that node subtree
    final Elements elements = document.select(selector);
    if (elements.size() > 0) {
        document.html("").appendChild(elements.get(0));
    }
}

From source file:gov.medicaid.screening.dao.impl.MedicalPracticeLicenseDAOBean.java

/**
 * Searches for the available specialty options matching the criteria.
 *
 * @param criteria the criteria for specialty search
 * @param document the current page/* w  w w .ja v  a 2  s  .  com*/
 * @return the matched code
 * @throws ServiceException if the code provided is not present
 */
private String matchSpecialtyCode(MedicalPracticeLicenseSearchCriteria criteria, Document document)
        throws ServiceException {
    Elements specialtyOptions = document.select("select#_ctl7_ddlbSpecialty option");
    Specialty specialty = criteria.getSpecialty();
    String code = null;
    boolean found = false;
    for (Element option : specialtyOptions) {
        code = option.attr("value");
        if (Util.isNotBlank(specialty.getName())) { // match the name
            if (specialty.getName().equalsIgnoreCase(option.text())) {
                if (specialty.getCode() > 0 && Integer.parseInt(code) != specialty.getCode()) {
                    throw new ServiceException(ErrorCode.MITA10007.getDesc());
                }
                found = true;
                break;
            }
        } else { // match only the code
            if (Integer.parseInt(code) == specialty.getCode()) {
                found = true;
                break;
            }
        }
    }
    if (!found) {
        throw new ServiceException(ErrorCode.MITA10007.getDesc());
    }
    return code;
}

From source file:com.sastix.cms.server.services.content.impl.ZipHandlerServiceImpl.java

/**
 * Finds the start page declared in a SCORM metadata (manifest) file.
 *
 * <p>The file is parsed as XML; for every {@code <resources>} element the {@code href} of
 * its first {@code <resource>} is taken, and the last one found wins. A {@code <resources>}
 * element without any {@code <resource>} is ignored.
 *
 * @param metadataPath path of the metadata file to read
 * @return the {@code href} of the start resource
 * @throws ResourceAccessError if the file cannot be read or no start page is found
 */
public String findScormStartPage(final Path metadataPath) {
    Document doc;
    try {
        doc = Jsoup.parse(new String(Files.readAllBytes(metadataPath), "UTF-8"), "", Parser.xmlParser());
    } catch (final IOException e) {
        throw new ResourceAccessError("Zip " + metadataPath.getFileName() + " cannot be read. ");
    }
    String startPage = null;
    for (Element resources : doc.select("resources")) {
        // first() returns null for an empty selection, so an empty <resources> falls through
        // to the "cannot be found" error instead of an IndexOutOfBoundsException.
        Element firstResource = resources.select("resource").first();
        if (firstResource != null) {
            startPage = firstResource.attr("href");
        }
    }
    if (startPage == null) {
        throw new ResourceAccessError("Start page in Zip " + metadataPath.getFileName() + " cannot be found");
    }
    return startPage;
}

From source file:gov.medicaid.screening.dao.impl.PharmacyLicenseDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * <p>Loads the search form, posts it back with the form's hidden state fields and the given
 * criteria, then follows every link in the result table and parses each details page.
 * The HTTP client created for the search is shut down before returning.
 *
 * @param firstOrBusinessName First or Business name.
 * @param lastName Last name.
 * @param licenseNumber License number.
 * @return the search result for licenses
 * @throws URISyntaxException When an error occurs while building the URL.
 * @throws ClientProtocolException When client does not support protocol used.
 * @throws IOException When an error occurs while parsing response.
 * @throws ParseException When an error occurs while parsing response.
 */
private SearchResult<License> getAllResults(String firstOrBusinessName, String lastName, String licenseNumber)
        throws URISyntaxException, ClientProtocolException, IOException, ParseException {
    DefaultHttpClient client = new DefaultHttpClient();
    client.setRedirectStrategy(new LaxRedirectStrategy());
    try {
        String path = "/mnbop/GLSuiteWeb/Clients/MNBOPharm/Public/";
        URIBuilder builder = new URIBuilder(getSearchURL()).setPath(path + "LicenseeSearch.aspx");

        // Initial GET: needed to obtain the hidden form state that must be echoed in the POST.
        HttpGet httpget = new HttpGet(builder.build());
        HttpEntity entity = client.execute(httpget).getEntity();
        Document page = Jsoup.parse(EntityUtils.toString(entity));

        HttpPost httppost = new HttpPost(builder.build());
        List<NameValuePair> parameters = new ArrayList<NameValuePair>();
        parameters.add(new BasicNameValuePair("__VIEWSTATE", page.select("#__VIEWSTATE").first().val()));
        parameters.add(new BasicNameValuePair("__VIEWSTATEENCRYPTED",
                page.select("#__VIEWSTATEENCRYPTED").first().val()));
        parameters.add(new BasicNameValuePair("__EVENTVALIDATION",
                page.select("#__EVENTVALIDATION").first().val()));
        parameters.add(new BasicNameValuePair("ObjectID", page.select("#ObjectID").first().val()));
        parameters.add(new BasicNameValuePair("ObjectTypeID", page.select("#ObjectTypeID").first().val()));
        parameters.add(new BasicNameValuePair("waFirstName", Util.defaultString(firstOrBusinessName)));
        parameters.add(new BasicNameValuePair("waLastName", Util.defaultString(lastName)));
        parameters.add(new BasicNameValuePair("waLicenseNumber", Util.defaultString(licenseNumber)));

        httppost.setEntity(new UrlEncodedFormEntity(parameters, Charset.forName("UTF-8")));
        HttpResponse postResponse = client.execute(httppost);

        entity = postResponse.getEntity();
        // licenses list
        List<License> licenseList = new ArrayList<License>();
        if (entity != null) {
            page = Jsoup.parse(EntityUtils.toString(entity));
            // select() never returns null; an empty result simply yields no iterations.
            Elements links = page.select("table#DataTable a");
            for (Element element : links) {
                String href = element.attr("href");
                HttpGet detailsGet = new HttpGet(getSearchURL() + path + href);
                HttpResponse detailsResponse = client.execute(detailsGet);
                HttpEntity detailsEntity = detailsResponse.getEntity();
                if (detailsEntity != null) {
                    Document details = Jsoup.parse(EntityUtils.toString(detailsEntity));
                    licenseList.add(parseLicense(details));
                }
            }
        }
        SearchResult<License> result = new SearchResult<License>();
        result.setItems(licenseList);
        return result;
    } finally {
        // Release pooled connections held by this per-call client, also on failure.
        client.getConnectionManager().shutdown();
    }
}