List of usage examples for org.jsoup.nodes.Element.children()
public Elements children()
From source file:gov.medicaid.screening.dao.impl.OptometryLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param identifier The value to be searched. * @return the search result for licenses * @throws URISyntaxException When an error occurs while building the URL. * @throws ClientProtocolException When client does not support protocol used. * @throws IOException When an error occurs while parsing response. * @throws ParseException When an error occurs while parsing response. * @throws PersistenceException for database related errors * @throws ServiceException for any other problems encountered *///ww w . j a va 2s.c om private SearchResult<License> getAllResults(String identifier) throws URISyntaxException, ClientProtocolException, IOException, ParseException, PersistenceException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(); URIBuilder builder = new URIBuilder(getSearchURL()).setPath("/Default.aspx"); String hostId = builder.build().toString(); builder.setParameter("tabid", "799"); HttpGet httpget = new HttpGet(builder.build()); HttpResponse landing = client.execute(httpget); Document document = Jsoup.parse(EntityUtils.toString(landing.getEntity())); HttpPost httppost = new HttpPost(builder.build()); HttpEntity entity = postForm(hostId, client, httppost, new String[][] { { "_ctl0:_ctl1:_ctl0:txtCriteria", identifier }, { "_ctl0:_ctl1:_ctl0:btnSubmit", "Search" }, { "__EVENTTARGET", "" }, { "__EVENTARGUMENT", "" }, { "__VIEWSTATE", document.select("#Form input[name=__VIEWSTATE]").first().val() } }, true); // licenses list List<License> licenseList = new ArrayList<License>(); while (entity != null) { String result = EntityUtils.toString(entity); document = Jsoup.parse(result); Elements trs = document.select("table.Datagrid tr"); if (trs != null) { for (Element element : trs) { String cssClass = element.attr("class"); if (!"DatagridHeaderStyle".equals(cssClass.trim()) && element.children().size() == 8) { Elements tds = element.children(); licenseList.add(parseLicense(tds)); } } } // done, 
check if there are additional results entity = null; Elements elements = document.getElementsByTag("a"); for (Element element : elements) { if (element.text().equals("Next >>")) { entity = postForm(hostId, client, httppost, new String[][] { { "_ctl0:_ctl1:_ctl0:txtCriteria", identifier }, { "__EVENTTARGET", "_ctl0:_ctl1:_ctl0:dgrdLicensee:_ctl29:_ctl1" }, { "__EVENTARGUMENT", "" }, { "__VIEWSTATE", document.select("#Form input[name=__VIEWSTATE]").first().val() } }, true); break; } } } SearchResult<License> result = new SearchResult<License>(); result.setItems(licenseList); return result; }
From source file:gov.medicaid.screening.dao.impl.SocialWorkLicenseDAOBean.java
/** * Retrieves all results from the source site. * * @param searchCriteria the search criteria. * @return the providers matched/*from www . j ava2 s . c om*/ * @throws URISyntaxException if the URL could not be correctly constructed * @throws IOException for any I/O related errors * @throws ServiceException for any other errors encountered */ private SearchResult<License> getAllResults(SocialWorkCriteria searchCriteria) throws URISyntaxException, IOException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager()); client.setRedirectStrategy(new LaxRedirectStrategy()); HttpGet getSearchPage = new HttpGet(new URIBuilder(getSearchURL()).build()); HttpResponse response = client.execute(getSearchPage); verifyAndAuditCall(getSearchURL(), response); Document page = Jsoup.parse(EntityUtils.toString(response.getEntity())); String licenseNo = ""; if (searchCriteria instanceof SocialWorkLicenseSearchByLicenseNumberCriteria) { licenseNo = "" + ((SocialWorkLicenseSearchByLicenseNumberCriteria) searchCriteria).getLicenseNumber(); } String level = "none"; if (searchCriteria.getLevel() != null) { level = Util.defaultString(searchCriteria.getLevel().getName()); } HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build()); HttpEntity entity = postForm(getSearchURL(), client, search, buildParams(searchCriteria, page, licenseNo, level, null), true); page = Jsoup.parse(EntityUtils.toString(entity)); List<License> allLicenses = new ArrayList<License>(); // check if detail page (single match) if (page.select("#lblFormTitle").text().equals("License Details")) { allLicenses.add(parseLicenseDetail(page)); } else { Elements rows = page.select(RESULT_ROWS_SELECTOR); while (rows.size() > 0) { for (Element row : rows) { License license = parseLicense(row.children()); if (license != null) { allLicenses.add(license); } } rows.clear(); // check for next page Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager 
span").first(); getLog().log(Level.DEBUG, "Current page is: " + currentPage.text()); Element pageLink = currentPage.nextElementSibling(); if (pageLink != null && pageLink.hasAttr("href")) { getLog().log(Level.DEBUG, "There are more results, getting the next page."); String target = parseEventTarget(pageLink.attr("href")); entity = postForm(getSearchURL(), client, search, buildParams(searchCriteria, page, licenseNo, level, target), true); page = Jsoup.parse(EntityUtils.toString(entity)); rows = page.select(RESULT_ROWS_SELECTOR); } } } SearchResult<License> results = new SearchResult<License>(); results.setItems(allLicenses); return results; }
From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java
/** * ???//from w w w .java2 s .com * * @param eleTrs * @param rowNo * @return */ private String parseDetailTr(Element eleTr) throws Exception { Element eleTd = eleTr.select("td").get(1); // td if (eleTd.children().size() > 0) { return eleTd.child(0).html(); } else { return eleTd.html().trim(); } }
From source file:com.blackducksoftware.tools.nrt.generator.NRTReportGenerator.java
/** * Copies the HTML template into the finalHtmlOutput then injects the * generates JSON data into the specific div location and writes it out. * /*from w w w .j av a 2 s .com*/ * @param expectedFile */ public void generateHTMLFromTemplate(File finalHtmlOutput) { log.info("Writing to report: " + finalHtmlOutput); String jsonComponentList = generateJSONFromObject(componentMap); String jsonPropertyList = generateJSONFromObject(nrtConfig.getOptionsForExport()); // Construct a variable out of it jsonComponentList = "var compList=[" + jsonComponentList + "]"; jsonPropertyList = "var propList=[" + jsonPropertyList + "]"; PrintWriter writer = null; try { // Read the template Document doc = Jsoup.parse(finalHtmlOutput, "UTF-8"); // Inject the JSON Elements jsonElementDivBlock = doc.getElementsByClass(NRTConstants.HTML_JSON_DATA_BLOCK); // This will be empty, but it should exist Element jsonDivElement = jsonElementDivBlock.get(0); if (jsonDivElement != null) { // Remove any script tags from it, in case the user populated // the template incorrectly with data if (jsonDivElement.children().size() > 0) { Elements children = jsonDivElement.children(); for (int i = 0; i < children.size(); i++) { Element el = children.get(i); el.remove(); } } addNewScriptElementWithJson(jsonDivElement, jsonComponentList); addNewScriptElementWithJson(jsonDivElement, jsonPropertyList); } else { log.error("Unable to find a valid critical DIV inside HTML template: " + NRTConstants.HTML_JSON_DATA_BLOCK); } writer = new PrintWriter(finalHtmlOutput, "UTF-8"); // Write out the file writer.write(doc.html()); writer.flush(); writer.close(); } catch (Exception e) { log.error("Unable to write out final report file!", e); } finally { writer.close(); } }
From source file:com.kingfong.webcrawler.util.DOMContentUtils.java
/** * This method finds all anchors below the supplied DOM * <code>node</code>, and creates appropriate {@link Outlink} * records for each (relative to the supplied <code>base</code> * URL), and adds them to the <code>outlinks</code> {@link * ArrayList}.//from w ww . j a va 2 s . c o m * * <p> * * Links without inner structure (tags, text, etc) are discarded, as * are links which contain only single nested links and empty text * nodes (this is a common DOM-fixup artifact, at least with * nekohtml). */ public void getOutlinks(String html, URL url, HashSet<String> outlinks) { Document document = Jsoup.parse(html); Elements elements = document.getAllElements(); for (Element currentNode : elements) { String nodeName = currentNode.tagName(); // short nodeType = currentNode.; Elements children = currentNode.children(); nodeName = nodeName.toLowerCase(); LinkParams params = linkParams.get(nodeName); if (params != null) { // if (!shouldThrowAwayLink(currentNode, children, childLen, // params)) { // StringBuilder linkText = new StringBuilder(); // getText(linkText, currentNode, true); Attributes attrs = currentNode.attributes(); String target = null; boolean noFollow = false; boolean post = false; Iterator<Attribute> iterator = attrs.iterator(); while (iterator.hasNext()) { Attribute attr = iterator.next(); String attrName = attr.getKey(); if (params.attrName.equalsIgnoreCase(attrName)) { target = attr.getValue(); } else if ("rel".equalsIgnoreCase(attrName) && "nofollow".equalsIgnoreCase(attr.getValue())) { noFollow = true; } else if ("method".equalsIgnoreCase(attrName) && "post".equalsIgnoreCase(attr.getValue())) { post = true; } } if (StringUtils.startsWith(target, "/")) { target = url.getProtocol() + "://" + url.getHost() + target; } if (target != null && URLFilter.filt(target)) { outlinks.add(target); } // } // this should not have any children, skip them if (params.childLen == 0) continue; } } }
From source file:de.geeksfactory.opacclient.apis.Littera.java
/**
 * Requests one page of search results and converts the entries of the returned result list.
 *
 * <p>List entries carrying the class {@code zugangsmonat} are skipped. When the page contains no
 * result list at all, an empty result is returned.
 *
 * @param query the search query, passed on to {@code buildSearchUrl}
 * @param pageIndex the result page to request
 * @return the parsed results together with the total result count and the page index
 * @throws IOException propagated from the calls made here
 * @throws OpacErrorException propagated from the calls made here
 * @throws JSONException propagated from the calls made here
 */
protected SearchRequestResult executeSearch(List<SearchQuery> query, int pageIndex)
        throws IOException, OpacErrorException, JSONException {
    if (!initialised) {
        start();
    }

    final String searchUrl;
    try {
        searchUrl = buildSearchUrl(query, pageIndex);
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }

    final String html = httpGet(searchUrl, getDefaultEncoding());
    final Document doc = Jsoup.parse(html);

    final Element navigation = doc.select(".result_view .navigation").first();
    final int totalResults = navigation != null ? parseTotalResults(navigation.text()) : 0;

    final List<SearchResult> results = new ArrayList<>();
    final Element ul = doc.select(".result_view ul.list").first();
    if (ul == null) {
        // No result list on the page: handled like the missing-navigation case above instead of
        // dereferencing null.
        return new SearchRequestResult(results, totalResults, pageIndex);
    }

    for (final Element li : ul.children()) {
        if (li.hasClass("zugangsmonat")) {
            continue;
        }
        final SearchResult result = new SearchResult();
        final Element title = li.select(".titelinfo a").first();
        result.setId(getQueryParamsFirst(title.attr("href")).get("id"));
        result.setInnerhtml(title.text() + "<br>" + title.parent().nextElementSibling().text());
        result.setNr(results.size());
        result.setPage(pageIndex);
        result.setType(MEDIA_TYPES.get(li.select(".statusinfo .ma").text()));
        result.setCover(getCover(li));

        // The status is derived from the status image's file name; unknown names map to null.
        final String statusImg = li.select(".status img").attr("src");
        final SearchResult.Status status;
        if (statusImg.contains("-yes")) {
            status = SearchResult.Status.GREEN;
        } else if (statusImg.contains("-no")) {
            status = SearchResult.Status.RED;
        } else {
            status = null;
        }
        result.setStatus(status);

        results.add(result);
    }
    return new SearchRequestResult(results, totalResults, pageIndex);
}
From source file:mobi.jenkinsci.ci.client.JenkinsClient.java
/**
 * Builds a map from commit id to issue out of the rows of a changes table.
 *
 * <p>The rows are the children of the table's first child element. Rows for which no issue is
 * found are left out, and rows whose issue URL is malformed are logged and skipped.
 *
 * @param changesTable the table element, may be {@code null}
 * @return the issues keyed by commit id; empty when the table is {@code null} or has no children
 */
private HashMap<String, Issue> getIssuesFromTable(final Element changesTable) {
    final HashMap<String, Issue> issuesByCommit = new HashMap<String, ChangeSetItem.Issue>();
    if (changesTable == null) {
        return issuesByCommit;
    }
    if (changesTable.children().isEmpty()) {
        LOG.warn("Cannot find changes TBODY");
        return issuesByCommit;
    }

    for (final Element row : changesTable.child(0).children()) {
        final String commitId = getCommitIdFromRow(row);
        try {
            final Issue rowIssue = getIssueFromRow(row);
            if (rowIssue != null) {
                issuesByCommit.put(commitId, rowIssue);
            }
        } catch (final MalformedURLException e) {
            LOG.warn("Invalid issue URL for row " + row.toString() + ": skipping", e);
        }
    }
    return issuesByCommit;
}
From source file:org.apache.sling.hapi.client.impl.microdata.MicrodataDocument.java
private List<Item> selectItems(Element e, List<Item> items) { if (e.hasAttr("itemscope") && !e.hasAttr("itemprop")) { items.add(new ItemImpl(e, this)); return items; }//from www . j a v a 2 s. c om for (Element c : e.children()) { selectItems(c, items); } return items; }
From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.DBpediaOntologyOld.java
private void traverseHierarchy(Element e, DBpediaCategory category, HashMap<String, DBpediaCategory> map) { for (Element c : e.children()) { String tagName = c.tag().getName(); if (tagName.equals("a")) { String href = c.attr("href"); if (href != null && href.length() > 0) { category.setLabel(c.text()); category.setUri(CLASSES_BASE_URI + c.text()); map.put(category.getLabel(), category); System.out.println(c.text() + "\t" + CLASSES_BASE_URI + c.text()); }// w w w . ja va 2 s .c om } else if (tagName.equals("ul")) { for (Element c1 : c.children()) { if (c1.tagName().equals("li")) { DBpediaCategory cc = new DBpediaCategory(); traverseHierarchy(c1, cc, map); cc.parents = new HashSet<>(); cc.parents.add(category); category.getSubClasses().add(cc); } } } } }
From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java
/**
 * Logs in, reads the week selector from the navigation document, and builds the plan by
 * downloading and parsing one page per week (or one page per week and class when the
 * {@code single_classes} option is set).
 *
 * @return the plan populated with the parsed days
 * @throws IOException declared by the signature
 * @throws JSONException declared by the signature
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);
    // NOTE(review): the literal being stripped here may originally have been a non-breaking
    // space or an HTML entity rather than a plain space — verify against the real file.
    Document navbarDoc = Jsoup.parse(getNavbarDoc().replace(" ", ""));
    // NOTE(review): first() returns null when no such select exists; select.children() below
    // would then fail — confirm the navigation document always contains it.
    Element select = navbarDoc.select("select[name=week]").first();
    Vertretungsplan v = new Vertretungsplan();
    List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>();
    String info = navbarDoc.select(".description").text();
    String stand;
    // Keep everything from "Stand:" onwards; when the marker is absent indexOf yields -1,
    // substring throws, and the catch falls back to an empty string.
    try {
        stand = info.substring(info.indexOf("Stand:"));
    } catch (Exception e) {
        stand = "";
    }
    // One option per selectable week.
    for (Element option : select.children()) {
        String week = option.attr("value");
        String letter = data.optString("letter", "w");
        if (data.optBoolean("single_classes", false)) {
            // One page per class, addressed by a five-digit running number starting at 1.
            int classNumber = 1;
            for (String klasse : getAllClasses()) {
                String paddedNumber = String.format("%05d", classNumber);
                String url;
                // The option only swaps the order of the week and letter path segments.
                if (data.optBoolean("w_after_number", false))
                    url = baseUrl + "/" + week + "/" + letter + "/" + letter + paddedNumber + ".htm";
                else
                    url = baseUrl + "/" + letter + "/" + week + "/" + letter + paddedNumber + ".htm";
                Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
                // Each matched bold element carries the text used as the day's date.
                Elements days = doc.select("#vertretung > p > b, #vertretung > b");
                for (Element day : days) {
                    VertretungsplanTag tag = getTagByDatum(tage, day.text());
                    tag.setStand(stand);
                    tag.setDatum(day.text());
                    Element next = null;
                    // Locate the element that follows the day heading; the lookup differs
                    // depending on whether the heading is wrapped in a paragraph.
                    if (day.parent().tagName().equals("p")) {
                        next = day.parent().nextElementSibling().nextElementSibling();
                    } else
                        next = day.parent().select("p").first().nextElementSibling();
                    if (next.className().equals("subst")) {
                        // substitution table
                        if (next.text().contains("Vertretungen sind nicht freigegeben"))
                            continue;
                        parseVertretungsplanTable(next, data, tag);
                    } else {
                        // messages, followed two siblings later by the substitution table
                        parseNachrichten(next, data, tag);
                        next = next.nextElementSibling().nextElementSibling();
                        parseVertretungsplanTable(next, data, tag);
                    }
                    // NOTE(review): this branch stores the day via writeTagByDatum while the
                    // branch below uses tage.add — confirm the difference is intentional.
                    writeTagByDatum(tage, tag);
                }
                classNumber++;
            }
        } else {
            // Single page per week, addressed with the fixed number 00000.
            String url;
            if (data.optBoolean("w_after_number", false))
                url = baseUrl + "/" + week + "/" + letter + "/" + letter + "00000.htm";
            else
                url = baseUrl + "/" + letter + "/" + week + "/" + letter + "00000.htm";
            Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
            Elements days = doc.select("#vertretung > p > b, #vertretung > b");
            for (Element day : days) {
                VertretungsplanTag tag = getTagByDatum(tage, day.text());
                tag.setStand(stand);
                tag.setDatum(day.text());
                Element next = null;
                if (day.parent().tagName().equals("p")) {
                    next = day.parent().nextElementSibling().nextElementSibling();
                } else
                    next = day.parent().select("p").first().nextElementSibling();
                if (next.className().equals("subst")) {
                    // substitution table
                    if (next.text().contains("Vertretungen sind nicht freigegeben"))
                        continue;
                    parseVertretungsplanTable(next, data, tag);
                } else {
                    // messages, followed two siblings later by the substitution table
                    parseNachrichten(next, data, tag);
                    next = next.nextElementSibling().nextElementSibling();
                    parseVertretungsplanTable(next, data, tag);
                }
                tage.add(tag);
            }
        }
        // Reassigned on every week iteration with the same list instance.
        v.setTage(tage);
    }
    return v;
}