List of usage examples for org.jsoup.nodes.Element.children()
public Elements children()
From source file:gov.medicaid.screening.dao.impl.PodiatricMedicineLicenseDAOBean.java
/**
 * Retrieves all results from the source site.
 *
 * <p>Three requests are issued against the search URL, in order: a GET of the front page, a
 * POST of the agency login form, and a POST of the search form. Each POST sends back the
 * {@code __VIEWSTATE} value read from the page returned by the previous request. Every row
 * selected by {@code table#_ctl2_dgrdResults tr.DataGrid} is handed to {@code parseLicense};
 * rows for which it returns {@code null} are skipped.
 *
 * @param criteria the search criteria.
 * @return the providers matched
 * @throws URISyntaxException if the URL could not be correctly constructed
 * @throws IOException for any I/O related errors
 * @throws ServiceException for any other errors encountered
 */
private SearchResult<License> getAllResults(String criteria)
        throws URISyntaxException, IOException, ServiceException {
    final String viewStateSelector = "#__aspnetForm input[name=__VIEWSTATE]";

    DefaultHttpClient client = new DefaultHttpClient();
    client.setRedirectStrategy(new LaxRedirectStrategy());

    // Step 1: load the front page; its hidden view state is needed by the login post.
    HttpGet frontPageRequest = new HttpGet(new URIBuilder(getSearchURL()).build());
    HttpResponse frontPageResponse = client.execute(frontPageRequest);
    verifyAndAuditCall(getSearchURL(), frontPageResponse);
    Document page = Jsoup.parse(EntityUtils.toString(frontPageResponse.getEntity()));

    // Step 2: submit the agency login form.
    HttpPost loginRequest = new HttpPost(new URIBuilder(getSearchURL()).build());
    String[][] loginForm = {
        { "_ctl2:dropAgencyCode", "H7Q" },
        { "_ctl2:btnLogin", "Login" },
        { "__VIEWSTATE", page.select(viewStateSelector).first().val() }
    };
    HttpEntity entity = postForm(getSearchURL(), client, loginRequest, loginForm, true);
    page = Jsoup.parse(EntityUtils.toString(entity));

    // Step 3: submit the actual search using the view state of the post-login page.
    HttpPost searchRequest = new HttpPost(new URIBuilder(getSearchURL()).build());
    String[][] searchForm = {
        { "_ctl2:txtCriteria", criteria },
        { "_ctl2:btnSearch", "Search" },
        { "__VIEWSTATE", page.select(viewStateSelector).first().val() }
    };
    entity = postForm(getSearchURL(), client, searchRequest, searchForm, true);
    page = Jsoup.parse(EntityUtils.toString(entity));

    // Convert every result row; rows that do not yield a license are dropped.
    List<License> allLicenses = new ArrayList<License>();
    for (Element row : page.select("table#_ctl2_dgrdResults tr.DataGrid")) {
        License parsed = parseLicense(row.children());
        if (parsed == null) {
            continue;
        }
        allLicenses.add(parsed);
    }

    SearchResult<License> results = new SearchResult<License>();
    results.setItems(allLicenses);
    return results;
}
From source file:feedzilla.Feed.java
private void parser(Element entry) { boolean source = false; for (Element element : entry.children()) { switch (element.nodeName()) { case "id": this.id = Integer.parseInt(element.text().split(":")[1]); break; case "title": if (source) { this.source_title = element.text(); } else { this.title = element.text(); }//from w ww .j av a 2 s.c o m break; case "summary": this.summary = element.text().split("<br")[0]; break; case "published": this.published = element.text(); break; case "updated": this.updated = element.text(); break; case "author": this.author = element.text(); break; case "link": if (source) { this.source_link = element.attr("href"); } else { this.link = element.attr("href"); } break; case "rights": this.copyright = element.text(); break; case "source": source = true; break; default: Log.debug("Unknow TAG: " + element.nodeName()); break; } } }
From source file:org.abondar.experimental.eventsearch.EventFinder.java
/**
 * Fetches {@code https://afisha.yandex.ru} + {@code place} and extracts a place string from
 * its {@code <p>} elements.
 *
 * <p>Only paragraphs whose second ancestor's HTML contains {@code "<div style"} are
 * considered. For such a paragraph with more than one child element, the HTML of the second
 * child is used if that child has an {@code href} attribute; for a paragraph with no child
 * elements, the paragraph's own HTML is used. The last paragraph that matches wins.
 *
 * @param place path appended to the site root
 * @return the extracted text followed by {@code " ?"}, or an empty string when nothing
 *         matched or the page could not be fetched (the I/O error is logged)
 */
public String getEventPlaces(String place) {
    String result = "";
    try {
        Document placeDoc = Jsoup.connect("https://afisha.yandex.ru" + place).get();
        for (Element paragraph : placeDoc.select("p")) {
            String grandParentHtml = paragraph.parents().get(1).html();
            if (!grandParentHtml.contains("<div style")) {
                continue;
            }

            Elements kids = paragraph.children();
            if (kids.size() > 1) {
                Element second = kids.get(1);
                if (second.hasAttr("href")) {
                    result = second.html() + " ?";
                }
            } else if (kids.isEmpty()) {
                result = paragraph.html() + " ?";
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }
    return result;
}
From source file:gov.medicaid.screening.dao.impl.MedicaidCertifiedProvidersDAOBean.java
/** * Retrieves all results from the source site. * * @param criteria the search criteria.//from w ww. j av a 2 s .co m * @return the providers matched * @throws URISyntaxException if the URL could not be correctly constructed * @throws IOException for any I/O related errors * @throws ServiceException for any other errors encountered */ private SearchResult<ProviderProfile> getAllResults(MedicaidCertifiedProviderSearchCriteria criteria) throws URISyntaxException, IOException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(); client.setRedirectStrategy(new LaxRedirectStrategy()); // we need to get a token from the start page, this will be stored in the client HttpGet getFrontPage = new HttpGet(new URIBuilder(getSearchURL()).build()); HttpResponse response = client.execute(getFrontPage); verifyAndAuditCall(getSearchURL(), response); EntityUtils.consume(response.getEntity()); // releases the connection // our client is now valid, pass the criteria to the search page String postSearchURL = Util.replaceLastURLPart(getSearchURL(), "showprovideroutput.cfm"); HttpPost searchPage = new HttpPost(new URIBuilder(postSearchURL).build()); HttpEntity entity = postForm(postSearchURL, client, searchPage, new String[][] { { "ProviderCatagory", criteria.getType() }, { "WhichArea", criteria.getCriteria() }, { "Submit", "Submit" }, { "SelectCounty", "All".equals(criteria.getCriteria()) ? "0" : criteria.getValue() }, { "CityToFind", "All".equals(criteria.getCriteria()) ? "" : criteria.getValue() }, { "ProviderToFind", "All".equals(criteria.getCriteria()) ? 
"" : criteria.getValue() } }, true); // this now holds the search results, parse every row Document page = Jsoup.parse(EntityUtils.toString(entity)); List<ProviderProfile> allProviders = new ArrayList<ProviderProfile>(); Elements rows = page.select("div#body table tbody tr:gt(0)"); for (Element row : rows) { ProviderProfile profile = parseProfile(row.children()); if (profile != null) { allProviders.add(profile); } } SearchResult<ProviderProfile> results = new SearchResult<ProviderProfile>(); results.setItems(allProviders); return results; }
From source file:MySpaceParser.java
private void parseSingleFile(File file) throws Exception { Document htmlFile = null;//from w w w . ja va2 s .co m try { htmlFile = Jsoup.parse(file, "ISO-8859-1"); } catch (Exception e) { e.printStackTrace(); } // Elements parents =htmlFile.getElementsByClass("cover"); Elements parents = htmlFile.getElementsByTag("section"); String title = "*^*"; String artist = "*^*"; String url = "*^*"; String imageurl = "*^*"; String pageTitle = "*^*"; String description = "*^*"; String songid = "*^*"; String genre = "*^*"; String album = "*^*"; String year = "*^*"; boolean isVideo = false; Elements titles = htmlFile.getElementsByTag("title"); Elements metas = htmlFile.getElementsByTag("meta"); for (Element meta : metas) { String name = meta.attr("name"); String prop = meta.attr("property"); if (prop.equals("og:video")) { System.out.println(); url = meta.attr("content"); String arr[] = url.split("/"); songid = arr[arr.length - 1]; title = arr[arr.length - 2]; artist = arr[arr.length - 4]; isVideo = true; } if (name.equals("description")) { // System.out.println(); description = meta.attr("content"); } } for (Element Pagetitle : titles) { pageTitle = Pagetitle.html(); // System.out.println(pageTitle); break; } if (isVideo) { SongData s = new SongData(title, url, album, artist, year, genre, imageurl); s.setPagetitle(pageTitle); s.setDescrption(description); index.put(songid, s); return; } if (parents.isEmpty() && !isVideo) { return; } else { // boolean isVideo = false; titles = htmlFile.getElementsByTag("title"); metas = htmlFile.getElementsByTag("meta"); for (Element meta : metas) { String name = meta.attr("name"); String prop = meta.attr("property"); if (prop.equals("og:video")) { System.out.println(); url = meta.attr("content"); String arr[] = url.split("/"); songid = arr[arr.length - 1]; isVideo = true; } if (name.equals("description")) { // System.out.println(); description = meta.attr("content"); } } for (Element Pagetitle : titles) { pageTitle = Pagetitle.html(); // 
System.out.println(pageTitle); break; } for (Element e : parents) { if (e.attr("id").equals("song")) { Elements e1 = e.children(); for (Element e2 : e1) { if (e2.attr("id").equals("actions")) { Elements e3 = e2.children(); int count = 0; for (Element e4 : e3) { if (count == 1) { songid = e4.attr("data-song-id"); album = e4.attr("data-album-title"); title = e4.attr("data-title"); artist = e4.attr("data-artist-name"); url = "www.myspace.com" + e4.attr("data-url"); genre = e4.attr("data-genre-name"); imageurl = e4.attr("data-image-url"); SongData s = new SongData(title, url, album, artist, year, genre, imageurl); s.setPagetitle(pageTitle); s.setDescrption(description); index.put(songid, s); } count++; } // System.out.println(); } } // System.out.println(e.attr("id")); } } //System.out.println(); } }
From source file:gov.medicaid.screening.dao.impl.HealthOccupationsProgramCredentialDAOBean.java
/**
 * Performs a search for all possible results.
 *
 * <p>The search URL is requested once and its response discarded, then the criteria are
 * posted to {@code credential_search.do}. Every row selected by
 * {@code table.formTable tr:gt(0)} is converted with {@code parseProfile} and added to the
 * result as-is.
 *
 * @param criteria The search criteria.
 * @return the search result for licenses
 *
 * @throws URISyntaxException if an error occurs while building the URL.
 * @throws ClientProtocolException if client does not support protocol used.
 * @throws IOException if an error occurs while parsing response.
 * @throws ParseException if an error occurs while parsing response.
 * @throws ServiceException for any other problems encountered
 */
private SearchResult<ProviderProfile> getAllResults(HealthOccupationsProgramCredentialSearchCriteria criteria)
        throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager());
    client.setRedirectStrategy(new LaxRedirectStrategy());

    // Initial request to the search page; the body is not needed, only drained.
    HttpGet landingRequest = new HttpGet(new URIBuilder(getSearchURL()).build());
    HttpResponse landingResponse = client.execute(landingRequest);
    verifyAndAuditCall(getSearchURL(), landingResponse);
    EntityUtils.consume(landingResponse.getEntity());

    String fullSearchURL = Util.replaceLastURLPart(getSearchURL(), "credential_search.do");
    HttpPost search = new HttpPost(new URIBuilder(fullSearchURL).build());

    String[][] form = {
        { "city", Util.defaultString(criteria.getCity()) },
        { "credentialNumber", Util.defaultLongString(criteria.getCredentialNumber()) },
        { "firstName", Util.defaultString(criteria.getFirstName()) },
        { "lastName", Util.defaultString(criteria.getLastName()) },
        { "county", getDefaultValue(criteria.getCounty()) },
        { "credentialStatus", getDefaultValue(criteria.getStatus()) },
        { "credentialType", getDefaultValue(criteria.getOccupationType()) },
        { "discipline", getDefaultValue(criteria.getDiscipline()) },
        { "state", getDefaultValue(criteria.getState()) },
        { "p_action", "search" }
    };
    HttpEntity entity = postForm(fullSearchURL, client, search, form, false);

    Document page = Jsoup.parse(EntityUtils.toString(entity));
    List<ProviderProfile> allProfiles = new ArrayList<ProviderProfile>();
    for (Element row : page.select("table.formTable tr:gt(0)")) {
        Elements cells = row.children();
        allProfiles.add(parseProfile(cells));
    }

    SearchResult<ProviderProfile> searchResult = new SearchResult<ProviderProfile>();
    searchResult.setItems(allProfiles);
    return searchResult;
}
From source file:gov.medicaid.screening.dao.impl.DieteticsAndNutritionPracticeLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param identifier The value to be searched. * @return the search result for licenses * @throws URISyntaxException When an error occurs while building the URL. * @throws ClientProtocolException When client does not support protocol used. * @throws IOException When an error occurs while parsing response. * @throws ParseException When an error occurs while parsing response. * @throws PersistenceException for database related errors * @throws ServiceException for any other errors *//* www .j a v a2 s .c o m*/ private SearchResult<License> getAllResults(String identifier) throws URISyntaxException, ClientProtocolException, IOException, ParseException, PersistenceException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(); URIBuilder builder = new URIBuilder(getSearchURL()); String hostId = builder.build().toString(); HttpGet httpget = new HttpGet(builder.build()); HttpResponse landing = client.execute(httpget); Document document = Jsoup.parse(EntityUtils.toString(landing.getEntity())); HttpPost httppost = new HttpPost(builder.build()); HttpEntity entity = postForm(hostId, client, httppost, new String[][] { { "_ctl0:_ctl1:_ctl0:txtCriteria", identifier }, { "_ctl0:_ctl1:_ctl0:btnSubmit", "Search" }, { "__EVENTTARGET", "" }, { "__EVENTARGUMENT", "" }, { "__VIEWSTATE", document.select("#Form input[name=__VIEWSTATE]").first().val() } }, true); // licenses list List<License> licenseList = new ArrayList<License>(); while (entity != null) { String result = EntityUtils.toString(entity); document = Jsoup.parse(result); Elements trs = document.select(GRID_ROW_SELECTOR); if (trs != null) { for (Element element : trs) { licenseList.add(parseLicense(element.children())); } } // done, check if there are additional results entity = null; Elements elements = document.getElementsByTag("a"); for (Element element : elements) { if (element.text().equals("Next >>")) { entity = postForm(hostId, client, httppost, new 
String[][] { { "_ctl0:_ctl1:_ctl0:txtCriteria", identifier }, { "__EVENTTARGET", "_ctl0:_ctl1:_ctl0:dgrdLicensee:_ctl29:_ctl1" }, { "__EVENTARGUMENT", "" }, { "__VIEWSTATE", document.select("#Form input[name=__VIEWSTATE]").first().val() } }, true); break; } } } SearchResult<License> result = new SearchResult<License>(); result.setItems(licenseList); return result; }
From source file:web.analyzer.utils.Utils.java
public List<Heading> docHeadingsProcess(Document doc) { List<Heading> headingList = new ArrayList<Heading>(); int level = 0; Elements eles = doc.select("*"); for (Element ele : eles) { level++;// w w w . j a va 2 s . c om if (HEADING_TAG.contains(ele.tagName())) { headingList.add(new Heading(ele.tagName(), ele.html(), level)); } if (ele.children().size() == 0) { level = 0; continue; } else { eles = ele.children(); } } return headingList; }
From source file:gov.medicaid.screening.dao.impl.MarriageAndFamilyTherapyLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param criteria The search criteria.//from w w w . j a v a 2s .c om * @param identifier The value to be searched. * @param host The host where to perform search. * @param pageNumber The page number requested * @return the search result for licenses * @throws URISyntaxException When an error occurs while building the URL. * @throws IOException When an error occurs while parsing response. * @throws ParseException When an error occurs while parsing response. * @throws PersistenceException if any db related error is encountered * @throws ServiceException When an error occurs while trying to perform search. */ private SearchResult<License> getAllResults(String criteria, String identifier, String host, int pageNumber) throws URISyntaxException, ParseException, PersistenceException, IOException, ServiceException { HttpClient client = new DefaultHttpClient(); URIBuilder builder = new URIBuilder(host).setPath("/search.asp"); String hostId = builder.build().toString(); builder.setParameter("qry", criteria).setParameter("crit", identifier).setParameter("p", "s") .setParameter("rsp", pageNumber + ""); URI uri = builder.build(); HttpGet httpget = new HttpGet(uri); SearchResult<License> searchResults = new SearchResult<License>(); HttpResponse response = client.execute(httpget); int statusCode = response.getStatusLine().getStatusCode(); if (statusCode == HttpStatus.SC_OK) { HttpEntity entity = response.getEntity(); SearchResult<License> nextResults = null; // licenses list List<License> licenseList = new ArrayList<License>(); if (entity != null) { String result = EntityUtils.toString(entity); Document document = Jsoup.parse(result); Elements trs = document.select("tr[bgcolor]"); for (Element tr : trs) { Elements tds = tr.children(); licenseList.add(parseLicenseInfo(tds)); } // check if there is next page Element next = document.select("a:containsOwn(Next)").first(); if (next != null) { nextResults = getAllResults(criteria, 
identifier, host, pageNumber + 1); } if (nextResults != null) { licenseList.addAll(nextResults.getItems()); } } searchResults.setItems(licenseList); } verifyAndAuditCall(hostId, response); return searchResults; }
From source file:by.heap.remark.convert.TextCleaner.java
private void fixLineBreaks(Element el) { for (final Element e : el.children()) { if (e.tagName().equals("br")) { e.before("\n"); e.remove();//w ww. j a va 2 s . c o m } else { fixLineBreaks(e); } } }