List of usage examples for org.jsoup.nodes.Element#select
public Elements select(String cssQuery)
From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java
private Map<String, String> parseGeneralData(Element root, int dataType) { Map<String, String> generalData = new HashMap<String, String>(); // last update time and day Element updateTime = root.select("table.mon_head td:eq(2) p").first(); if (updateTime != null) { Pattern pat = Pattern.compile("(Stand: [\\.:0-9 ]+)", Pattern.DOTALL); Matcher matcher = pat.matcher(updateTime.text()); if (matcher.find()) generalData.put(Sync.GENERAL_DATA_UPDATETIME, matcher.group(1)); }/*from ww w . j a v a 2s . c o m*/ // date the substitution table belongs to Element belongingDate = root.select("div.mon_title").first(); if (belongingDate != null) generalData.put(Sync.GENERAL_DATA_DATE, belongingDate.text()); // daily information Elements dailyInfos = root.select("table.info tr"); int i = 0; for (Element info : dailyInfos) { Elements e = info.select("td"); if (e.size() == 0) continue; String title = "", description = ""; for (TextNode node : e.first().textNodes()) title += node.text() + '\n'; title = title.trim(); // description only if available if (e.size() > 1) { for (TextNode node : e.get(1).textNodes()) description += node.text() + '\n'; description = title.trim(); } String keyTitle = "", keyDescription = ""; switch (i) { case 0: keyTitle = Sync.GENERAL_DATA_DAILYINFO_1_TITLE; keyDescription = Sync.GENERAL_DATA_DAILYINFO_1_DESCRIPTION; break; case 1: keyTitle = Sync.GENERAL_DATA_DAILYINFO_2_TITLE; keyDescription = Sync.GENERAL_DATA_DAILYINFO_2_DESCRIPTION; break; case 2: keyTitle = Sync.GENERAL_DATA_DAILYINFO_3_TITLE; keyDescription = Sync.GENERAL_DATA_DAILYINFO_3_DESCRIPTION; break; default: break; } if (!keyTitle.equals("")) { generalData.put(keyTitle, title); generalData.put(keyDescription, description); } i++; } generalData.put(Sync.GENERAL_DATA_DATATYPE, String.valueOf(dataType)); return generalData; }
From source file:com.amastigote.xdu.query.module.EduSystem.java
private @Nullable JSONObject lessonsQuery() throws IOException, JSONException { if (!checkIsLogin(ID)) return null; URL url = new URL(SYS_HOST + "xkAction.do?actionType=6"); HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(); httpURLConnection.setRequestProperty("Cookie", "JSESSIONID=" + SYS_JSESSIONID); httpURLConnection.connect();/* w ww. j ava 2s . co m*/ Document document = Jsoup.parse(httpURLConnection.getInputStream(), "gb2312", httpURLConnection.getURL().toString()); document = Jsoup.parse(document.toString().replaceAll(" ", "")); Elements lessons = document.select("table[class=titleTop2]"); Element lessonsElement = lessons.get(1); Elements lessonsInfo = lessonsElement.select("tr[onmouseout=this.className='even';]"); int lessons_quantity = lessonsInfo.size(); JSONArray jsonArray = new JSONArray(); for (int i = 0; i < lessons_quantity;) { Element lessonInfo = lessonsInfo.get(i); Elements lessonDetails = lessonInfo.select("td"); // if (lessonDetails.get(14).text().equals("")) { i++; continue; } JSONObject JLessonObject = new JSONObject(); JLessonObject.put(CourseKey.ID, lessonDetails.get(1).text()); JLessonObject.put(CourseKey.NAME, lessonDetails.get(2).text()); JLessonObject.put(CourseKey.CREDIT, lessonDetails.get(4).text()); JLessonObject.put(CourseKey.LENGTH, lessonDetails.get(5).text()); JLessonObject.put(CourseKey.ATTR, lessonDetails.get(6).text()); JLessonObject.put(CourseKey.EXAM_TYPE, lessonDetails.get(7).text()); JLessonObject.put(CourseKey.TEACHER, lessonDetails.get(8).text()); JSONArray JLessonTimeAndPosArray = new JSONArray(); JSONObject JLessonTimeAndPos = new JSONObject(); JLessonTimeAndPos.put(CourseKey.WEEK, lessonDetails.get(12).text()); JLessonTimeAndPos.put(CourseKey.WEEK_DAY, lessonDetails.get(13).text()); JLessonTimeAndPos.put(CourseKey.SECTION_TIME, lessonDetails.get(14).text()); JLessonTimeAndPos.put(CourseKey.SECTION_LENGTH, lessonDetails.get(15).text()); JLessonTimeAndPos.put(CourseKey.CAMPUS, 
lessonDetails.get(16).text()); JLessonTimeAndPos.put(CourseKey.BUILDING, lessonDetails.get(17).text()); JLessonTimeAndPos.put(CourseKey.CLASSROOM, lessonDetails.get(18).text()); JLessonTimeAndPosArray.put(JLessonTimeAndPos); i++; //??Array int row_span; //row_span?1 if ("".equals(lessonInfo.select("td").get(0).attr("rowspan"))) { row_span = 1; } else { row_span = Integer.parseInt(lessonInfo.select("td").get(0).attr("rowspan")); } //row_span?1?? for (int j = 0; j < row_span - 1; j++, i++) { Elements EExtraTimeAndPos = lessonsInfo.get(i).select("td"); JSONObject JExtraLessonTimeAndPos = new JSONObject(); JExtraLessonTimeAndPos.put(CourseKey.WEEK, EExtraTimeAndPos.get(0).text()); JExtraLessonTimeAndPos.put(CourseKey.WEEK_DAY, EExtraTimeAndPos.get(1).text()); JExtraLessonTimeAndPos.put(CourseKey.SECTION_TIME, EExtraTimeAndPos.get(2).text()); JExtraLessonTimeAndPos.put(CourseKey.SECTION_LENGTH, EExtraTimeAndPos.get(3).text()); JExtraLessonTimeAndPos.put(CourseKey.CAMPUS, EExtraTimeAndPos.get(4).text()); JExtraLessonTimeAndPos.put(CourseKey.BUILDING, EExtraTimeAndPos.get(5).text()); JExtraLessonTimeAndPos.put(CourseKey.CLASSROOM, EExtraTimeAndPos.get(6).text()); JLessonTimeAndPosArray.put(JExtraLessonTimeAndPos); } JLessonObject.put(CourseKey.TIME_AND_LOCATION_DERAIL, JLessonTimeAndPosArray); jsonArray.put(JLessonObject); } return new JSONObject().put("ARRAY", jsonArray); }
From source file:net.parser.JobParser.java
public List<Job> getFeaturedJobIdAndLink(List<Job> jobs) { Job job = null;//from www . jav a2 s .c o m Employer employer = null; Elements elements = doc.select("#featured-jobs li"); for (Element jobElement : elements) { employer = new Employer(); Elements empElements = jobElement.select(".logo"); if (empElements.size() == 1) { employer.setName(empElements.get(0).attr("title")); employer.setId(0); } else { String linkEmployer = empElements.get(0).attr("href"); employer.setLink(linkEmployer); linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length()); linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length()); linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length()); linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length()); String id = linkEmployer.substring(0, linkEmployer.indexOf("/")); employer.setId(Integer.parseInt(id)); employer.setName(empElements.get(1).attr("title")); } Elements aElements = jobElement.select("span a"); for (Element aJob : aElements) { job = new Job(); String linkJob = aJob.attr("href"); linkJob = linkJob.replaceFirst("www", "m"); job.setLink(linkJob); String id = aJob.attr("data-id"); job.setId(Integer.parseInt(id)); job.setEmployer(employer); jobs.add(job); } } return jobs; }
From source file:gov.medicaid.screening.dao.impl.BBHTLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param criteria The search criteria.// w w w.ja va 2 s. c o m * @param byName flag indicating it is a name search * @return the search result for licenses * * @throws URISyntaxException if an error occurs while building the URL. * @throws ClientProtocolException if client does not support protocol used. * @throws IOException if an error occurs while parsing response. * @throws ParseException if an error occurs while parsing response. * @throws ServiceException for any other problems encountered */ private SearchResult<License> getAllResults(BBHTLicenseSearchCriteria criteria, boolean byName) throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager()); client.setRedirectStrategy(new LaxRedirectStrategy()); HttpGet getSearch = new HttpGet(new URIBuilder(getSearchURL()).build()); HttpResponse response = client.execute(getSearch); verifyAndAuditCall(getSearchURL(), response); Document page = Jsoup.parse(EntityUtils.toString(response.getEntity())); HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build()); List<License> allLicenses = new ArrayList<License>(); // switch to search by name screen if (byName) { HttpEntity entity = postForm(getSearchURL(), client, search, new String[][] { { "__EVENTTARGET", "_ctl7_rbtnSearch_1" }, { "__EVENTARGUMENT", "" }, { "_ctl7:ddlbLicenseType", "CD" }, { "_ctl7:rbtnSearch", "2" }, { "_ctl7:txtLicenseNumber", "" }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); entity = getResultPage(criteria, client, page, search, "_ctl7:cmdSearch", getSearchURL()); page = Jsoup.parse(EntityUtils.toString(entity)); // get the data grid entries if (page.select("table#_ctl7_grdSearchResults").size() < 1) { throw new ParsingException(ErrorCode.MITA50002.getDesc()); } Elements rows = 
page.select(GRID_ROW_SELECTOR); while (rows.size() > 0) { for (Element row : rows) { String url = row.select("a").first().attr("href"); String licenseNo = row.select("td:eq(5)").text(); HttpGet getDetail = new HttpGet(Util.replaceLastURLPart(getSearchURL(), url)); response = client.execute(getDetail); verifyAndAuditCall(getSearchURL(), response); Document licenseDetails = Jsoup.parse(EntityUtils.toString(response.getEntity())); allLicenses.add(parseLicense(licenseDetails, licenseNo)); } rows.clear(); // check for next page Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first(); if (getLog() != null) { getLog().log(Level.DEBUG, "Current page is: " + currentPage.text()); } Element pageLink = currentPage.nextElementSibling(); if (pageLink != null && pageLink.hasAttr("href")) { if (getLog() != null) { getLog().log(Level.DEBUG, "There are more results, getting the next page."); } String target = parseEventTarget(pageLink.attr("href")); entity = getResultPage(criteria, client, page, search, target, getSearchURL()); page = Jsoup.parse(EntityUtils.toString(entity)); rows = page.select(GRID_ROW_SELECTOR); } } } else { // search by license number (site supports only exact match) HttpEntity entity = postForm(getSearchURL(), client, search, new String[][] { { "__EVENTTARGET", "_ctl7:cmdSearch" }, { "__EVENTARGUMENT", "" }, { "_ctl7:ddlbLicenseType", Util.defaultString(criteria.getLicenseType().getName()) }, { "_ctl7:rbtnSearch", "1" }, { "_ctl7:txtLicenseNumber", Util.defaultString(criteria.getIdentifier()) }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); if (page.select("span#lblFormTitle").text().equals("License Details")) { String prefLicenseNo = criteria.getIdentifier(); allLicenses.add(parseLicense(page, prefLicenseNo)); } } SearchResult<License> searchResult = new SearchResult<License>(); searchResult.setItems(allLicenses); return searchResult; }
From source file:gov.medicaid.screening.dao.impl.BBHTLicenseDAOBean.java
/** * Parses the nursing license details page. * * @param page the details page/*from w w w. ja v a2 s. c o m*/ * @param licenseNo if user has multiple licenses, this one will be used * @return the parsed license details * @throws ParsingException if the page does not contain the expected elements */ private License parseLicense(Document page, String licenseNo) throws ParsingException { if (!page.select("span#lblFormTitle").text().equals("License Details")) { throw new ParsingException(ErrorCode.MITA50002.getDesc()); } License license = new License(); ProviderProfile profile = new ProviderProfile(); license.setProfile(profile); String fullNameWithType = page.select("#_ctl7_lblName").text(); String fullName = fullNameWithType.indexOf(",") != -1 ? fullNameWithType.substring(0, fullNameWithType.indexOf(",")) : fullNameWithType; User user = new User(); profile.setUser(user); String[] nameParts = fullName.split(" "); user.setLastName(nameParts[nameParts.length - 1]); if (nameParts.length > 1) { user.setFirstName(nameParts[0]); } // everything else goes to middle name (per site behavior) if (nameParts.length > 2) { StringBuffer sb = new StringBuffer(); for (int i = 1; i < nameParts.length - 1; i++) { if (sb.length() > 0) { sb.append(" "); } sb.append(nameParts[i]); } user.setMiddleName(sb.toString()); } String gender = page.select("#_ctl7_lblGender").text(); if (Util.isNotBlank(gender)) { if ("Female".equals(gender)) { profile.setSex(Sex.FEMALE); } else { profile.setSex(Sex.MALE); } } String city = page.select("#_ctl7_lblPublicCity").text(); if (Util.isNotBlank(city)) { List<Address> addresses = new ArrayList<Address>(); Address address = new Address(); addresses.add(address); address.setCity(city); profile.setAddresses(addresses); } Elements licenses = page.select("#_ctl7_dgLicense tr.Normal"); for (Element row : licenses) { String licenseNumber = row.select("td:eq(1)").text(); if (licenseNo != null && !licenseNumber.startsWith(licenseNo)) { // user has multiple 
licenses, the results will show this user twice (search by name) continue; } license.setLicenseNumber(licenseNumber); LicenseType type = new LicenseType(); type.setName(row.select("td:eq(0)").text()); license.setType(type); LicenseStatus status = new LicenseStatus(); status.setName(row.select("td:eq(2)").text()); license.setStatus(status); String issueDate = row.select("td:eq(3)").text(); if (Util.isNotBlank(issueDate)) { license.setOriginalIssueDate(parseDate(issueDate, DATE_FORMAT)); } String expirationDate = row.select("td:eq(4)").text(); if (Util.isNotBlank(expirationDate)) { license.setExpireDate(parseDate(expirationDate, DATE_FORMAT)); } } licenses.clear(); return license; }
From source file:gov.medicaid.screening.dao.impl.MedicalPracticeLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param criteria The search criteria.//www. ja v a 2 s. c om * @param byName flag indicating it is a search by name. * @return the search result for licenses * @throws URISyntaxException if an error occurs while building the URL. * @throws ClientProtocolException if client does not support protocol used. * @throws IOException if an error occurs while parsing response. * @throws ParseException if an error occurs while parsing response. * @throws ServiceException for any other problems encountered */ private SearchResult<License> getAllResults(MedicalPracticeLicenseSearchCriteria criteria, boolean byName) throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(); URIBuilder builder = new URIBuilder(getSearchURL()).setPath("/BMP/DesktopModules/ServiceForm.aspx"); String hostId = builder.toString(); builder.setParameter("svid", "30").setParameter("mid", "176"); HttpGet httpget = new HttpGet(builder.build()); HttpResponse landing = client.execute(httpget); Document document = Jsoup.parse(EntityUtils.toString(landing.getEntity())); HttpPost httppost = new HttpPost(builder.build()); HttpEntity entity = postForm(hostId, client, httppost, new String[][] { { "__EVENTTARGET", byName ? "_ctl7_rblSearchOption_0" : "_ctl7_rblSearchOption_1" }, { "__EVENTARGUMENT", "" }, { "_ctl7:rblSearchOption", byName ? 
"Name" : "Specialty" }, { "__VIEWSTATE", document.select("#Form1 input[name=__VIEWSTATE]").first().val() } }, true); document = Jsoup.parse(EntityUtils.toString(entity)); httppost.releaseConnection(); if (byName) { entity = postForm(hostId, client, httppost, new String[][] { { "__EVENTTARGET", "" }, { "__EVENTARGUMENT", "" }, { "_ctl7:cmdSearch", "Search" }, { "_ctl7:rblSearchOption", "Name" }, { "_ctl7:txtCity", Util.defaultString(criteria.getCity()) }, { "_ctl7:txtFirstName", Util.defaultString(criteria.getFirstName()) }, { "_ctl7:txtLastName", Util.defaultString(criteria.getLastName()) }, { "_ctl7:txtLicNbr", Util.defaultString(criteria.getIdentifier()) }, { "__VIEWSTATE", document.select("#Form1 input[name=__VIEWSTATE]").first().val() } }, true); } else { String code = matchSpecialtyCode(criteria, document); entity = postForm(hostId, client, httppost, new String[][] { { "__EVENTTARGET", "" }, { "__EVENTARGUMENT", "" }, { "_ctl7:cmdSearchSpecialty", "Search" }, { "_ctl7:ddlbSpecialty", code }, { "_ctl7:rblSearchOption", "Specialty" }, { "_ctl7:txtSpecialtyCity", Util.defaultString(criteria.getCity()) }, { "_ctl7:txtSpecialtyZip", Util.defaultString(criteria.getZipcode()) }, { "__VIEWSTATE", document.select("#Form1 input[name=__VIEWSTATE]").first().val() } }, true); } // licenses list List<License> licenseList = new ArrayList<License>(); if (entity != null) { String result = EntityUtils.toString(entity); document = Jsoup.parse(result); Elements rows = document.select( "#_ctl7_grdSearchResults tr.TableItem, #_ctl7_grdSearchResults tr.TableAlternatingItem"); for (Element row : rows) { String href = row.select("a[name=Hyperlink1]").first().attr("href"); String city = row.select("td:eq(4)").text(); String detailsLink = getSearchURL() + "/BMP/DesktopModules/" + href.replaceAll(" ", "%20"); HttpGet detailsGet = new HttpGet(detailsLink); HttpResponse detailsResponse = client.execute(detailsGet); HttpEntity detailsEntity = detailsResponse.getEntity(); if (detailsEntity 
!= null) { Document details = Jsoup.parse(EntityUtils.toString(detailsEntity)); licenseList.add(parseLicense(city, details)); } } } SearchResult<License> result = new SearchResult<License>(); result.setItems(licenseList); return result; }
From source file:mergedoc.core.APIDocument.java
/**
 * Parses the member detail entries of an API Javadoc page and registers one
 * {@code Comment} per entry in {@code contextTable}, keyed by its signature.
 *
 * NOTE(review): the original Javadoc text and several string literals in this
 * method appear to have lost non-ASCII characters; see the notes below.
 *
 * @param className name passed to createSignature / formatLinkTag for every entry
 * @param doc       parsed API document the detail entries are selected from
 */
private void parseMethodComment(String className, Document doc) {
    // every innermost <li> under div.details is treated as one member entry
    Elements elements = doc.select("body > div.contentContainer > div.details > ul > li > ul > li > ul > li");
    for (Element element : elements) {
        // the first <pre> holds the signature; entries without one are skipped
        Element sigElm = element.select("pre").first();
        if (sigElm == null) {
            continue;
        }
        String sigStr = sigElm.html();
        Signature sig = createSignature(className, sigStr);
        Comment comment = new Comment(sig);

        // deprecated: with exactly two <div>s the first one is kept as the
        // deprecation text; the last <div> is always used as the document body
        String depre = "";
        Elements divs = element.select("div");
        if (divs.size() == 2) {
            depre = divs.get(0).html();
        }
        if (divs.size() > 0) {
            String body = divs.last().html();
            body = formatLinkTag(className, body);
            comment.setDocumentBody(body);
        }

        Elements dtTags = element.select("dl dt");
        for (Element dtTag : dtTags) {
            String dtText = dtTag.text();

            // NOTE(review): all three branches below test contains(":"). As
            // written, the first branch handles every <dt> containing a colon and
            // the second and third can never run. The literals presumably once
            // held distinct section headings - restore them from the upstream source.
            if (dtText.contains(":")) {
                // parameter-style section: walk the <dd> siblings that follow this <dt>
                Element dd = dtTag;
                while (true) {
                    dd = dd.nextElementSibling();
                    if (dd == null || dd.tagName().equalsIgnoreCase("dd") == false) {
                        break;
                    }
                    String name = dd.select("code").first().text();
                    // NOTE(review): same condition as the enclosing if, so the name is
                    // always wrapped in <...> here; presumably meant for type parameters only.
                    if (dtText.contains(":")) {
                        name = "<" + name + ">";
                    }
                    String items = dd.html();
                    // group(2) is the text following "<code>name</code> -"
                    Pattern p = PatternCache
                            .getPattern("(?si)<CODE>(.+?)</CODE>\\s*-\\s*(.*?)(<DD>|</DD>|</DL>|<DT>|$)");
                    Matcher m = p.matcher(items);
                    if (m.find()) {
                        String desc = formatLinkTag(className, m.group(2));
                        comment.addParam(name, desc);
                    }
                }
                continue;
            }
            if (dtText.contains(":")) {
                // return-style section: the single <dd> after the <dt> is the description
                Element dd = dtTag.nextElementSibling();
                String str = dd.html();
                str = formatLinkTag(className, str);
                comment.addReturn(str);
                continue;
            }
            if (dtText.contains(":")) {
                // throws-style section: each following <dd> yields "name description"
                Element dd = dtTag;
                while (true) {
                    dd = dd.nextElementSibling();
                    if (dd == null || dd.tagName().equalsIgnoreCase("dd") == false) {
                        break;
                    }
                    String name = dd.select("code").first().text();
                    String items = dd.html();
                    Pattern p = PatternCache
                            .getPattern("(?si)<CODE>(.+?)</CODE>\\s*-\\s*(.*?)(<DD>|</DD>|</DL>|<DT>|$)");
                    Matcher m = p.matcher(items);
                    if (m.find()) {
                        String desc = formatLinkTag(className, m.group(2));
                        String param = name + " " + desc;
                        comment.addThrows(param);
                    }
                }
                continue;
            }
        }

        // deprecated tag, built from the text captured above
        parseDeprecatedTag(className, depre, comment);

        // remaining tags handled by the shared parser
        parseCommonTag(className, element, comment);

        contextTable.put(sig, comment);
    }
}
From source file:gov.medicaid.screening.dao.impl.ChiropracticLicenseDAOBean.java
/**
 * Performs a search for all possible results.
 *
 * <p>Runs the search as a GET request and then loads the detail page linked
 * from every result row.
 *
 * @param criteria
 *            The search criteria.
 * @param searchType
 *            the type of search to execute.
 * @return the search result for licenses
 *
 * @throws URISyntaxException
 *             if an error occurs while building the URL.
 * @throws ClientProtocolException
 *             if client does not support protocol used.
 * @throws IOException
 *             if an error occurs while parsing response.
 * @throws ParseException
 *             if an error occurs while parsing response.
 * @throws ServiceException
 *             for any other problems encountered
 */
private SearchResult<License> getAllResults(ChiropracticLicenseSearchCriteria criteria, String searchType)
        throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        client.setRedirectStrategy(new LaxRedirectStrategy());

        URIBuilder uriBuilder = new URIBuilder(getSearchURL());
        uriBuilder.addParameter("strName", Util.defaultString(criteria.getLastName()));
        uriBuilder.addParameter("strSoundex", Util.defaultString(criteria.getLastName()));
        uriBuilder.addParameter("strCity", Util.defaultString(criteria.getCity()));
        uriBuilder.addParameter("strZIP", Util.defaultString(criteria.getZipCode()));
        uriBuilder.addParameter(searchType, TYPES.get(searchType));

        HttpGet search = new HttpGet(uriBuilder.build());
        HttpResponse response = client.execute(search);
        verifyAndAuditCall(getSearchURL(), response);

        Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

        // the search URL does not change while iterating, so build it once
        String searchUrl = uriBuilder.build().toString();

        List<License> allLicenses = new ArrayList<License>();
        Elements rows = page.select("tr:gt(0)");
        for (Element row : rows) {
            Element detailLink = row.select("a").first();
            if (detailLink == null) {
                // Fixed: "tr:gt(0)" also matches rows without a detail link,
                // which used to raise a NullPointerException.
                continue;
            }
            String href = detailLink.attr("href");
            String licenseType = row.select("td:eq(2)").text();

            HttpGet getDetails = new HttpGet(Util.replaceLastURLPart(searchUrl, href));
            response = client.execute(getDetails);
            verifyAndAuditCall(getSearchURL(), response);

            // kept separate from the result page that is still being iterated
            Document detailPage = Jsoup.parse(EntityUtils.toString(response.getEntity()));
            allLicenses.add(parseLicense(detailPage, licenseType));
        }

        SearchResult<License> searchResult = new SearchResult<License>();
        searchResult.setItems(allLicenses);
        return searchResult;
    } finally {
        // Fixed: the client's own connection manager was never shut down.
        client.getConnectionManager().shutdown();
    }
}
From source file:gov.medicaid.screening.dao.impl.ChiropracticLicenseDAOBean.java
/** * Retrieves all the practioners from the disciplined list. * /*from w w w. j av a2 s.c o m*/ * @return the list of practitioners in the disciplined list * * @throws URISyntaxException * if an error occurs while building the URL. * @throws ClientProtocolException * if client does not support protocol used. * @throws IOException * if an error occurs while parsing response. * @throws ParseException * if an error occurs while parsing response. * @throws ServiceException * for any other problems encountered */ private SearchResult<License> getAllDisciplinedResults() throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(); client.setRedirectStrategy(new LaxRedirectStrategy()); HttpGet search = new HttpGet(new URIBuilder(getDisciplineURL()).build()); HttpResponse response = client.execute(search); verifyAndAuditCall(getDisciplineURL(), response); Document page = Jsoup.parse(EntityUtils.toString(response.getEntity())); List<License> allLicenses = new ArrayList<License>(); Elements rows = page.select("blockquote table tr:gt(0)"); for (Element row : rows) { String fullname = row.select("td:eq(0)").text(); License license = new License(); ProviderProfile profile = new ProviderProfile(); license.setProfile(profile); profile.setUser(parseDeniedListFullname(fullname)); LicenseStatus status = new LicenseStatus(); String action = row.select("td:eq(1)").text(); status.setName(action); String date = row.select("td:eq(2)").text(); status.setDate(parseDate(date, DATE_FORMAT)); license.setStatus(status); allLicenses.add(license); } SearchResult<License> searchResult = new SearchResult<License>(); searchResult.setItems(allLicenses); return searchResult; }
From source file:eu.riscoss.rdc.RDCFossology.java
/** * Analyses a fossology html file/*from www . j a va 2 s . c o m*/ * @param target * @param licensesMap * @return * @throws IOException */ private HashMap<String, Integer> analyseOverviewReport(String target, HashMap<String, Collection<String>> licensesMap) throws IOException { //private static HashMap<String, Integer> analyseFossologyReport(String target, String licenseFile) throws IOException { // List<String> result = new ArrayList<String>(); Document document; if (target.startsWith("http")) { document = Jsoup.connect(target).get(); } else { File file = new File(target); document = Jsoup.parse(file, "UTF-8", "http://localhost"); } Element table = document.select("table[id=lichistogram]").first(); Elements rows = table.select("tr"); List<LicenseEntry> llist = new ArrayList<LicenseEntry>(); //list of licenses in the fossology file //for each license, parses the name (0) and the number of occurrences (2) and saves it as a LicenseEntry for (Element element : rows) { Elements col = element.select("td"); if (col.size() != 0) { int c = Integer.parseInt(col.get(0).ownText());//num of occurrences String lic = col.get(2).text(); llist.add(new LicenseEntry(c, lic)); } } //get license type buckets HashMap<String, Integer> licenseBuckets = new HashMap<String, Integer>(); int total = 0; Set<String> licenseTypes = licensesMap.keySet(); //initialize with 0 to avoid missing types for (String licensetype : licenseTypes) { licenseBuckets.put(licensetype, 0); } boolean matched = false; int numUnknown = 0; for (LicenseEntry le : llist) { for (String licenseType : licenseTypes) {//cycles on license types from config file if (le.matchesOneOf(licensesMap.get(licenseType), licenseType)) { Integer currentcount = licenseBuckets.get(le.licensetype); if (currentcount == null) //for safety, but should be initialised currentcount = 0; licenseBuckets.put(le.licensetype, currentcount + le.count); matched = true; } } total += le.count; if (matched == false) { //unknown numUnknown += le.count; 
System.err.println("Unknown license: " + le.getName()); } } licenseBuckets.put("_unknown_", numUnknown); licenseBuckets.put("_sum_", total); licenseBuckets.put("_count_", llist.size()); return licenseBuckets; }