List of usage examples for org.jsoup.select.Elements.text()
public String text()
From source file:gpxparser.GpxParser.java
/** * @param args the command line arguments *//*from ww w .ja v a 2s.c om*/ public static void main(String[] args) { File input = new File("/home/yonseca/4.gpx"); Track track = new Track(); try { Document doc = Jsoup.parse(input, "UTF-8"); //System.out.println(doc.text()); Elements trackData = doc.getElementsByTag("trk"); Elements trackName = trackData.select("name"); track.setName(trackName.text()); Elements trkPt = trackData.select("trkseg").select("trkpt"); for (Iterator<Element> iterator = trkPt.iterator(); iterator.hasNext();) { Element dataPoint = iterator.next(); Double lat = NumberUtils.toDouble(dataPoint.attr("lat")); Double lon = NumberUtils.toDouble(dataPoint.attr("lon")); Double altitude = NumberUtils.toDouble(dataPoint.select("ele").text()); track.addPoint(lat, lon, altitude); } System.out.println(""); } catch (IOException ex) { Logger.getLogger(GpxParser.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:ie.nuim.cs.dri.metadata.WebSearch.java
/** * * @param xmlString//from w w w.j a v a2s. c o m * @param title * @return * @throws Exception */ public static ROS extractCiteSeerMetadata(String xmlString, String title) throws Exception { //System.out.println("CiteSeer search returned:\n"+xmlString); Document doc = Jsoup.parse(xmlString); ROS ros = new ROS(); String articleTitle = ""; int pubYear = 0; Elements resultElements = doc.getElementsByClass("result"); for (Element result : resultElements) { Elements titleElement = result.getElementsByClass("doc_details"); System.out.println(titleElement.text() + "\t" + title); if (!titleElement.text().equalsIgnoreCase(title)) { break; } else { ros.setArticleTitle(title); Elements authorElement = result.getElementsByClass("pubinfo"); //authors=authorElement.text(); Elements yearElement = result.getElementsByClass("pubyear"); String yearStr = yearElement.text().replace(", ", ""); int year = (yearStr.length() > 0 ? Integer.parseInt(yearStr) : 0); System.out.println("year:" + yearElement.text().replace(", ", "")); ros.setYear(year); Elements citeElement = result.getElementsByClass("citation"); String[] citedBy = citeElement.text().split(" "); int citeby = (citedBy.length > 2 ? Integer.parseInt(citedBy[2]) : -1); ros.setCitedByCount(citeby); Elements publicationElement = result.getElementsByClass("pubvenue"); String pub = publicationElement.text().replace("- ", "").toLowerCase(); if (pub.contains("journal")) { ros.setPublicationType("Journal"); } else if (pub.contains("conference") || pub.contains("conf.") || pub.contains("proc.")) { ros.setPublicationType("Conference"); } else { ros.setPublicationType(""); } ros.setPublicationName(pub); } } // } return ros; }
From source file:de.limod.portals.AutoScout.java
/**
 * Reads the price of a search result.
 *
 * @param result the result element to inspect
 * @return the combined text of the elements matched by {@code AutoScout.SELECTOR_PRICE}
 */
private String getPrice(Element result) {
    return result.select(AutoScout.SELECTOR_PRICE).text();
}
From source file:de.limod.portals.AutoScout.java
/**
 * Reads the creation information of a search result.
 *
 * @param result the result element to inspect
 * @return the combined text of the elements matched by {@code AutoScout.SELECTOR_CREATED}
 */
private String getCreated(Element result) {
    return result.select(AutoScout.SELECTOR_CREATED).text();
}
From source file:de.limod.portals.AutoScout.java
/**
 * Reads the title of a search result.
 *
 * @param result the result element to inspect
 * @return the combined text of the elements matched by {@code AutoScout.SELECTOR_TITLE}
 */
private String getTitle(Element result) {
    return result.select(AutoScout.SELECTOR_TITLE).text();
}
From source file:dev.maisentito.suca.commands.EnitCommandHandler.java
/**
 * Fetches the WordReference {@code enit} page for the space-joined {@code args} and responds
 * to the event with the text of the selected result-table row, tooltips removed.
 *
 * @param event the message event to respond to
 * @param args  the words to look up
 * @throws Throwable if the page cannot be fetched or the response fails
 */
@Override
public void handleCommand(MessageEvent event, String[] args) throws Throwable {
    final String lookupUrl = "http://www.wordreference.com/enit/" + StringUtils.join(args, ' ');
    final Document page = Jsoup.connect(lookupUrl)
            .userAgent(getStringGlobal(Main.GLOBAL_USERAGENT, ""))
            .referrer("http://www.google.com/")
            .get();

    final Elements definitionRow =
            page.body().select("table.WRD:nth-child(2) > tbody:nth-child(1) > tr:nth-child(2)");
    // Drop tooltip markup so it does not end up in the reply text.
    definitionRow.select(".tooltip").remove();

    event.respond(definitionRow.text().trim().replace("\n", ""));
}
From source file:dev.maisentito.suca.commands.ItenCommandHandler.java
@Override public void handleCommand(MessageEvent event, String[] args) throws Throwable { Document doc = Jsoup.connect("http://www.wordreference.com/iten/" + StringUtils.join(args, ' ')) .userAgent(getStringGlobal(Main.GLOBAL_USERAGENT, "")).referrer("http://www.google.com/").get(); Elements row = doc.body().select("table.WRD:nth-child(2) > tbody:nth-child(1) > tr:nth-child(2)"); row.select(".tooltip").remove(); String def = row.text().trim().replace("\n", ""); event.respond(def);//w w w . j av a2 s .c o m }
From source file:com.ewcms.plugin.crawler.generate.EwcmsContentCrawler.java
/**
 * Processes one crawled page: re-fetches its URL with jsoup, determines a title, extracts the
 * configured content block and then either records the page as a {@link Storage} entry (when
 * {@code isLocal} is set and every key matches) or saves it as an {@link Article}.
 *
 * <p>An {@link IOException} raised while fetching is logged at warn level and not rethrown.
 *
 * @param page the crawled page to process
 */
@Override
public void visit(Page page) {
    try {
        String url = page.getWebURL().getURL();
        page.setContentType("text/html; charset=" + gather.getEncoding());
        // The timeout is getTimeOutWait() * 1000 - presumably seconds converted to
        // milliseconds; confirm against the gather configuration.
        Document doc = Jsoup.connect(url).timeout(gather.getTimeOutWait().intValue() * 1000).get();
        // Default to the document title; optionally override it with the text of the elements
        // matched by getTitleRegex(), which is passed to select() as a selector.
        String title = doc.title();
        if (gather.getTitleExternal() && gather.getTitleRegex() != null
                && gather.getTitleRegex().length() > 0) {
            Elements titleEles = doc.select(gather.getTitleRegex());
            if (!titleEles.isEmpty()) {
                String tempTitle = titleEles.text();
                if (tempTitle != null && tempTitle.length() > 0) {
                    title = tempTitle;
                }
            }
        }
        // Pages without a usable title are skipped entirely.
        if (title != null && title.trim().length() > 0) {
            // matchRegex selects the content elements; filterRegex, when set, excludes some.
            Elements elements = doc.select(matchRegex);
            if (filterRegex != null && filterRegex.trim().length() > 0) {
                elements = elements.not(filterRegex);
            }
            if (!elements.isEmpty()) {
                // Re-parse the selected markup so it becomes a standalone document.
                String subHtml = elements.html();
                Document blockDoc = Jsoup.parse(subHtml);
                String contentText = blockDoc.html();
                if (gather.getRemoveHref()) {
                    // NOTE(review): select("*").not("a") drops <a> elements from the selection
                    // only; links nested inside other selected elements are presumably still
                    // present in the resulting html - confirm intended behavior.
                    Document moveDoc = Jsoup.parse(contentText);
                    Elements moveEles = moveDoc.select("*").not("a");
                    contentText = moveEles.html();
                }
                // NOTE(review): this takes the text of the whole fetched document (doc), not of
                // the extracted block - confirm that is intended.
                if (gather.getRemoveHtmlTag())
                    contentText = doc.text();
                if (isLocal) {
                    // Local mode matches the keys against the whole document text.
                    contentText = doc.text();
                    Boolean isMatcher = true;
                    // Every key is treated as a regular expression and must be found.
                    for (int i = 0; i < keys.length; i++) {
                        Boolean result = Pattern.compile(keys[i].trim()).matcher(contentText).find();
                        if (!result) {
                            isMatcher = false;
                            break;
                        }
                    }
                    if (isMatcher) {
                        Storage storage = new Storage();
                        storage.setGatherId(gather.getId());
                        storage.setGatherName(gather.getName());
                        storage.setTitle(title);
                        storage.setUrl(url);
                        try {
                            gatherService.addStorage(storage);
                        } catch (Exception e) {
                            // A failed save is logged only; the exception is not propagated.
                            logger.error("save storage error : {}", e.getLocalizedMessage());
                        } finally {
                            storage = null;
                        }
                    }
                } else {
                    // Non-local mode: wrap the content as the single page of a new article.
                    Content content = new Content();
                    content.setDetail(contentText);
                    content.setPage(1);
                    List<Content> contents = new ArrayList<Content>();
                    contents.add(content);
                    Article article = new Article();
                    article.setTitle(title);
                    article.setContents(contents);
                    articleMainService.addArticleMainByCrawler(article, gather.getChannelId(), CrawlerUtil.USER_NAME);
                }
            }
        }
    } catch (IOException e) {
        logger.warn(e.getLocalizedMessage());
    }
}
From source file:gov.medicaid.screening.dao.impl.NurseAnesthetistsLicenseDAOBean.java
/** * Performs the call to the source site, exact match is expected given the parameters. * * @param criteria the search criteria//from ww w . j a v a 2s . com * @return the matched result, null if not found * @throws IOException if an I/O error is encountered * @throws URISyntaxException if the site URL cannot properly be created * @throws ServiceException for any other exceptions encountered */ private ProviderProfile getProviderProfile(NurseAnesthetistsSearchCriteria criteria) throws ServiceException, IOException, URISyntaxException { DefaultHttpClient client = new DefaultHttpClient(); client.setRedirectStrategy(new LaxRedirectStrategy()); String searchURL = getSearchURL(); HttpGet getSearch = new HttpGet(new URIBuilder(searchURL).build()); HttpResponse response = client.execute(getSearch); verifyAndAuditCall(searchURL, response); Document page = Jsoup.parse(EntityUtils.toString(response.getEntity())); HttpPost search = new HttpPost(new URIBuilder(searchURL).build()); String searchType = "Lookup Certification Status"; String last4 = criteria.getSsn().substring(criteria.getSsn().length() - 4); HttpEntity entity = postForm(searchURL, client, search, new String[][] { { "__EVENTARGUMENT", "" }, { "__EVENTTARGET", "" }, { "__EVENTVALIDATION", page.select("input[name=__EVENTVALIDATION]").first().val() }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() }, { "ctl00$PageContent$CertRecert$btnLookup", searchType }, { "ctl00$PageContent$CertRecert$txtAANANumber", "" + criteria.getAanaNumber() }, { "ctl00$PageContent$CertRecert$txtSSNLast4", "" + last4 } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); Elements message = page.select("#ctl00_PageContent_ucCredentialsControl_lblErrorMessage"); if (message.size() > 0) { if (message.text().startsWith("No individual with a social security number")) { // no match, return null return null; } } if (criteria.isRecertification()) { searchType = "Lookup Recertification Status"; entity = 
postForm(searchURL, client, search, new String[][] { { "__EVENTARGUMENT", "" }, { "__EVENTTARGET", "" }, { "__EVENTVALIDATION", page.select("input[name=__EVENTVALIDATION]").first().val() }, { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() }, { "ctl00$PageContent$CertRecert$btnSwapDisplayMode", searchType } }, true); page = Jsoup.parse(EntityUtils.toString(entity)); } return parseProvider(page); }
From source file:cd.go.contrib.elasticagents.dockerswarm.elasticagent.executors.AgentStatusReportExecutorTest.java
/**
 * Asserts that the combined text of the report's {@code .tab-content} elements mentions the
 * service's id, name and container image.
 *
 * @param service  the service whose details are expected in the report
 * @param document the parsed status-report page
 */
private void assertServiceDetails(Service service, Document document) {
    // Deliberately no .attr("ng-show", ...) on the selection: on Elements the two-argument
    // attr is a setter that rewrites the attribute on every match and returns the same
    // elements, so it would mutate the document under test without narrowing anything.
    // NOTE(review): if only the service-details tab should be checked, narrow the selector
    // with an attribute match once the rendered markup is confirmed.
    final String serviceDetailsText = document.select(".tab-content").text();

    assertThat(serviceDetailsText, containsString(service.id()));
    assertThat(serviceDetailsText, containsString(service.spec().name()));
    assertThat(serviceDetailsText, containsString(service.spec().taskTemplate().containerSpec().image()));
}