Example usage for org.jsoup.select Elements text

List of usage examples for org.jsoup.select Elements text

Introduction

On this page you can find example usages of the text() method of org.jsoup.select.Elements.

Prototype

public String text() 

Source Link

Documentation

Get the combined text of all the matched elements.

Usage

From source file:gpxparser.GpxParser.java

/**
 * Parses a GPX file with Jsoup and copies its track name and track points into a
 * {@code Track}.
 *
 * @param args the command line arguments; when at least one is given, {@code args[0]} is
 *             used as the path of the GPX file, otherwise the built-in default path is read
 */
public static void main(String[] args) {
    // Keep the historical hard-coded location as the default so that running without
    // arguments behaves exactly as before.
    String gpxPath = (args != null && args.length > 0) ? args[0] : "/home/yonseca/4.gpx";
    File input = new File(gpxPath);
    Track track = new Track();
    try {
        Document doc = Jsoup.parse(input, "UTF-8");
        Elements trackData = doc.getElementsByTag("trk");
        track.setName(trackData.select("name").text());

        // Every <trkpt> carries lat/lon as attributes and the elevation in a nested <ele>.
        for (Element dataPoint : trackData.select("trkseg").select("trkpt")) {
            double lat = NumberUtils.toDouble(dataPoint.attr("lat"));
            double lon = NumberUtils.toDouble(dataPoint.attr("lon"));
            double altitude = NumberUtils.toDouble(dataPoint.select("ele").text());
            track.addPoint(lat, lon, altitude);
        }
        System.out.println("");

    } catch (IOException ex) {
        Logger.getLogger(GpxParser.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:ie.nuim.cs.dri.metadata.WebSearch.java

/**
 * Extracts publication metadata from a CiteSeer search-result page.
 *
 * <p>Each element with class {@code result} is inspected in document order. While the text
 * of its {@code doc_details} element equals {@code title} (ignoring case), the title, year,
 * cited-by count, publication type and publication name are copied into the returned
 * {@code ROS}; the loop stops at the first result whose title does not match.
 *
 * <p>Numeric fields that cannot be parsed fall back to the same sentinels used for missing
 * values ({@code 0} for the year, {@code -1} for the cited-by count) instead of aborting the
 * whole extraction with a {@link NumberFormatException}.
 *
 * @param xmlString the markup returned by the CiteSeer search
 * @param title the article title to look for
 * @return a {@code ROS} populated from the matching results; left as freshly constructed
 *         when the first result does not match
 * @throws Exception declared for backward compatibility with existing callers
 */
public static ROS extractCiteSeerMetadata(String xmlString, String title) throws Exception {
    Document doc = Jsoup.parse(xmlString);
    ROS ros = new ROS();

    for (Element result : doc.getElementsByClass("result")) {
        Elements titleElement = result.getElementsByClass("doc_details");
        System.out.println(titleElement.text() + "\t" + title);
        if (!titleElement.text().equalsIgnoreCase(title)) {
            // NOTE(review): this stops at the first non-matching result rather than
            // skipping it; confirm that 'break' (not 'continue') is intended.
            break;
        }
        ros.setArticleTitle(title);

        String yearStr = result.getElementsByClass("pubyear").text().replace(", ", "");
        System.out.println("year:" + yearStr);
        ros.setYear(parseIntOrDefault(yearStr, 0));

        // The count is taken from the third space-separated token of the citation text.
        String[] citedBy = result.getElementsByClass("citation").text().split(" ");
        int citeby = citedBy.length > 2 ? parseIntOrDefault(citedBy[2], -1) : -1;
        ros.setCitedByCount(citeby);

        String pub = result.getElementsByClass("pubvenue").text().replace("- ", "").toLowerCase();
        if (pub.contains("journal")) {
            ros.setPublicationType("Journal");
        } else if (pub.contains("conference") || pub.contains("conf.") || pub.contains("proc.")) {
            ros.setPublicationType("Conference");
        } else {
            ros.setPublicationType("");
        }
        ros.setPublicationName(pub);
    }

    return ros;
}

/** Parses {@code text} as a decimal int, returning {@code fallback} when it is blank or not a number. */
private static int parseIntOrDefault(String text, int fallback) {
    String trimmed = (text == null) ? "" : text.trim();
    if (trimmed.isEmpty()) {
        return fallback;
    }
    try {
        return Integer.parseInt(trimmed);
    } catch (NumberFormatException ignored) {
        // Scraped text is not guaranteed to be numeric; treat it like a missing value.
        return fallback;
    }
}

From source file:de.limod.portals.AutoScout.java

/**
 * Returns the combined text of the elements under {@code result} that match
 * {@code AutoScout.SELECTOR_PRICE}.
 */
private String getPrice(Element result) {
    return result.select(AutoScout.SELECTOR_PRICE).text();
}

From source file:de.limod.portals.AutoScout.java

/**
 * Returns the combined text of the elements under {@code result} that match
 * {@code AutoScout.SELECTOR_CREATED}.
 */
private String getCreated(Element result) {
    return result.select(AutoScout.SELECTOR_CREATED).text();
}

From source file:de.limod.portals.AutoScout.java

/**
 * Returns the combined text of the elements under {@code result} that match
 * {@code AutoScout.SELECTOR_TITLE}.
 */
private String getTitle(Element result) {
    return result.select(AutoScout.SELECTOR_TITLE).text();
}

From source file:dev.maisentito.suca.commands.EnitCommandHandler.java

/**
 * Looks the joined arguments up on the WordReference "enit" dictionary page and responds to
 * the event with the text of the selected result-table row, tooltips removed.
 */
@Override
public void handleCommand(MessageEvent event, String[] args) throws Throwable {
    String lookupUrl = "http://www.wordreference.com/enit/" + StringUtils.join(args, ' ');
    Document page = Jsoup.connect(lookupUrl)
            .userAgent(getStringGlobal(Main.GLOBAL_USERAGENT, ""))
            .referrer("http://www.google.com/")
            .get();

    Elements resultRow = page.body()
            .select("table.WRD:nth-child(2) > tbody:nth-child(1) > tr:nth-child(2)");
    // Drop tooltip markup first so its text does not end up in the reply.
    resultRow.select(".tooltip").remove();

    event.respond(resultRow.text().trim().replace("\n", ""));
}

From source file:dev.maisentito.suca.commands.ItenCommandHandler.java

/**
 * Looks the joined arguments up on the WordReference "iten" dictionary page and responds to
 * the event with the text of the selected result-table row, tooltips removed.
 */
@Override
public void handleCommand(MessageEvent event, String[] args) throws Throwable {
    String lookupUrl = "http://www.wordreference.com/iten/" + StringUtils.join(args, ' ');
    Document page = Jsoup.connect(lookupUrl)
            .userAgent(getStringGlobal(Main.GLOBAL_USERAGENT, ""))
            .referrer("http://www.google.com/")
            .get();

    Elements resultRow = page.body()
            .select("table.WRD:nth-child(2) > tbody:nth-child(1) > tr:nth-child(2)");
    // Drop tooltip markup first so its text does not end up in the reply.
    resultRow.select(".tooltip").remove();

    event.respond(resultRow.text().trim().replace("\n", ""));
}

From source file:com.ewcms.plugin.crawler.generate.EwcmsContentCrawler.java

/**
 * Processes one crawled page.
 *
 * <p>The page URL is re-fetched with Jsoup, a title is resolved (the document title, or the
 * text selected by the gather's title selector when that option is enabled and yields text),
 * and the elements matching {@code matchRegex} (minus {@code filterRegex}) are extracted.
 * Nothing is saved when the title is blank or no element matches. Otherwise:
 * <ul>
 *   <li>in local mode a {@code Storage} record is saved when every entry of {@code keys},
 *       used as a regular expression, is found in the full document text;</li>
 *   <li>in non-local mode an {@code Article} with a single {@code Content} page is created
 *       from the extracted content.</li>
 * </ul>
 *
 * @param page the crawled page to process
 */
@Override
public void visit(Page page) {
    try {
        String url = page.getWebURL().getURL();

        page.setContentType("text/html; charset=" + gather.getEncoding());
        Document doc = Jsoup.connect(url).timeout(gather.getTimeOutWait().intValue() * 1000).get();

        // Prefer an externally configured title selector over the <title> element, but only
        // when it actually produces some text.
        String title = doc.title();
        if (gather.getTitleExternal() && gather.getTitleRegex() != null
                && gather.getTitleRegex().length() > 0) {
            Elements titleEles = doc.select(gather.getTitleRegex());
            if (!titleEles.isEmpty()) {
                String tempTitle = titleEles.text();
                if (tempTitle != null && tempTitle.length() > 0) {
                    title = tempTitle;
                }
            }
        }
        if (title == null || title.trim().length() == 0) {
            return;
        }

        Elements elements = doc.select(matchRegex);
        if (filterRegex != null && filterRegex.trim().length() > 0) {
            elements = elements.not(filterRegex);
        }
        if (elements.isEmpty()) {
            return;
        }

        // Re-parse the matched fragment so the stored content is a normalised document.
        String contentText = Jsoup.parse(elements.html()).html();

        if (gather.getRemoveHref()) {
            Document moveDoc = Jsoup.parse(contentText);
            contentText = moveDoc.select("*").not("a").html();
        }
        if (gather.getRemoveHtmlTag()) {
            // NOTE(review): this takes the text of the WHOLE page, discarding the matched
            // and filtered fragment built above; confirm whether the fragment's text was
            // intended instead.
            contentText = doc.text();
        }

        if (isLocal) {
            // Keyword matching runs against the text of the entire page.
            contentText = doc.text();

            boolean allKeysFound = true;
            for (String key : keys) {
                if (!Pattern.compile(key.trim()).matcher(contentText).find()) {
                    allKeysFound = false;
                    break;
                }
            }

            if (allKeysFound) {
                Storage storage = new Storage();
                storage.setGatherId(gather.getId());
                storage.setGatherName(gather.getName());
                storage.setTitle(title);
                storage.setUrl(url);
                try {
                    gatherService.addStorage(storage);
                } catch (Exception e) {
                    // Best effort: a failed save must not abort the crawl, but keep the
                    // stack trace so the failure can be diagnosed.
                    logger.error("save storage error : {}", e.getLocalizedMessage(), e);
                }
            }
        } else {
            Content content = new Content();
            content.setDetail(contentText);
            content.setPage(1);
            List<Content> contents = new ArrayList<Content>();
            contents.add(content);

            Article article = new Article();
            article.setTitle(title);
            article.setContents(contents);

            articleMainService.addArticleMainByCrawler(article, gather.getChannelId(),
                    CrawlerUtil.USER_NAME);
        }
    } catch (IOException e) {
        // Fetch failures are expected while crawling; log with the cause and move on.
        logger.warn(e.getLocalizedMessage(), e);
    }
}

From source file:gov.medicaid.screening.dao.impl.NurseAnesthetistsLicenseDAOBean.java

/**
 * Performs the call to the source site, exact match is expected given the parameters.
 *
 * <p>The search page is fetched first to obtain the {@code __EVENTVALIDATION} and
 * {@code __VIEWSTATE} form tokens, which are then posted back together with the AANA number
 * and the last four characters of the SSN. For recertification criteria a second post
 * switches the display mode before the result page is parsed.
 *
 * <p>The HTTP client created here is shut down before returning, whether the lookup
 * succeeds, finds nothing, or fails.
 *
 * @param criteria the search criteria
 * @return the matched result, null if not found
 * @throws IOException if an I/O error is encountered
 * @throws URISyntaxException if the site URL cannot properly be created
 * @throws ServiceException for any other exceptions encountered
 */
private ProviderProfile getProviderProfile(NurseAnesthetistsSearchCriteria criteria)
        throws ServiceException, IOException, URISyntaxException {
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        client.setRedirectStrategy(new LaxRedirectStrategy());

        String searchURL = getSearchURL();
        HttpGet getSearch = new HttpGet(new URIBuilder(searchURL).build());
        HttpResponse response = client.execute(getSearch);
        verifyAndAuditCall(searchURL, response);

        Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));
        HttpPost search = new HttpPost(new URIBuilder(searchURL).build());

        String searchType = "Lookup Certification Status";

        // NOTE(review): assumes the SSN is non-null and at least four characters long.
        String last4 = criteria.getSsn().substring(criteria.getSsn().length() - 4);
        HttpEntity entity = postForm(searchURL, client, search,
                new String[][] { { "__EVENTARGUMENT", "" }, { "__EVENTTARGET", "" },
                        { "__EVENTVALIDATION", page.select("input[name=__EVENTVALIDATION]").first().val() },
                        { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() },
                        { "ctl00$PageContent$CertRecert$btnLookup", searchType },
                        { "ctl00$PageContent$CertRecert$txtAANANumber", "" + criteria.getAanaNumber() },
                        { "ctl00$PageContent$CertRecert$txtSSNLast4", "" + last4 } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));
        Elements message = page.select("#ctl00_PageContent_ucCredentialsControl_lblErrorMessage");
        if (message.size() > 0
                && message.text().startsWith("No individual with a social security number")) {
            // no match, return null
            return null;
        }

        if (criteria.isRecertification()) {
            searchType = "Lookup Recertification Status";
            // The tokens must come from the page returned by the previous post.
            entity = postForm(searchURL, client, search,
                    new String[][] { { "__EVENTARGUMENT", "" }, { "__EVENTTARGET", "" },
                            { "__EVENTVALIDATION", page.select("input[name=__EVENTVALIDATION]").first().val() },
                            { "__VIEWSTATE", page.select("input[name=__VIEWSTATE]").first().val() },
                            { "ctl00$PageContent$CertRecert$btnSwapDisplayMode", searchType } },
                    true);
            page = Jsoup.parse(EntityUtils.toString(entity));
        }
        return parseProvider(page);
    } finally {
        // The client is local to this call; release its connections so they do not leak.
        client.getConnectionManager().shutdown();
    }
}

From source file:cd.go.contrib.elasticagents.dockerswarm.elasticagent.executors.AgentStatusReportExecutorTest.java

/**
 * Asserts that the "service-details" tab of the rendered report mentions the service's id,
 * name and container image.
 *
 * <p>The tab is located with an attribute selector. The previous code called
 * {@code Elements.attr(key, value)}, which sets the attribute on every matched element
 * instead of filtering by it, so the document was mutated and the text of all
 * {@code .tab-content} elements was checked.
 *
 * @param service the service whose details are expected in the report
 * @param document the parsed report page
 */
private void assertServiceDetails(Service service, Document document) {
    // NOTE(review): assumes the template puts ng-show directly on the .tab-content element
    // with exactly this expression; confirm against the report template.
    final Elements serviceDetails = document
            .select(".tab-content[ng-show=currenttab == 'service-details']");
    final String serviceDetailsText = serviceDetails.text();

    assertThat(serviceDetailsText, containsString(service.id()));
    assertThat(serviceDetailsText, containsString(service.spec().name()));
    assertThat(serviceDetailsText, containsString(service.spec().taskTemplate().containerSpec().image()));
}