List of usage examples for org.jsoup.nodes.Element#text()
public String text()
From source file:Main.java
/**
 * Parses a {@code <description>} element whose text is itself HTML markup and
 * prints the {@code src} attribute of the first {@code <img>} found inside it.
 *
 * @param args unused
 * @throws Exception declared by the original sample; nothing here throws a checked exception
 */
public static void main(String[] args) throws Exception {
    String html = "<description>" + "..." + "</description>";
    Document doc = Jsoup.parse(html);
    for (Element desc : doc.select("description")) {
        // The element's text is re-parsed as a second HTML document.
        String unescapedHtml = desc.text();
        Element img = Jsoup.parse(unescapedHtml).select("img").first();
        if (img == null) {
            // first() returns null when the selection is empty; skip instead of
            // failing with a NullPointerException on attr().
            continue;
        }
        String src = img.attr("src");
        System.out.println(src);
    }
    System.out.println("Done");
}
From source file:com.rest.samples.getTipoCambioBanxico.java
/**
 * Downloads the Banxico exchange-rate page and prints the text of the element
 * whose id is {@code FIX_DATO}.
 *
 * <p>A non-200 response raises a {@link RuntimeException}; I/O failures are
 * logged at SEVERE level. If the page contains no {@code FIX_DATO} element a
 * warning is logged and nothing is printed.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String url = "http://www.banxico.org.mx/tipcamb/llenarTiposCambioAction.do?idioma=sp";
    try {
        HttpClient hc = HttpClientBuilder.create().build();
        HttpGet request = new HttpGet(url);
        request.setHeader("User-Agent", "Mozilla/5.0");
        request.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        HttpResponse res = hc.execute(request);
        if (res.getStatusLine().getStatusCode() != 200) {
            throw new RuntimeException("Failed : HTTP eror code: " + res.getStatusLine().getStatusCode());
        }

        StringBuilder result = new StringBuilder();
        BufferedReader rd = new BufferedReader(new InputStreamReader(res.getEntity().getContent()));
        try {
            String line;
            while ((line = rd.readLine()) != null) {
                // readLine() strips the line terminator; put one back so that
                // tokens on adjacent lines are not glued together before parsing.
                result.append(line).append('\n');
            }
        } finally {
            // Always release the response stream, even if reading fails.
            rd.close();
        }

        Document doc = Jsoup.parse(result.toString());
        Element tipoCambioFix = doc.getElementById("FIX_DATO");
        if (tipoCambioFix == null) {
            // getElementById returns null when no element carries the id.
            Logger.getLogger(SamplesUseHttpclient.class.getName()).log(Level.WARNING,
                    "Element FIX_DATO not found in response from {0}", url);
            return;
        }
        System.out.println(tipoCambioFix.text());
    } catch (IOException ex) {
        Logger.getLogger(SamplesUseHttpclient.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:com.mmj.app.common.util.SpiderHtmlUtils.java
public static String parserHtml(String html, String select) { if (StringUtils.isNotEmpty(html)) { Document doc = Jsoup.parse(html); Elements linksElements = doc.select(select); if (linksElements == null || linksElements.isEmpty()) { return null; }//from ww w . j a va 2 s . c o m for (Element ele : linksElements) { return ele.text(); } } return null; }
From source file:Main.java
public static void parseHtml(String html) { Document document = Jsoup.parse(html); Element linkElement = document.select("a").first(); String linkHref = linkElement.attr("href"); // "http://sample.com" String linkText = linkElement.text(); // "This is sample" System.out.println(linkHref); System.out.println(linkText); }
From source file:Main.java
/**
 * Collects the trimmed text of every element matching {@code query} that
 * reports having text, one entry per line.
 *
 * @param elements elements to search within
 * @param query    selector passed to {@code Elements.select}
 * @return the collected lines joined with {@code "\n"}, trimmed at both ends;
 *         empty when nothing matched
 */
public static String getScrapeText(Elements elements, String query) {
    // A builder avoids re-copying the accumulated text on every match.
    StringBuilder collected = new StringBuilder();
    for (Element element : elements.select(query)) {
        if (element.hasText()) {
            collected.append(element.text().trim()).append("\n");
        }
    }
    return collected.toString().trim();
}
From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.tipicality.DbpediaCsvDownload.java
private static void download(Element e) throws MalformedURLException, IOException { for (Element c : e.children()) { String tagName = c.tag().getName(); if (tagName.equals("small")) { for (Element c1 : c.children()) { if (c1.tag().getName().equals("a") && c1.text().equalsIgnoreCase("csv")) { String href = c1.attr("href"); System.out.println("Downloading " + href); try { URL remoteFile = new URL(href); ReadableByteChannel rbc = Channels.newChannel(remoteFile.openStream()); String[] s = href.split("\\/"); FileOutputStream fos = new FileOutputStream( DBpediaOntology.DBPEDIA_CSV_FOLDER + s[s.length - 1]); fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); } catch (Exception ex) { ex.printStackTrace(); }//from www.jav a 2 s . c o m } } } else if (tagName.equals("ul")) { for (Element c1 : c.children()) { if (c1.tagName().equals("li")) { download(c1); } } } } }
From source file:Main.java
private static String parseCardContent(Document doc) { try {// w w w. j a v a 2 s . com Element summaryEl = doc.select("div[class=card-summary-content]").get(0); summaryEl.select("sup").remove(); return summaryEl.text(); } catch (Exception e) { e.printStackTrace(); } return null; }
From source file:io.seldon.importer.articles.AttributesImporterUtils.java
/**
 * Builds a list holding the text of each given element, stripped with
 * {@code StringUtils.strip} and then lower-cased, in iteration order.
 *
 * @param tagsElements elements whose text is collected
 * @return a new list with one normalised entry per element
 */
public static List<String> getTagsPartsFromMultipleElement(Elements tagsElements) {
    List<String> normalised = new ArrayList<String>();
    for (Element tagElement : tagsElements) {
        normalised.add(StringUtils.strip(tagElement.text()).toLowerCase());
    }
    return normalised;
}
From source file:Main.java
/**
 * Starting at {@code element}, follows the first child at each level until it
 * reaches an element that reports having text or that has no children, and
 * returns that element's text.
 *
 * @param element element to start from
 * @return the text of the element where the descent stopped
 */
public static String getText(final Element element) {
    Element current = element;
    for (;;) {
        // Stop as soon as there is text here, or nowhere further to descend.
        boolean stopHere = current.hasText() || current.children().isEmpty();
        if (stopHere) {
            return current.text();
        }
        current = current.children().get(0);
    }
}
From source file:com.ignorelist.kassandra.steam.scraper.HtmlTagLoader.java
private static void copyText(Iterable<Element> elements, Set<String> target) { for (Element element : elements) { final String text = element.text(); if (!Strings.isNullOrEmpty(text)) { target.add(text.trim());//from w ww . j a v a2 s . c o m } } }