List of usage examples for org.jsoup.nodes.Element.text()
public String text()
From source file:Leitura.Ecobertura.java
/**
 * Scans every element of {@code document} that carries an {@code align}
 * attribute and reports whether at least one of them shows a value other
 * than {@code "0%"} or {@code "N/A"}.
 *
 * @return {@code true} as soon as one such element is found, {@code false}
 *         when every element reads "0%" or "N/A" (or there are none)
 */
public boolean cobertura() {
    for (Element cell : document.getElementsByAttribute("align")) {
        String shown = cell.text();
        boolean isZero = shown.equals("0%");
        boolean isNotAvailable = shown.equals("N/A");
        if (!(isZero || isNotAvailable)) {
            return true;
        }
    }
    return false;
}
From source file:com.mycompany.searchengineaggregator.SearchEngineAggregator.java
/**
 * Fetches the result page of the given search engine for {@code query} and
 * converts the outbound links found on it into JSON objects.
 *
 * <p>The query is appended to the engine's search URL as-is (no URL encoding
 * is applied here). Every {@code a[href]} element of the fetched page is
 * inspected; links whose text is "Cached" are always ignored. For Google only
 * links starting with {@code /url?q=} are kept (with that prefix removed),
 * for Yahoo and Bing only links starting with {@code http}.
 *
 * @param query        the search terms to append to the search URL
 * @param searchEngine the engine to query
 * @return the collected results in page order, numbered from 1; {@code null}
 *         when the engine is not one of the handled values or when the page
 *         could not be fetched (the I/O error is logged)
 * @throws JSONException if a result cannot be converted to a JSON object
 */
public ArrayList<JSONObject> getSearchResults(String query, SearchEngine searchEngine) throws JSONException {
    ArrayList<JSONObject> result = new ArrayList<>();
    StringBuilder url = new StringBuilder();
    String userAgent = null;

    // Handles specific search engine connection details
    switch (searchEngine) {
    case Google:
        url.append("https://www.google.com/search?q=");
        url.append(query);
        userAgent = GoogleUserAgent;
        break;
    case Yahoo:
        url.append("https://search.yahoo.com/search?q=");
        url.append(query);
        userAgent = YahooUserAgent;
        break;
    case Bing:
        url.append("http://www.bing.com/search?q=");
        url.append(query);
        userAgent = BingUserAgent;
        break;
    default:
        break;
    }

    if (userAgent == null)
        return null;

    Document doc = null;
    try {
        // Creates a connection, and fetches and parses the HTML file.
        // NOTE(review): the fixed "query=Java" parameter and the "auth" cookie are
        // sent with every request regardless of the actual query - confirm intended.
        doc = Jsoup.connect(url.toString()).data("query", "Java").userAgent(userAgent).cookie("auth", "token")
                .timeout(3000).get();
    } catch (IOException ex) {
        Logger.getLogger(SearchEngineAggregator.class.getName()).log(Level.SEVERE, null, ex);
    }

    if (doc == null)
        return null;

    // Get all links with attribute href
    Elements links = doc.select("a[href]");
    int i = 1;

    // For every link, check if url was outbound, strip non-url substring and append to result
    for (Element link : links) {
        String tempLink = link.attr("href");
        String tempText = link.text();

        // Handles specific search engine result filters
        switch (searchEngine) {
        case Google:
            if (tempLink.startsWith("/url?q=") && !tempText.equals("Cached")) {
                tempLink = tempLink.replace("/url?q=", "");
                result.add(toSearchResultJSONObject(searchEngine.toString(), i, tempText, tempLink));
                i++;
            }
            break;
        case Yahoo:
        case Bing:
            // Yahoo and Bing share the same filter. The break sits outside the
            // if so a rejected link can never fall through into another case.
            if (tempLink.startsWith("http") && !tempText.equals("Cached")) {
                result.add(toSearchResultJSONObject(searchEngine.toString(), i, tempText, tempLink));
                i++;
            }
            break;
        default:
            break;
        }
    }
    return result;
}
From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java
private List<String> parseCategories(Element root) { // get table/*from w w w. jav a 2 s. c o m*/ //Log.d(LOG_TAG, root.toString()); Element table = root.select("table.mon_list").first(); // category headlines List<String> categories = new ArrayList<String>(); for (Element headline : table.select("tr:first-child th")) { categories.add(headline.text()); } return categories; }
From source file:neembuu.release1.externalImpl.linkhandler.SaveVideoYoutubeLinkHandlerProvider.java
private BasicLinkHandler.Builder saveVideoExtraction(TrialLinkHandler tlh, int retryCount) throws Exception { String url = tlh.getReferenceLinkString(); BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create(); try {/* w w w .j ava2 s . c o m*/ DefaultHttpClient httpClient = NHttpClient.getNewInstance(); String requestUrl = "http://www.save-video.com/download.php?url=" + URLEncoder.encode(url, "UTF-8"); final String responseString = NHttpClientUtils.getData(requestUrl, httpClient); //Set the group name as the name of the video String nameOfVideo = getVideoName(url); String fileName = "text"; linkHandlerBuilder.setGroupName(nameOfVideo); long c_duration = -1; Document doc = Jsoup.parse(responseString); Elements elements = doc.select(".sv-download-links ul li a"); for (Element element : elements) { String singleUrl = element.attr("href"); if (!singleUrl.startsWith("DownloadFile.php")) { fileName = element.text(); singleUrl = Utils.normalize(singleUrl); LOGGER.log(Level.INFO, "Normalized URL: {0}", singleUrl); long length = NHttpClientUtils.calculateLength(singleUrl, httpClient); //LOGGER.log(Level.INFO,"Length: " + length); if (length <= 0) { continue; /*skip this url*/ } BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder.createFile(); try { // finding video/audio length // String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&"); // long duration = (int)(Double.parseDouble(dur)*1000); // if(c_duration < 0 ){ c_duration = duration; } // fileBuilder.putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, duration); // LOGGER.log(Level.INFO,"dur="+dur); } catch (NumberFormatException a) { // ignore } try { // finding the quality short name // String type = fileName.substring(fileName.indexOf("(")+1); String type = fileName; fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION, type); LOGGER.log(Level.INFO, "type={0}", type); } catch (Exception a) { 
a.printStackTrace(); } fileName = nameOfVideo + " " + fileName; fileBuilder.setName(fileName).setUrl(singleUrl).setSize(length).next(); } } for (OnlineFile of : linkHandlerBuilder.getFiles()) { long dur = of.getPropertyProvider() .getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS); if (dur < 0 && c_duration > 0 && of.getPropertyProvider() instanceof BasicPropertyProvider) { ((BasicPropertyProvider) of.getPropertyProvider()).putLongPropertyValue( PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, c_duration); } } } catch (Exception ex) { ex.printStackTrace(); } return linkHandlerBuilder; }
From source file:com.mycompany.grabberrasskazov.threads.ThreadForPageSave.java
public void indexStory(String pageUrl) { try {/*from www.j av a 2s. c om*/ String oldId = pageUrl.replace(GlobalVars.mainSite, ""); if (!mainBean.storyExists(oldId)) { Stories r = new Stories(); Document doc = Jsoup.connect(pageUrl) .userAgent("Opera/9.80 (X11; Linux x86_64) " + "Presto/2.12.388 Version/12.16").get(); Elements nameBlockElements = doc.select("b:containsOwn(?)"); Element nameBlock = nameBlockElements.get(0); nameBlock = nameBlock.parent().parent(); nameBlockElements = nameBlock.select("td:eq(1)"); nameBlock = nameBlockElements.get(0); String storyName = nameBlock.text(); r.setStoryName(storyName); // Start of processing writer Elements writerBlockElements = doc.select("b:containsOwn(?:)"); Element writerBlock = writerBlockElements.get(0); writerBlock = writerBlock.parent().parent(); writerBlockElements = writerBlock.select("td:eq(1)"); writerBlock = writerBlockElements.get(0); String writersUrl = writerBlock.select("a:eq(0)").attr("href"); String writersName = writerBlock.select("a:eq(0)").text(); String writersContacts = writerBlock.select("a:eq(1)").attr("href"); StoryWriters storyWriter = new StoryWriters(); storyWriter.setOldId(writersUrl); storyWriter.setWriterEmail(writersContacts); storyWriter.setWriterName(writersName); storyWriter = mainBean.saveWriter(storyWriter); Set<StoriesToWritersRelations> storiesToWritersRelationses = new HashSet<StoriesToWritersRelations>(); StoriesToWritersRelations storiesToWritersRelations = new StoriesToWritersRelations(); storiesToWritersRelations.setStories(r); storiesToWritersRelations.setStoryWriters(storyWriter); r.setStoriesToWritersRelationses(storiesToWritersRelationses); // End of processing writer Set<StoriesToCategoriessRelations> catsRelationses = new HashSet<>(); Elements katsInfo = doc.select("a[href*=ras.shtml?kat]"); for (Element kat : katsInfo) { String katId = kat.attr("href"); StoryCategories cat = mainBean.getCat(katId); StoriesToCategoriessRelations catsRelations = new 
StoriesToCategoriessRelations(); catsRelations.setStoryCategories(cat); catsRelations.setStories(r); catsRelationses.add(catsRelations); } r.setStoriesToCategoriessRelationses(catsRelationses); Elements textBlocks = doc.select("p[align=justify]"); Element textBlock = textBlocks.get(0); String textStr = textBlock.html(); r.setStoryText(textStr.replace("\"", "'")); r.setOldId(oldId); mainBean.saveStory(r); } } catch (IOException ex) { ex.printStackTrace(); } }
From source file:info.smartkit.hairy_batman.query.SogouSearchQuery.java
public void parseWxUserId() { Document doc;//w ww . j a va 2s . c o m try { // need http protocol doc = Jsoup.connect(GlobalConsts.SOGOU_SEARCH_URL_BASE + wxFoo.getSubscribeId()).get(); // get all "?:" value of html <span> Elements openIdSpans = doc.select(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_HTML_ELEMENTS); // for (Element openIdSpan : openIdSpans) { if (openIdSpan.hasText()) { if (openIdSpan.text().contains(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_KEYWORDS)) { // get the value from href attribute LOG.info("openId span text : " + openIdSpan.text()); // FIXME:???? if (this.wxFoo.getUserId() == null) { this.wxFoo.setOpenId( openIdSpan.text().split(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_KEYWORDS)[1]); LOG.info("saved wxUserId value: " + this.wxFoo.getUserId()); GlobalVariables.wxFooListWithUserId.add(this.wxFoo); } } } } } catch (IOException e) { // e.printStackTrace(); LOG.error(e.toString()); } }
From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java
private void renderSyntaxHighlightingHtml(final Document slidesDocument, final Configuration config) { for (Element code : slidesDocument.select("code")) { Charset encoding = config.getInputEncoding(); ByteArrayInputStream input = new ByteArrayInputStream(code.text().getBytes(encoding)); ByteArrayOutputStream out = new ByteArrayOutputStream(); String className = code.className(); if (StringUtils.isBlank(className)) { className = "java"; }//from ww w .j av a2s. c om Renderer renderer = XhtmlRendererFactory.getRenderer(className); if (renderer != null) { try { renderer.highlight("slidery", input, out, encoding.name(), true); code.html(new String(out.toByteArray(), encoding)); code.select("br").remove(); removeComments(code); code.html(code.html().trim()); Element parent = code.parent(); if (parent.tagName() == "pre") { parent.addClass("code"); } } catch (IOException e) { // TODO: Handle exception } } } }
From source file:ac.ucy.cs.spdx.license.License.java
/**
 * Creates a license by downloading the page at {@code url} and reading its
 * name, identifier and text from the elements whose {@code property}
 * attribute is {@code rdfs:label}, {@code spdx:licenseId} and
 * {@code spdx:licenseText} respectively. The license text is wrapped at 80
 * columns. The new instance is added to {@code licenses} and passed to
 * {@code saveLicense}.
 *
 * @param url      address of the license page to download
 * @param category optional; when at least one value is given, the first one
 *                 becomes the category of this license
 * @throws IOException if the page cannot be fetched
 */
public License(String url, Category... category) throws IOException {
    if (category.length > 0) {
        this.setCategory(category[0]);
    }

    Document page = Jsoup.connect(url).get();

    this.setLicenseName(page.getElementsByAttributeValue("property", "rdfs:label").get(0).text());
    this.setIdentifier(page.getElementsByAttributeValue("property", "spdx:licenseId").get(0).text());

    String rawText = page.getElementsByAttributeValue("property", "spdx:licenseText").get(0).text();
    this.setLicenseText(WordUtils.wrap(rawText, 80));

    licenses.add(this);
    saveLicense(this);
}
From source file:me.vertretungsplan.parser.ESchoolParser.java
private void parseTable(Element table, SubstitutionScheduleDay day) { for (Element th : table.select("th[colspan=10]")) { String lesson;//w w w . j ava 2 s .co m Pattern pattern = Pattern.compile("(\\d+)\\. Stunde"); Matcher matcher = pattern.matcher(th.text()); if (matcher.find()) { lesson = matcher.group(1); } else { lesson = th.text(); } // skip over table headers Element row = th.parent().nextElementSibling().nextElementSibling(); while (row != null && row.select("th").size() == 0) { Substitution subst = new Substitution(); subst.setLesson(lesson); Elements columns = row.select("td"); String[] classes = columns.get(0).text().split(", |\\+"); subst.setClasses(new HashSet<>(Arrays.asList(classes))); subst.setPreviousTeacher(getPreviousValue(columns.get(1))); subst.setTeacher(getNewValue(columns.get(1))); subst.setPreviousSubject(getPreviousValue(columns.get(2))); subst.setSubject(getNewValue(columns.get(2))); subst.setPreviousRoom(getPreviousValue(columns.get(3))); subst.setRoom(getNewValue(columns.get(3))); if (columns.get(4).text().isEmpty()) { subst.setType("Vertretung"); subst.setColor(colorProvider.getColor("Vertretung")); } else { String desc = columns.get(4).text(); subst.setDesc(desc); String recognizedType = recognizeType(desc); if (recognizedType == null) recognizedType = "Vertretung"; subst.setType(recognizedType); subst.setColor(colorProvider.getColor(recognizedType)); } day.addSubstitution(subst); row = row.nextElementSibling(); } } }
From source file:org.jresponder.util.TextUtil.java
/** * @param cell element that contains whitespace formatting * @return/*from ww w .j a v a2s. co m*/ */ public String getWholeText(Element cell) { String text = null; List<Node> childNodes = cell.childNodes(); if (childNodes.size() > 0) { Node childNode = childNodes.get(0); if (childNode instanceof TextNode) { text = ((TextNode) childNode).getWholeText(); } } if (text == null) { text = cell.text(); } return text; }