List of usage examples for org.jsoup.nodes.Element.select
public Elements select(String cssQuery)
From source file:org.schabi.newpipe.services.youtube.YoutubeVideoExtractor.java
/**Provides information about links to other videos on the video page, such as related videos. * This is encapsulated in a VideoPreviewInfo object, * which is a subset of the fields in a full VideoInfo.*/ private VideoPreviewInfo extractVideoPreviewInfo(Element li) { VideoPreviewInfo info = new VideoPreviewInfo(); info.webpage_url = li.select("a.content-link").first().attr("abs:href"); try {/*from w ww . ja va 2 s . c o m*/ info.id = matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url); } catch (Exception e) { e.printStackTrace(); } //todo: check NullPointerException causing info.title = li.select("span.title").first().text(); //this page causes the NullPointerException, after finding it by searching for "tjvg": //https://www.youtube.com/watch?v=Uqg0aEhLFAg String views = li.select("span.view-count").first().text(); Log.i(TAG, "title:" + info.title); Log.i(TAG, "view count:" + views); try { info.view_count = Long.parseLong(li.select("span.view-count").first().text().replaceAll("[^\\d]", "")); } catch (NullPointerException e) {//related videos sometimes have no view count info.view_count = 0; } info.uploader = li.select("span.g-hovercard").first().text(); info.duration = li.select("span.video-time").first().text(); Element img = li.select("img").first(); info.thumbnail_url = img.attr("abs:src"); // Sometimes youtube sends links to gif files which somehow seem to not exist // anymore. Items with such gif also offer a secondary image source. So we are going // to use that if we caught such an item. if (info.thumbnail_url.contains(".gif")) { info.thumbnail_url = img.attr("data-thumb"); } if (info.thumbnail_url.startsWith("//")) { info.thumbnail_url = "https:" + info.thumbnail_url; } return info; }
From source file:org.tinymediamanager.scraper.aebn.AebnMetadataProvider.java
/** * Get movie meta data from aebn.net.// w w w . j a v a 2s .c o m * */ @Override public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception { LOGGER.debug("AEBN: getMetadata() {}", options); // check if there is already meta data present in the result if ((options.getResult() != null) && (options.getResult().getMediaMetadata() != null)) { LOGGER.debug("AEBN: return metadata from cache"); return options.getResult().getMediaMetadata(); } MediaMetadata md = new MediaMetadata(providerInfo.getId()); Elements elements = null; Element element = null; Integer aebnId = 0; // get AebnId from previous search result if ((options.getResult() != null) && (options.getResult().getId() != null)) { aebnId = Integer.parseInt(options.getResult().getId()); LOGGER.debug("AEBN: aebnId() from previous search result = {}", aebnId); // preset some values from search result (if there is one) // Use core.Utils.RemoveSortableName() if you want e.g. "Bourne Legacy, The" -> "The Bourne Legacy". md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, StrgUtils.removeCommonSortableName(options.getResult().getOriginalTitle())); md.storeMetadata(MediaMetadata.TITLE, StrgUtils.removeCommonSortableName(options.getResult().getTitle())); } // or get AebnId from options if (!isValidAebnId(aebnId) && (options.getId(AEBNID) != null)) { LOGGER.debug("AEBN: aebnId() from options = {}", options.getId(AEBNID)); aebnId = Integer.parseInt(options.getId(AEBNID)); } if (!isValidAebnId(aebnId)) { LOGGER.warn("AEBN: no or incorrect aebnId, aborting"); return md; } // ID md.setId(providerInfo.getId(), aebnId); LOGGER.debug("AEBN: aebnId({})", aebnId); // Base download url for data scraping String downloadUrl = BASE_DATAURL + "/dispatcher/movieDetail?movieId=" + aebnId; String locale = options.getLanguage().name(); if (!StringUtils.isBlank(locale)) { downloadUrl = downloadUrl + "&locale=" + locale; LOGGER.debug("AEBN: used locale({})", locale); } // begin download and scrape try { LOGGER.debug("AEBN: 
download movie detail page"); Url url = new Url(downloadUrl); InputStream in = url.getInputStream(); Document document = Jsoup.parse(in, "UTF-8", ""); in.close(); // Title // <h1 itemprop="name" class="md-movieTitle" >Titelname</h1> LOGGER.debug("AEBN: parse title"); elements = document.getElementsByAttributeValue("class", "md-movieTitle"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); element = elements.first(); String movieTitle = cleanString(element.text()); LOGGER.debug("AEBN: title({})", movieTitle); md.storeMetadata(MediaMetadata.TITLE, movieTitle); } // Poster // front cover: // http://pic.aebn.net/Stream/Movie/Boxcovers/a66568_xlf.jpg String posterUrl = BASE_IMGURL + "/Stream/Movie/Boxcovers/a" + aebnId.toString() + "_xlf.jpg"; md.storeMetadata(MediaMetadata.POSTER_URL, posterUrl); // Fanart/Background // http://pic.aebn.net/Stream/Movie/Scenes/a113324_s534541.jpg // <img class="sceneThumbnail" alt="Scene Thumbnail" title="Scene Thumbnail" onError="..." // src="http://pic.aebn.net/Stream/Movie/Scenes/a113324_s534544.jpg" onclick="..." 
/> LOGGER.debug("AEBN: parse fanart / scene thumbs"); elements = document.getElementsByAttributeValue("class", "SceneThumbnail"); LOGGER.debug("AEBN: {} elements found", elements.size()); int i = 1; for (Element anchor : elements) { String backgroundUrl = anchor.attr("src"); LOGGER.debug("AEBN: backgroundUrl{}({})", i, backgroundUrl); md.storeMetadata("backgroundUrl" + Integer.valueOf(i).toString(), backgroundUrl); i++; } // Runtime LOGGER.debug("AEBN: parse runtime"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=duration]"); if (elements.size() > 0) { LOGGER.debug("AEBN: " + elements.size() + " elements found (should be one!)"); element = elements.first(); String movieRuntime = cleanString(element.attr("content")); movieRuntime = StrgUtils.substr(movieRuntime, "PT(\\d+)M"); LOGGER.debug("AEBN: runtime({})", movieRuntime); md.storeMetadata(MediaMetadata.RUNTIME, movieRuntime); } // Year LOGGER.debug("AEBN: parse year"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=datePublished]"); if (elements.size() > 0) { LOGGER.debug("AEBN: " + elements.size() + " elements found (should be one!)"); element = elements.first(); String movieYear = cleanString(element.attr("content")); movieYear = StrgUtils.substr(movieYear, "(\\d+)-"); LOGGER.debug("AEBN: year({})", movieYear); md.storeMetadata(MediaMetadata.YEAR, movieYear); } // Series (Collection) LOGGER.debug("AEBN: parse collection"); elements = document.getElementsByAttributeValue("id", "md-details").select("[class=series]"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); element = elements.first(); String movieCollection = cleanString(element.text()); // Fake a TMDB_SET based on the hash value of the collection name int movieCollectionHash = movieCollection.hashCode(); md.storeMetadata(MediaMetadata.COLLECTION_NAME, movieCollection); md.storeMetadata(MediaMetadata.TMDB_SET, 
movieCollectionHash); LOGGER.debug("AEBN: collection({}), hashcode({})", movieCollection, movieCollectionHash); } // Studio LOGGER.debug("AEBN: parse studio"); elements = document.getElementsByAttributeValue("id", "md-details") .select("[itemprop=productionCompany]"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); String movieStudio = cleanString(elements.first().text()); LOGGER.debug("AEBN: studio({})", movieStudio); md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, movieStudio); } // Genre LOGGER.debug("AEBN: parse genre"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=genre]"); for (Element g : elements) { md.addGenre(getTmmGenre(g.text())); } // add basic genre, since all genres at AEBN could be summarised // into this one md.addGenre(MediaGenres.EROTIC); // Certification // no data scrapeable---but obviously it's adult only, so simply // generate it String movieCertification = null; Certification certification = null; String country = options.getCountry().getAlpha2(); LOGGER.debug("AEBN: generate certification for {}", country); // @formatter:off if (country.equals("DE")) { movieCertification = "FSK 18"; } if (country.equals("US")) { movieCertification = "NC-17"; } if (country.equals("GB")) { movieCertification = "R18"; } if (country.equals("FR")) { movieCertification = "18"; } if (country.equals("ES")) { movieCertification = "PX"; } if (country.equals("JP")) { movieCertification = "R18+"; } if (country.equals("IT")) { movieCertification = "V.M.18"; } if (country.equals("NL")) { movieCertification = "16"; } // @formatter:on certification = Certification.getCertification(options.getCountry(), movieCertification); if (certification != null) { LOGGER.debug("AEBN: certification({})", certification); md.addCertification(certification); } // Plot and Tagline LOGGER.debug("AEBN: parse plot"); elements = document.getElementsByAttributeValue("id", 
"md-details").select("[itemprop=about]"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); String moviePlot = cleanString(elements.first().text()); md.storeMetadata(MediaMetadata.PLOT, moviePlot); // no separate tagline available, so extract the first sentence // from the movie plot String movieTagline = StrgUtils.substr(moviePlot, "^(.*?[.!?:])"); LOGGER.debug("AEBN: tagline(" + movieTagline + ")"); md.storeMetadata(MediaMetadata.TAGLINE, movieTagline); } // Actors LOGGER.debug("AEBN: parse actors"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=actor]"); LOGGER.debug("AEBN: {} actors found", elements.size()); for (Element anchor : elements) { String actorid = StrgUtils.substr(anchor.toString(), "starId=(\\d+)"); String actorname = cleanString(anchor.select("[itemprop=name]").first().text()); String actordetailsurl = BASE_DATAURL + anchor.attr("href"); if (!actorname.isEmpty()) { LOGGER.debug("AEBN: add actor id({}), name({}), details({})", actorid, actorname, actordetailsurl); MediaCastMember cm = new MediaCastMember(); cm.setType(MediaCastMember.CastType.ACTOR); cm.setName(actorname); if (!actorid.isEmpty()) { cm.setId(actorid); } // Actor detail page try { Url starurl = new Url(actordetailsurl); InputStream starurlstream = starurl.getInputStream(); Document stardocument = Jsoup.parse(starurlstream, "UTF-8", ""); starurlstream.close(); Elements elements2 = stardocument.getElementsByAttributeValue("class", "StarInfo"); if (elements2.size() == 0) { LOGGER.debug("AEBN: no additional actor details found"); } else { // Actor image String actorimage = elements2.select("[itemprop=image]").first().attr("src"); LOGGER.debug("AEBN: actor image({})", actorimage); if (!actorimage.isEmpty()) { cm.setImageUrl(actorimage); } // Actor 'fanart' images // unsure if this is ever shown in tmm elements2 = stardocument.getElementsByAttributeValue("class", "StarDetailGallery") .select("a"); 
LOGGER.debug("AEBN: {} gallery images found", elements2.size()); for (Element thumbnail : elements2) { LOGGER.debug("AEBN: add fanart image({})", thumbnail.attr("href")); cm.addFanart(thumbnail.attr("href")); } } } catch (Exception e) { LOGGER.error("AEBN: Error downloading {}: {}", actordetailsurl, e); } md.addCastMember(cm); } } // Director LOGGER.debug("AEBN: parse director"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=director]"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); String directorid = StrgUtils.substr(elements.toString(), "directorID=(\\d+)"); String directorname = cleanString(elements.select("[itemprop=name]").first().text()); if (!directorname.isEmpty()) { MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR); cm.setName(directorname); if (!directorid.isEmpty()) { cm.setId(directorid); } cm.setImageUrl(""); md.addCastMember(cm); LOGGER.debug("AEBN: add director id({}), name({})", directorid, directorname); } } // Original Title // if we have no original title, just copy the title if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) { md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE)); } } catch (Exception e) { LOGGER.error("AEBN: Error parsing {}: {}", options.getResult().getUrl(), e); } return md; }
From source file:org.tinymediamanager.scraper.hdtrailersnet.HDTrailersNet.java
@Override public List<MediaTrailer> getTrailers(MediaScrapeOptions options) throws Exception { LOGGER.debug("getTrailers() " + options.toString()); List<MediaTrailer> trailers = new ArrayList<MediaTrailer>(); MediaMetadata md = options.getMetadata(); if (md == null || StringUtils.isEmpty(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) { LOGGER.warn("no originalTitle served"); return trailers; }// w w w. j ava2s . c o m String ot = md.getStringValue(MediaMetadata.ORIGINAL_TITLE); // check if the original title is not empty if (StringUtils.isEmpty(ot)) { return trailers; } // best guess String search = "http://www.hd-trailers.net/movie/" + ot.replaceAll("[^a-zA-Z0-9]", "-").replaceAll("--", "-").toLowerCase() + "/"; try { LOGGER.debug("Guessed HD-Trailers Url: " + search); Url url = new CachedUrl(search); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); Elements tr = doc.getElementsByAttributeValue("itemprop", "trailer"); /* * <tr style="" itemprop="trailer" itemscope itemtype="http://schema.org/VideoObject"> <td class="bottomTableDate" rowspan="2">2012-03-30</td> * <td class="bottomTableName" rowspan="2"><span class="standardTrailerName" itemprop="name">Trailer 2</span> <a href= * "http://blog.hd-trailers.net/how-to-download-hd-trailers-from-apple/#workarounds" ><img src="http://static.hd-trailers.net/images/error.png" * width="16" height="16" style="border:0px;vertical-align:middle" alt="Apple Direct Download Unavailable" * title="Apple Direct Download Unavailable" /></a></td> * * <td class="bottomTableResolution"><a href= "http://trailers.apple.com/movies/sony_pictures/meninblack3/meninblack3-tlr2_h480p.mov" * rel="lightbox[res480p 852 480]" title="Men in Black 3 - Trailer 2 - 480p">480p</a></td> <td class="bottomTableResolution"><a href= * "http://trailers.apple.com/movies/sony_pictures/meninblack3/meninblack3-tlr2_h720p.mov" rel="lightbox[res720p 1280 720]" * title="Men in Black 3 - Trailer 2 - 720p">720p</a></td> <td 
class="bottomTableResolution"><a href= * "http://trailers.apple.com/movies/sony_pictures/meninblack3/meninblack3-tlr2_h1080p.mov" rel="lightbox[res1080p 1920 1080]" * title="Men in Black 3 - Trailer 2 - 1080p">1080p</a></td> <td class="bottomTableIcon"> <a * href="http://trailers.apple.com/trailers/sony_pictures/meninblack3/" target="_blank"> <img * src="http://static.hd-trailers.net/images/apple.ico" alt="Apple" height="16px" width="16px"/></a></td> </tr> <tr> <td * class="bottomTableFileSize">36 MB</td> <td class="bottomTableFileSize">111 MB</td> <td class="bottomTableFileSize">181 MB</td> <td * class="bottomTableEmbed"><a href= * "/embed-code.php?movieId=men-in-black-3&source=1&trailerName=Trailer 2&resolutions=480;720;1080" rel="lightbox[embed 600 600]" * title="Embed this video on your website">embed</a></td> </tr> */ for (Element t : tr) { try { String date = t.select("td.bottomTableDate").first().text(); String title = t.select("td.bottomTableName > span").first().text(); // apple.com urls currently not working (according to hd-trailers) String tr0qual = t.select("td.bottomTableResolution > a").get(0).text(); String tr0url = t.select("td.bottomTableResolution > a").get(0).attr("href"); MediaTrailer trailer = new MediaTrailer(); trailer.setName(title + " (" + date + ")"); trailer.setDate(date); trailer.setUrl(tr0url); trailer.setQuality(tr0qual); trailer.setProvider(getProviderFromUrl(tr0url)); LOGGER.debug(trailer.toString()); trailers.add(trailer); String tr1qual = t.select("td.bottomTableResolution > a").get(1).text(); String tr1url = t.select("td.bottomTableResolution > a").get(1).attr("href"); trailer = new MediaTrailer(); trailer.setName(title + " (" + date + ")"); trailer.setDate(date); trailer.setUrl(tr1url); trailer.setQuality(tr1qual); trailer.setProvider(getProviderFromUrl(tr1url)); LOGGER.debug(trailer.toString()); trailers.add(trailer); String tr2qual = t.select("td.bottomTableResolution > a").get(2).text(); String tr2url = 
t.select("td.bottomTableResolution > a").get(2).attr("href"); trailer = new MediaTrailer(); trailer.setName(title + " (" + date + ")"); trailer.setDate(date); trailer.setUrl(tr2url); trailer.setQuality(tr2qual); trailer.setProvider(getProviderFromUrl(tr2url)); LOGGER.debug(trailer.toString()); trailers.add(trailer); } catch (IndexOutOfBoundsException i) { // ignore parse errors per line LOGGER.warn("Error parsing HD-Trailers line. Possible missing quality."); } } } catch (Exception e) { LOGGER.error("cannot parse HD-Trailers movie: " + ot, e); // clear cache CachedUrl.removeCachedFileForUrl(search); } finally { } return trailers; }
From source file:org.trec.liveqa.GetYAnswersPropertiesFromQid.java
/** * /*from w ww. ja v a 2s . c om*/ * @param iQid question ID * @return map of features and attributes: question title, body, category, best answer, date * @throws Exception */ public static Map<String, String> extractData(String iQid) throws Exception { Map<String, String> res = new LinkedHashMap<>(); res.put("qid", iQid); // parse date from qid res.put("Date", DATE_FORMAT.parse(iQid.substring(0, 14)).toString()); // get and mine html page String url = URL_PREFIX + iQid; HttpClient client = new HttpClient(); GetMethod method = new GetMethod(url); method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); try { int statusCode = client.executeMethod(method); if (statusCode != HttpStatus.SC_OK) { System.err.println("Method failed: " + method.getStatusLine()); } InputStream responseBody = method.getResponseBodyAsStream(); // strip top levels Document doc = Jsoup.parse(responseBody, "UTF8", url); Element html = doc.child(0); Element body = html.child(1); Element head = html.child(0); // get category res.put("Top level Category", findElementText(body, cc)); // get title res.put("Title", findElementText(head, ct)); // get body res.put("Body", findElementText(head, cb)); // get keywords res.put("Keywords", findElementText(head, ck)); // get best answer Element best_answer_div = html.select("div#ya-best-answer").first(); if (best_answer_div != null) { res.put("Best Answer", findElementText(best_answer_div, cba)); } responseBody.close(); } catch (HttpException e) { System.err.println("Fatal protocol violation: " + e.getMessage()); e.printStackTrace(); } catch (IOException e) { System.err.println("Fatal transport error: " + e.getMessage()); e.printStackTrace(); } finally { method.releaseConnection(); } return res; }
From source file:org.xlrnet.metadict.engines.nobordbok.OrdbokEngine.java
@NotNull private Optional<MonolingualEntry> processTableRow(@NotNull Element tableRow, @NotNull Language language) { MonolingualEntryBuilder entryBuilder = ImmutableMonolingualEntry.builder(); DictionaryObjectBuilder objectBuilder = ImmutableDictionaryObject.builder().setLanguage(language); // Extract general form Element oppslagsord = tableRow.getElementsByClass("oppslagsord").first(); if (oppslagsord != null) { extractGeneralForm(objectBuilder, oppslagsord); } else {/*from w w w.ja va 2 s . c o m*/ LOGGER.warn("Unable to find main element - skipping entry."); return Optional.empty(); } // Extract wordclass and determine entrytype String wordClass = tableRow.getElementsByClass("oppsgramordklasse").first().text(); entryBuilder.setEntryType(resolveEntryTypeWithWordClass(wordClass)); // Get meanings Elements meaningCandidates = tableRow.select(".artikkelinnhold > .utvidet > .tyding"); if (meaningCandidates.size() == 0) meaningCandidates = tableRow.select(".artikkelinnhold > .utvidet"); meaningCandidates.forEach(e -> { String meaning = e.childNodes().stream() .filter(node -> (node instanceof TextNode) || (!((Element) node).hasClass("doemeliste") && !node.hasAttr("style") && !((Element) node).hasClass("utvidet") && !((Element) node).hasClass("artikkelinnhold") && !((Element) node).hasClass("kompakt"))) .map((Node n) -> { if (n instanceof Element) return ((Element) n).text(); else return n.toString(); }).collect(Collectors.joining()); meaning = StringEscapeUtils.unescapeHtml4(meaning); meaning = StringUtils.strip(meaning); if (StringUtils.isNotBlank(meaning)) objectBuilder.addMeaning(meaning); }); entryBuilder.setContent(objectBuilder.build()); return Optional.of(entryBuilder.build()); }
From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java
private void extractBilingualSynonyms(@NotNull String queryString, @NotNull Element synonymsTable, @NotNull BilingualQueryResultBuilder resultBuilder, @NotNull Language sourceLanguage) { List<Element> synonymNodes = synonymsTable.select("tr").stream() .filter(e -> e.getElementsByTag("th").size() == 0).collect(Collectors.toList()); if (synonymNodes.size() == 0) { LOGGER.debug("No synonym entries found"); return;/*from www .ja v a 2 s . c o m*/ } String synonymEntryTitle = synonymsTable.select("span.hl").first().text(); Map<String, SynonymGroupBuilder> synonymGroupMap = new HashMap<>(); for (Element synonymNode : synonymNodes) { // Extract only information from the "from"-node (i.e. source language) DictionaryObject newSynonym = processSingleNode( synonymNode.getElementsByClass(CLASS_TRANSLATION).get(0), sourceLanguage, queryString); String groupName = newSynonym.getDescription() != null ? newSynonym.getDescription() : queryString; if (groupName != null) { SynonymGroupBuilder groupBuilder = synonymGroupMap.computeIfAbsent(groupName, (s) -> ImmutableSynonymGroup.builder() .setBaseMeaning(ImmutableDictionaryObject.createSimpleObject(sourceLanguage, s))); groupBuilder.addSynonym(newSynonym); } else { LOGGER.warn("Synonym group is null"); } } SynonymEntryBuilder synonymEntryBuilder = ImmutableSynonymEntry.builder() .setBaseObject(ImmutableDictionaryObject.createSimpleObject(sourceLanguage, synonymEntryTitle)); for (SynonymGroupBuilder synonymGroupBuilder : synonymGroupMap.values()) { synonymEntryBuilder.addSynonymGroup(synonymGroupBuilder.build()); } resultBuilder.addSynonymEntry(synonymEntryBuilder.build()); }
From source file:perflab.LoadrunnerWrapper.java
/** * @param htmlSummaryFile - load runner analysis html report file to parse * @param summaryFile - location of summary file to be generated out of loadrunner html analysis *//*from w w w . j a v a2s. co m*/ protected void parseSummaryFile(String htmlSummaryFile, String summaryFile) { try { File input = new File(htmlSummaryFile); Document document = Jsoup.parse(input, "UTF-8"); Document parse = Jsoup.parse(document.html()); Elements table = parse.select("table").select("[summary=Transactions statistics summary table]"); Elements rows = table.select("tr"); getLog().info("number of rows in summary file=" + rows.size()); for (Element row : rows) { //getLog().info("table element = " + row.toString()); String name = row.select("td[headers=LraTransaction Name]").select("span").text(); if (!name.isEmpty()) { float avgRT = Float.valueOf(row.select("td[headers=LraAverage]").select("span").text()); float minRT = Float.valueOf(row.select("td[headers=LraMinimum]").select("span").text()); float maxRT = Float.valueOf(row.select("td[headers=LraMaximum]").select("span").text()); int passed = Integer.valueOf(row.select("td[headers=LraPass]").select("span").text() .replace(".", "").replace(",", "")); int failed = Integer.valueOf(row.select("td[headers=LraFail]").select("span").text() .replace(".", "").replace(",", "")); int failedPrecentage = failed / (failed + passed) * 100; getLog().info("Saving Transaction [" + name + "]"); this.transactions.add( new LoadRunnerTransaction(name, minRT, avgRT, maxRT, passed, failed, failedPrecentage)); } } } catch (IOException e) { getLog().error("Can't read LoadRunner Analysis html report " + e.getMessage()); } }
From source file:perflab.loadrunnerwrapperjenkins.LoadRunnerWrapper.java
/** * @param htmlSummaryFile - load runner analysis html report file to parse * @param summaryFile - location of summary file to be generated out of loadrunner * html analysis/*ww w . j av a 2 s . co m*/ */ protected void parseSummaryFile(String htmlSummaryFile, String summaryFile) { try { File input = new File(htmlSummaryFile); Document document = Jsoup.parse(input, "UTF-8"); Document parse = Jsoup.parse(document.html()); Elements table = parse.select("table").select("[summary=Transactions statistics summary table]"); Elements rows = table.select("tr"); logger.println("number of rows in summary file=" + rows.size()); for (Element row : rows) { // logger.println("table element = " + row.toString()); String name = row.select("td[headers=LraTransaction Name]").select("span").text(); if (!name.isEmpty()) { float avgRT = Float.valueOf(row.select("td[headers=LraAverage]").select("span").text()); float minRT = Float.valueOf(row.select("td[headers=LraMinimum]").select("span").text()); float maxRT = Float.valueOf(row.select("td[headers=LraMaximum]").select("span").text()); int passed = Integer.valueOf(row.select("td[headers=LraPass]").select("span").text() .replace(".", "").replace(",", "")); int failed = Integer.valueOf(row.select("td[headers=LraFail]").select("span").text() .replace(".", "").replace(",", "")); // logger.println("Saving Transaction [" + name + "]"); this.transactions.add(new LoadRunnerTransaction(name, minRT, avgRT, maxRT, passed, failed)); } } } catch (IOException e) { logger.println("Can't read LoadRunner Analysis html report " + e.getMessage()); } }
From source file:poe.trade.assist.UniquesListSearchGenerator.java
/**imgurl, reqLvl, base, mod * @param args// w w w . ja v a 2s. c o m * @throws Exception */ public static void main(String[] args) throws Exception { List<String> outputLines = new LinkedList<>(); outputLines.add( "Name Art Req.Level Base Mods TaslismanSC TalismanHC Standard Hardcore poewiki"); for (String list : lists) { HttpResponse<String> response = Unirest.get("http://pathofexile.gamepedia.com/" + list) .header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0") .asString(); Document doc = Jsoup.parse(response.getBody()); Elements elems = doc.select("table.wikitable.sortable"); for (Element table : elems) { Elements rows = table.select("tr"); int ctr = 0; boolean hasRequiredLevel = false; for (Element row : rows) { if (ctr == 0) { // first row is headers hasRequiredLevel = !row.select("abbr[title=\"Required Level\"]").isEmpty(); ctr++; continue; } String name = row.child(0).child(0).attr("title"); System.out.println("Now processing: " + name); String imgurl = "=IMAGE(\"" + row.select("img").attr("src") + "\", 3)"; String base = row.child(1).child(0).attr("title"); String reqLvl = hasRequiredLevel ? row.child(2).text() : "0"; reqLvl = reqLvl.equalsIgnoreCase("n/a") ? "0" : reqLvl; String mod = "=\""; Elements mods = row.select("span.itemboxstatsgroup.text-mod"); if (!mods.isEmpty()) { if (mods.size() > 2) throw new Exception("mods.size() is > 2. " + name + " - " + mods.toString()); boolean hasImplicit = mods.size() > 1; String imp = hasImplicit ? mods.get(0).text() : ""; int expIdx = hasImplicit ? 
1 : 0; String lineSeparator = "\"&CHAR(10)&\""; String exp = mods.get(expIdx).textNodes().stream().map(n -> n.text().trim()) .filter(s -> !s.isEmpty()).collect(Collectors.joining(lineSeparator)); String additionalExp = mods.get(expIdx).children().stream().filter(e -> e.hasText()) .map(e -> e.text().trim()).collect(Collectors.joining(lineSeparator)); if (additionalExp != null && !additionalExp.isEmpty()) exp += lineSeparator + additionalExp; mod += imp; if (hasImplicit) mod += (lineSeparator + "--------------" + lineSeparator); mod += exp; } mod += "\""; String standard = "Standard"; String hardcore = "Hardcore"; String tempsc = "Talisman"; String temphc = "Talisman+Hardcore"; String nameenc = URLEncoder.encode(name, "UTF-8"); String sc = hyperlink(getSearchURL(standard, nameenc)); String hc = hyperlink(getSearchURL(hardcore, nameenc)); String tsc = hyperlink(getSearchURL(tempsc, nameenc)); String thc = hyperlink(getSearchURL(temphc, nameenc)); String poewikiurl = hyperlink("http://pathofexile.gamepedia.com/" + (name.replace(' ', '_'))); String s = format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", name, imgurl, reqLvl, base, mod, tsc, thc, sc, hc, poewikiurl); outputLines.add(s); Thread.sleep(1000); } } } FileUtils.writeLines(new File("uniqueslist.txt"), outputLines); }
From source file:reportestrimestrales.Documento.java
protected void juntaDirectiva() { try {// w ww.ja va2s . c o m Document doc = Jsoup.connect("http://www.ine.gob.gt/index.php/institucion/organizacion").get(); Elements tables = doc.select("tbody"); Element juntaDirectiva = tables.get(0); Elements miembros = juntaDirectiva.select("tr"); String[] temp; for (int i = 0; i < miembros.size(); i++) { String[] partes = miembros.get(i).text().split("Suplente"); if (i % 2 != 0) { junta.add(partes[0]); junta.add("Suplente" + partes[1]); } else { junta.add(partes[0]); } } Element tablaGerente = tables.get(1); Elements gerente = tablaGerente.select("tr"); gerencias.add(gerente.get(0).text().split("Gerente")[1]); Element tablaGerencias = tables.get(2); Elements subgerencias = tablaGerencias.select("td"); gerencias.add(subgerencias.get(0).text().split("[Ss]ubgerencia [Aa]dministrativa [Ff]inanciera")[1]); gerencias.add(subgerencias.get(1).text().split("[Ss]ubgerencia [Tt][e]cnica")[1]); System.out.println(gerencias.get(1)); System.out.println(gerencias.get(2)); } catch (IOException ex) { Logger.getLogger(Documento.class.getName()).log(Level.SEVERE, null, ex); } try { FileWriter escritora = new FileWriter(tex, true); BufferedWriter buffer = new BufferedWriter(escritora); buffer.write("\\hoja{\n" + " $\\ $\n" + " \\vspace{0.3cm}\n" + " \n" + " \\begin{center}\n" + " {\\Bold \\LARGE AUTORIDADES}\\\\[0.7cm]\n" + " \n" + " \n" + " {\\Bold \\large \\color{color1!89!black} JUNTA DIRECTIVA} \\\\[0.5cm]\n" + " \n" + " \\begin{center}\n" + " \\begin{tabular}{x{7.0cm}x{7.0cm}}\n"); buffer.write( "\t \t \t { \\Bold Ministerio de Economa} & {\\Bold Ministerio de Finanzas}\\\\ \n"); buffer.write("\t \t \t " + junta.get(junta.indexOf("Ministerio de Economa") + 1) + " & " + junta.get(junta.indexOf("Ministerio de Finanzas Pblicas") + 1) + " \\\\ \n"); buffer.write("\t \t \t " + junta.get(junta.indexOf("Ministerio de Economa") + 2) + " & " + junta.get(junta.indexOf("Ministerio de Finanzas Pblicas") + 2) + " \\\\ \n"); buffer.write("\t \t \t & 
\\\\\n" + " {\\Bold Ministerio de Agricultura,} & {\\Bold Ministerio de Energa y Minas}\\\\ \n" + " {\\Bold Ganadera y Alimentacin} & " + junta.get(junta.indexOf("Ministerio de Energa y Minas") + 1) + "\\\\ \n"); buffer.write("\t \t \t " + junta.get(junta.indexOf("Ministerio de Agricultura, Ganadera y Alimentacin") + 1) + " & " + junta.get(junta.indexOf("Ministerio de Energa y Minas") + 2) + " \\\\ \n"); buffer.write("\t \t \t " + junta.get(junta.indexOf("Ministerio de Agricultura, Ganadera y Alimentacin") + 2) + " & \\\\ \n"); buffer.write("\t \t \t & {\\Bold Banco de Guatemala} \\\\ \n"); buffer.write("\t \t \t {\\Bold Secretara de Planificacin y} &" + junta.get(junta.indexOf("Banco de Guatemala") + 1) + "\\\\\n" + " {\t \t \t \\Bold Programacin de la Presidencia} & " + junta.get(junta.indexOf("Banco de Guatemala") + 2) + " \\\\\n \t \t \t " + junta.get( junta.indexOf("Secretara de Planificacin y Programacin de la Presidencia") + 1) + " & \\\\ \n"); buffer.write("\t \t \t " + junta.get( junta.indexOf("Secretara de Planificacin y Programacin de la Presidencia") + 2) + " & {\\Bold Universidad de San Carlos de Guatemala} \\\\ \n"); buffer.write("&" + junta.get(junta.indexOf("Universidad de San Carlos de Guatemala") + 1) + " \\\\\n" + " {\\Bold Comit Coordinador de } &" + junta.get(junta.indexOf("Universidad de San Carlos de Guatemala") + 2) + " \\\\ \n" + " {\\Bold Asociaciones Agrcolas, Comerciales, } & \\\\\n" + " {\\Bold Industriales y Financieras} & {\\Bold Universidades Privadas} \\\\\n" + junta.get(junta.indexOf( "Comit Coordinador de Asociaciones Agrcolas, Comerciales, Industriales y Financieras") + 1) + " & " + junta.get(junta.indexOf("Universidades Privadas") + 1) + "\\\\\n" + junta.get(junta.indexOf( "Comit Coordinador de Asociaciones Agrcolas, Comerciales, Industriales y Financieras") + 2) + " & " + junta.get(junta.indexOf("Universidades Privadas") + 2) + "\\\\\n" + " \\end{tabular} \n" + " \\end{center} \n "); buffer.write(" \n" + " {\\Bold \\large 
\\color{color1!89!black} GERENCIA}\\\\[0.2cm]\n" + "Gerente: " + gerencias.get(0) + " \\\\\n" + "Subgerente Tcnico: " + gerencias.get(2) + "\\\\\n" + "Subgerente Administrativo Financiero: " + gerencias.get(1) + "\\\\ \n"); buffer.write("\t \t \t \\end{center}\n"); buffer.write("\t \t } \n"); buffer.close(); } catch (IOException ex) { Logger.getLogger(Documento.class.getName()).log(Level.SEVERE, null, ex); } }