Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

This page collects usage examples for the select method of org.jsoup.nodes.Element.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:org.schabi.newpipe.services.youtube.YoutubeVideoExtractor.java

/**
 * Provides information about links to other videos on the video page, such as related videos.
 * This is encapsulated in a VideoPreviewInfo object,
 * which is a subset of the fields in a full VideoInfo.
 *
 * <p>Every lookup is null-safe: when the list item lacks one of the expected elements the
 * corresponding field is set to an empty string (or 0 for the view count) instead of raising
 * a NullPointerException.
 *
 * @param li the list item element describing one linked video
 * @return the preview info populated from {@code li}
 */
private VideoPreviewInfo extractVideoPreviewInfo(Element li) {
    VideoPreviewInfo info = new VideoPreviewInfo();

    Element link = li.select("a.content-link").first();
    info.webpage_url = (link != null) ? link.attr("abs:href") : "";
    try {
        // video ids may contain '_' as well as '-', so both belong in the character class
        info.id = matchGroup1("v=([0-9a-zA-Z_-]*)", info.webpage_url);
    } catch (Exception e) {
        Log.e(TAG, "could not extract video id from " + info.webpage_url, e);
    }

    info.title = firstTextOrEmpty(li, "span.title");

    // related videos sometimes have no view count element at all
    String views = firstTextOrEmpty(li, "span.view-count");
    Log.i(TAG, "title:" + info.title);
    Log.i(TAG, "view count:" + views);

    // keep only the digits; an empty result (element missing or no digits) counts as 0
    String viewDigits = views.replaceAll("[^\\d]", "");
    if (viewDigits.isEmpty()) {
        info.view_count = 0;
    } else {
        try {
            info.view_count = Long.parseLong(viewDigits);
        } catch (NumberFormatException e) {
            // digit run too long for a long
            info.view_count = 0;
        }
    }

    info.uploader = firstTextOrEmpty(li, "span.g-hovercard");
    info.duration = firstTextOrEmpty(li, "span.video-time");

    Element img = li.select("img").first();
    info.thumbnail_url = (img != null) ? img.attr("abs:src") : "";
    // Sometimes youtube sends links to gif files which somehow seem to not exist
    // anymore. Items with such gif also offer a secondary image source. So we are going
    // to use that if we caught such an item.
    if (img != null && info.thumbnail_url.contains(".gif")) {
        info.thumbnail_url = img.attr("data-thumb");
    }
    // protocol-relative urls need an explicit scheme
    if (info.thumbnail_url.startsWith("//")) {
        info.thumbnail_url = "https:" + info.thumbnail_url;
    }
    return info;
}

/**
 * Returns the text of the first element below {@code parent} that matches {@code cssQuery},
 * or an empty string when nothing matches.
 */
private String firstTextOrEmpty(Element parent, String cssQuery) {
    Element match = parent.select(cssQuery).first();
    return (match != null) ? match.text() : "";
}

From source file:org.tinymediamanager.scraper.aebn.AebnMetadataProvider.java

/**
 * Get movie meta data from aebn.net.
 *
 * <p>The AEBN id is taken from the previous search result if there is one, otherwise from
 * {@code options.getId(AEBNID)}. Without a valid id an almost empty metadata object is
 * returned. Download or parse failures are logged and whatever was collected up to that
 * point is returned.
 *
 * @param options the scrape options carrying the search result, id, language and country
 * @return the scraped metadata (never null)
 * @throws Exception declared by the overridden method; e.g. a non-numeric id makes
 *                   {@code Integer.parseInt} throw before the scraping try block is entered
 */
@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("AEBN: getMetadata() {}", options);

    // check if there is already meta data present in the result
    if ((options.getResult() != null) && (options.getResult().getMediaMetadata() != null)) {
        LOGGER.debug("AEBN: return metadata from cache");
        return options.getResult().getMediaMetadata();
    }

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    Elements elements = null;
    Element element = null;
    Integer aebnId = 0;

    // get AebnId from previous search result
    if ((options.getResult() != null) && (options.getResult().getId() != null)) {
        aebnId = Integer.parseInt(options.getResult().getId());
        LOGGER.debug("AEBN: aebnId() from previous search result = {}", aebnId);
        // preset some values from search result (if there is one)
        // Use core.Utils.RemoveSortableName() if you want e.g. "Bourne Legacy, The" -> "The Bourne Legacy".
        md.storeMetadata(MediaMetadata.ORIGINAL_TITLE,
                StrgUtils.removeCommonSortableName(options.getResult().getOriginalTitle()));
        md.storeMetadata(MediaMetadata.TITLE,
                StrgUtils.removeCommonSortableName(options.getResult().getTitle()));
    }

    // or get AebnId from options
    if (!isValidAebnId(aebnId) && (options.getId(AEBNID) != null)) {
        LOGGER.debug("AEBN: aebnId() from options = {}", options.getId(AEBNID));
        aebnId = Integer.parseInt(options.getId(AEBNID));
    }

    if (!isValidAebnId(aebnId)) {
        LOGGER.warn("AEBN: no or incorrect aebnId, aborting");
        return md;
    }

    // ID
    md.setId(providerInfo.getId(), aebnId);
    LOGGER.debug("AEBN: aebnId({})", aebnId);

    // Base download url for data scraping
    String downloadUrl = BASE_DATAURL + "/dispatcher/movieDetail?movieId=" + aebnId;
    String locale = options.getLanguage().name();
    if (!StringUtils.isBlank(locale)) {
        downloadUrl = downloadUrl + "&locale=" + locale;
        LOGGER.debug("AEBN: used locale({})", locale);
    }

    // begin download and scrape
    try {
        LOGGER.debug("AEBN: download movie detail page");
        Url url = new Url(downloadUrl);
        InputStream in = url.getInputStream();
        Document document;
        try {
            document = Jsoup.parse(in, "UTF-8", "");
        } finally {
            // close the stream even when parsing fails
            in.close();
        }

        // Title
        // <h1 itemprop="name" class="md-movieTitle" >Titelname</h1>
        LOGGER.debug("AEBN: parse title");
        elements = document.getElementsByAttributeValue("class", "md-movieTitle");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            element = elements.first();
            String movieTitle = cleanString(element.text());
            LOGGER.debug("AEBN: title({})", movieTitle);
            md.storeMetadata(MediaMetadata.TITLE, movieTitle);
        }

        // Poster
        // front cover:
        // http://pic.aebn.net/Stream/Movie/Boxcovers/a66568_xlf.jpg
        String posterUrl = BASE_IMGURL + "/Stream/Movie/Boxcovers/a" + aebnId.toString() + "_xlf.jpg";
        md.storeMetadata(MediaMetadata.POSTER_URL, posterUrl);

        // Fanart/Background
        // http://pic.aebn.net/Stream/Movie/Scenes/a113324_s534541.jpg
        // <img class="sceneThumbnail" alt="Scene Thumbnail" title="Scene Thumbnail" onError="..."
        // src="http://pic.aebn.net/Stream/Movie/Scenes/a113324_s534544.jpg" onclick="..." />
        LOGGER.debug("AEBN: parse fanart / scene thumbs");
        elements = document.getElementsByAttributeValue("class", "SceneThumbnail");
        LOGGER.debug("AEBN: {} elements found", elements.size());
        int i = 1;
        for (Element anchor : elements) {
            String backgroundUrl = anchor.attr("src");
            LOGGER.debug("AEBN: backgroundUrl{}({})", i, backgroundUrl);
            // keys are numbered backgroundUrl1, backgroundUrl2, ...
            md.storeMetadata("backgroundUrl" + i, backgroundUrl);
            i++;
        }

        // Runtime
        LOGGER.debug("AEBN: parse runtime");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=duration]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            element = elements.first();
            String movieRuntime = cleanString(element.attr("content"));
            movieRuntime = StrgUtils.substr(movieRuntime, "PT(\\d+)M");
            LOGGER.debug("AEBN: runtime({})", movieRuntime);
            md.storeMetadata(MediaMetadata.RUNTIME, movieRuntime);
        }

        // Year
        LOGGER.debug("AEBN: parse year");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=datePublished]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            element = elements.first();
            String movieYear = cleanString(element.attr("content"));
            movieYear = StrgUtils.substr(movieYear, "(\\d+)-");
            LOGGER.debug("AEBN: year({})", movieYear);
            md.storeMetadata(MediaMetadata.YEAR, movieYear);
        }

        // Series (Collection)
        LOGGER.debug("AEBN: parse collection");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[class=series]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            element = elements.first();
            String movieCollection = cleanString(element.text());

            // Fake a TMDB_SET based on the hash value of the collection name
            int movieCollectionHash = movieCollection.hashCode();

            md.storeMetadata(MediaMetadata.COLLECTION_NAME, movieCollection);
            md.storeMetadata(MediaMetadata.TMDB_SET, movieCollectionHash);
            LOGGER.debug("AEBN: collection({}), hashcode({})", movieCollection, movieCollectionHash);
        }

        // Studio
        LOGGER.debug("AEBN: parse studio");
        elements = document.getElementsByAttributeValue("id", "md-details")
                .select("[itemprop=productionCompany]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            String movieStudio = cleanString(elements.first().text());
            LOGGER.debug("AEBN: studio({})", movieStudio);
            md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, movieStudio);
        }

        // Genre
        LOGGER.debug("AEBN: parse genre");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=genre]");
        for (Element g : elements) {
            md.addGenre(getTmmGenre(g.text()));
        }
        // add basic genre, since all genres at AEBN could be summarised
        // into this one
        md.addGenre(MediaGenres.EROTIC);

        // Certification
        // no data scrapeable---but obviously it's adult only, so simply
        // generate it
        String movieCertification = null;
        Certification certification = null;
        String country = options.getCountry().getAlpha2();
        LOGGER.debug("AEBN: generate certification for {}", country);
        // @formatter:off
        if (country.equals("DE")) {
            movieCertification = "FSK 18";
        }
        if (country.equals("US")) {
            movieCertification = "NC-17";
        }
        if (country.equals("GB")) {
            movieCertification = "R18";
        }
        if (country.equals("FR")) {
            movieCertification = "18";
        }
        if (country.equals("ES")) {
            movieCertification = "PX";
        }
        if (country.equals("JP")) {
            movieCertification = "R18+";
        }
        if (country.equals("IT")) {
            movieCertification = "V.M.18";
        }
        if (country.equals("NL")) {
            movieCertification = "16";
        }
        // @formatter:on
        certification = Certification.getCertification(options.getCountry(), movieCertification);
        if (certification != null) {
            LOGGER.debug("AEBN: certification({})", certification);
            md.addCertification(certification);
        }

        // Plot and Tagline
        LOGGER.debug("AEBN: parse plot");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=about]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            String moviePlot = cleanString(elements.first().text());
            md.storeMetadata(MediaMetadata.PLOT, moviePlot);
            // no separate tagline available, so extract the first sentence
            // from the movie plot
            String movieTagline = StrgUtils.substr(moviePlot, "^(.*?[.!?:])");
            LOGGER.debug("AEBN: tagline({})", movieTagline);
            md.storeMetadata(MediaMetadata.TAGLINE, movieTagline);
        }

        // Actors
        LOGGER.debug("AEBN: parse actors");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=actor]");
        LOGGER.debug("AEBN: {} actors found", elements.size());
        for (Element anchor : elements) {
            String actorid = StrgUtils.substr(anchor.toString(), "starId=(\\d+)");
            // an actor entry without a name element is treated like an empty name and skipped,
            // instead of aborting the whole scrape with a NullPointerException
            Element actorNameElement = anchor.select("[itemprop=name]").first();
            String actorname = (actorNameElement != null) ? cleanString(actorNameElement.text()) : "";
            String actordetailsurl = BASE_DATAURL + anchor.attr("href");
            if (!actorname.isEmpty()) {
                LOGGER.debug("AEBN: add actor id({}), name({}), details({})", actorid, actorname,
                        actordetailsurl);
                MediaCastMember cm = new MediaCastMember();
                cm.setType(MediaCastMember.CastType.ACTOR);
                cm.setName(actorname);
                if (!actorid.isEmpty()) {
                    cm.setId(actorid);
                }

                // Actor detail page
                try {
                    Url starurl = new Url(actordetailsurl);
                    InputStream starurlstream = starurl.getInputStream();
                    Document stardocument;
                    try {
                        stardocument = Jsoup.parse(starurlstream, "UTF-8", "");
                    } finally {
                        // close the stream even when parsing fails
                        starurlstream.close();
                    }
                    Elements elements2 = stardocument.getElementsByAttributeValue("class", "StarInfo");
                    if (elements2.size() == 0) {
                        LOGGER.debug("AEBN: no additional actor details found");
                    } else {
                        // Actor image (a missing image element must not prevent the gallery scan)
                        Element actorImageElement = elements2.select("[itemprop=image]").first();
                        String actorimage = (actorImageElement != null) ? actorImageElement.attr("src") : "";
                        LOGGER.debug("AEBN: actor image({})", actorimage);
                        if (!actorimage.isEmpty()) {
                            cm.setImageUrl(actorimage);
                        }
                        // Actor 'fanart' images
                        // unsure if this is ever shown in tmm
                        elements2 = stardocument.getElementsByAttributeValue("class", "StarDetailGallery")
                                .select("a");
                        LOGGER.debug("AEBN: {} gallery images found", elements2.size());
                        for (Element thumbnail : elements2) {
                            LOGGER.debug("AEBN: add fanart image({})", thumbnail.attr("href"));
                            cm.addFanart(thumbnail.attr("href"));
                        }
                    }
                } catch (Exception e) {
                    // the actor is still added below, just without the detail page data
                    LOGGER.error("AEBN: Error downloading {}: {}", actordetailsurl, e);
                }

                md.addCastMember(cm);
            }
        }

        // Director
        LOGGER.debug("AEBN: parse director");
        elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=director]");
        if (elements.size() > 0) {
            LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size());
            String directorid = StrgUtils.substr(elements.toString(), "directorID=(\\d+)");
            // a missing name element is treated like an empty name
            Element directorNameElement = elements.select("[itemprop=name]").first();
            String directorname = (directorNameElement != null) ? cleanString(directorNameElement.text()) : "";
            if (!directorname.isEmpty()) {
                MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR);
                cm.setName(directorname);
                if (!directorid.isEmpty()) {
                    cm.setId(directorid);
                }
                cm.setImageUrl("");
                md.addCastMember(cm);
                LOGGER.debug("AEBN: add director id({}), name({})", directorid, directorname);
            }
        }

        // Original Title
        // if we have no original title, just copy the title
        if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
        }
    } catch (Exception e) {
        // options.getResult() is null when the id was taken from options.getId(AEBNID),
        // so fall back to the url that was actually downloaded
        String failedUrl = (options.getResult() != null) ? options.getResult().getUrl() : downloadUrl;
        LOGGER.error("AEBN: Error parsing {}: {}", failedUrl, e);
    }

    return md;
}

From source file:org.tinymediamanager.scraper.hdtrailersnet.HDTrailersNet.java

/**
 * Looks up trailers on hd-trailers.net for the movie's original title.
 *
 * <p>The movie page url is guessed from the original title. Every table row marked with
 * {@code itemprop="trailer"} contributes up to three trailers, one for each of the first
 * three resolution links in that row (the sample markup this was written against offers
 * 480p, 720p and 1080p).
 *
 * @param options the scrape options; their metadata must carry an original title
 * @return the trailers found; empty when there is no original title or the page could not be parsed
 * @throws Exception declared by the overridden method
 */
@Override
public List<MediaTrailer> getTrailers(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getTrailers() " + options.toString());
    List<MediaTrailer> trailers = new ArrayList<MediaTrailer>();
    MediaMetadata md = options.getMetadata();

    if (md == null || StringUtils.isEmpty(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
        LOGGER.warn("no originalTitle served");
        return trailers;
    }

    String ot = md.getStringValue(MediaMetadata.ORIGINAL_TITLE);

    // best guess; lower-casing is locale independent so the url does not depend on the
    // default locale of the machine
    // NOTE(review): replaceAll("--", "-") leaves "--" behind for runs of three dashes - confirm
    // against the slugs the site really uses before changing it
    String search = "http://www.hd-trailers.net/movie/"
            + ot.replaceAll("[^a-zA-Z0-9]", "-").replaceAll("--", "-").toLowerCase(java.util.Locale.ROOT) + "/";
    try {
        LOGGER.debug("Guessed HD-Trailers Url: " + search);

        Url url = new CachedUrl(search);
        Document doc;
        InputStream in = url.getInputStream();
        try {
            doc = Jsoup.parse(in, "UTF-8", "");
        } finally {
            // the stream used to be left open
            if (in != null) {
                in.close();
            }
        }

        // each trailer is one <tr itemprop="trailer"> holding
        //   <td class="bottomTableDate">2012-03-30</td>
        //   <td class="bottomTableName"><span itemprop="name">Trailer 2</span> ...</td>
        //   <td class="bottomTableResolution"><a href="...h480p.mov">480p</a></td>
        //   <td class="bottomTableResolution"><a href="...h720p.mov">720p</a></td>
        //   <td class="bottomTableResolution"><a href="...h1080p.mov">1080p</a></td>
        Elements tr = doc.getElementsByAttributeValue("itemprop", "trailer");
        final int expectedQualities = 3;
        for (Element t : tr) {
            try {
                String date = t.select("td.bottomTableDate").first().text();
                String title = t.select("td.bottomTableName > span").first().text();

                // apple.com urls currently not working (according to hd-trailers)
                Elements resolutionLinks = t.select("td.bottomTableResolution > a");
                for (int idx = 0; idx < expectedQualities; idx++) {
                    // get() throws IndexOutOfBoundsException for a missing quality; trailers
                    // already added for this row are kept
                    Element resolutionLink = resolutionLinks.get(idx);
                    String quality = resolutionLink.text();
                    String trailerUrl = resolutionLink.attr("href");

                    MediaTrailer trailer = new MediaTrailer();
                    trailer.setName(title + " (" + date + ")");
                    trailer.setDate(date);
                    trailer.setUrl(trailerUrl);
                    trailer.setQuality(quality);
                    trailer.setProvider(getProviderFromUrl(trailerUrl));
                    LOGGER.debug(trailer.toString());
                    trailers.add(trailer);
                }
            } catch (IndexOutOfBoundsException i) {
                // ignore parse errors per line
                LOGGER.warn("Error parsing HD-Trailers line. Possible missing quality.");
            }
        }
    } catch (Exception e) {
        LOGGER.error("cannot parse HD-Trailers movie: " + ot, e);

        // clear cache
        CachedUrl.removeCachedFileForUrl(search);
    }
    return trailers;
}

From source file:org.trec.liveqa.GetYAnswersPropertiesFromQid.java

/**
 * /*from w  ww.  ja  v  a  2s  . c  om*/
 * @param iQid question ID
 * @return map of features and attributes: question title, body, category, best answer, date
 * @throws Exception
 */
public static Map<String, String> extractData(String iQid) throws Exception {

    Map<String, String> res = new LinkedHashMap<>();
    res.put("qid", iQid);

    // parse date from qid
    res.put("Date", DATE_FORMAT.parse(iQid.substring(0, 14)).toString());

    // get and mine html page
    String url = URL_PREFIX + iQid;
    HttpClient client = new HttpClient();
    GetMethod method = new GetMethod(url);
    method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
            new DefaultHttpMethodRetryHandler(3, false));
    try {
        int statusCode = client.executeMethod(method);
        if (statusCode != HttpStatus.SC_OK) {
            System.err.println("Method failed: " + method.getStatusLine());
        }
        InputStream responseBody = method.getResponseBodyAsStream();

        // strip top levels
        Document doc = Jsoup.parse(responseBody, "UTF8", url);
        Element html = doc.child(0);

        Element body = html.child(1);
        Element head = html.child(0);

        // get category
        res.put("Top level Category", findElementText(body, cc));

        // get title
        res.put("Title", findElementText(head, ct));

        // get body
        res.put("Body", findElementText(head, cb));

        // get keywords
        res.put("Keywords", findElementText(head, ck));

        // get best answer
        Element best_answer_div = html.select("div#ya-best-answer").first();
        if (best_answer_div != null) {
            res.put("Best Answer", findElementText(best_answer_div, cba));
        }

        responseBody.close();

    } catch (HttpException e) {
        System.err.println("Fatal protocol violation: " + e.getMessage());
        e.printStackTrace();
    } catch (IOException e) {
        System.err.println("Fatal transport error: " + e.getMessage());
        e.printStackTrace();
    } finally {
        method.releaseConnection();
    }

    return res;
}

From source file:org.xlrnet.metadict.engines.nobordbok.OrdbokEngine.java

/**
 * Builds a monolingual entry from one row of the result table.
 *
 * @param tableRow the table row element to process
 * @param language the language set on the resulting dictionary object
 * @return the entry, or an empty Optional when the row has no "oppslagsord" element
 */
@NotNull
private Optional<MonolingualEntry> processTableRow(@NotNull Element tableRow, @NotNull Language language) {
    MonolingualEntryBuilder entryBuilder = ImmutableMonolingualEntry.builder();
    DictionaryObjectBuilder objectBuilder = ImmutableDictionaryObject.builder().setLanguage(language);

    // Extract general form
    Element oppslagsord = tableRow.getElementsByClass("oppslagsord").first();
    if (oppslagsord != null) {
        extractGeneralForm(objectBuilder, oppslagsord);
    } else {
        LOGGER.warn("Unable to find main element - skipping entry.");
        return Optional.empty();
    }

    // Extract wordclass and determine entrytype; a row without a wordclass element is
    // resolved like one whose wordclass text is empty instead of failing with a NullPointerException
    Element wordClassElement = tableRow.getElementsByClass("oppsgramordklasse").first();
    String wordClass = (wordClassElement != null) ? wordClassElement.text() : "";
    entryBuilder.setEntryType(resolveEntryTypeWithWordClass(wordClass));

    // Get meanings; fall back to the whole extended block when it has no explicit ".tyding" children
    Elements meaningCandidates = tableRow.select(".artikkelinnhold > .utvidet > .tyding");
    if (meaningCandidates.size() == 0) {
        meaningCandidates = tableRow.select(".artikkelinnhold > .utvidet");
    }
    meaningCandidates.forEach(e -> {
        String meaning = e.childNodes().stream()
                .filter(this::isMeaningNode)
                .map((Node n) -> {
                    if (n instanceof Element) {
                        return ((Element) n).text();
                    } else {
                        return n.toString();
                    }
                }).collect(Collectors.joining());
        meaning = StringEscapeUtils.unescapeHtml4(meaning);
        meaning = StringUtils.strip(meaning);
        if (StringUtils.isNotBlank(meaning)) {
            objectBuilder.addMeaning(meaning);
        }
    });

    entryBuilder.setContent(objectBuilder.build());

    return Optional.of(entryBuilder.build());
}

/**
 * Decides whether a child node contributes to the meaning text: text nodes always do,
 * elements do unless they carry a style attribute or one of the excluded classes, and any
 * other node type (e.g. comments) never does - such nodes used to trigger a ClassCastException.
 */
private boolean isMeaningNode(Node node) {
    if (node instanceof TextNode) {
        return true;
    }
    if (!(node instanceof Element)) {
        return false;
    }
    Element element = (Element) node;
    return !element.hasClass("doemeliste") && !element.hasAttr("style") && !element.hasClass("utvidet")
            && !element.hasClass("artikkelinnhold") && !element.hasClass("kompakt");
}

From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java

/**
 * Collects the synonyms listed in a synonyms table, grouped by their description, and adds
 * them as one synonym entry to the result builder.
 *
 * <p>Only the first CLASS_TRANSLATION node of each row (the "from" side, i.e. the source
 * language) is processed. Rows without such a node are skipped, and when the table has no
 * {@code span.hl} title the query string is used as the entry title.
 *
 * @param queryString    the original query; also the group name for synonyms without description
 * @param synonymsTable  the table element holding the synonym rows
 * @param resultBuilder  the builder receiving the synonym entry
 * @param sourceLanguage the language of the synonyms
 */
private void extractBilingualSynonyms(@NotNull String queryString, @NotNull Element synonymsTable,
        @NotNull BilingualQueryResultBuilder resultBuilder, @NotNull Language sourceLanguage) {
    // header rows contain <th> cells and carry no synonyms
    List<Element> synonymNodes = synonymsTable.select("tr").stream()
            .filter(e -> e.getElementsByTag("th").size() == 0).collect(Collectors.toList());

    if (synonymNodes.size() == 0) {
        LOGGER.debug("No synonym entries found");
        return;
    }

    // first() returns null when the table has no highlighted title
    Element titleElement = synonymsTable.select("span.hl").first();
    String synonymEntryTitle = (titleElement != null) ? titleElement.text() : queryString;

    Map<String, SynonymGroupBuilder> synonymGroupMap = new HashMap<>();

    for (Element synonymNode : synonymNodes) {
        // Extract only information from the "from"-node (i.e. source language)
        Element fromNode = synonymNode.getElementsByClass(CLASS_TRANSLATION).first();
        if (fromNode == null) {
            // used to fail with an IndexOutOfBoundsException
            LOGGER.debug("Skipping synonym row without translation node");
            continue;
        }
        DictionaryObject newSynonym = processSingleNode(fromNode, sourceLanguage, queryString);
        String groupName = newSynonym.getDescription() != null ? newSynonym.getDescription() : queryString;
        if (groupName != null) {
            SynonymGroupBuilder groupBuilder = synonymGroupMap.computeIfAbsent(groupName,
                    (s) -> ImmutableSynonymGroup.builder()
                            .setBaseMeaning(ImmutableDictionaryObject.createSimpleObject(sourceLanguage, s)));
            groupBuilder.addSynonym(newSynonym);
        } else {
            LOGGER.warn("Synonym group is null");
        }
    }

    if (synonymGroupMap.isEmpty()) {
        // every row was skipped, so there is nothing to report
        LOGGER.debug("No synonym entries found");
        return;
    }

    SynonymEntryBuilder synonymEntryBuilder = ImmutableSynonymEntry.builder()
            .setBaseObject(ImmutableDictionaryObject.createSimpleObject(sourceLanguage, synonymEntryTitle));

    for (SynonymGroupBuilder synonymGroupBuilder : synonymGroupMap.values()) {
        synonymEntryBuilder.addSynonymGroup(synonymGroupBuilder.build());
    }

    resultBuilder.addSynonymEntry(synonymEntryBuilder.build());
}

From source file:perflab.LoadrunnerWrapper.java

/**
 * Reads the transaction statistics table of a LoadRunner analysis html report and stores one
 * LoadRunnerTransaction per named row in {@code this.transactions}.
 *
 * @param htmlSummaryFile - load runner analysis html report file to parse
 * @param summaryFile     - location of summary file to be generated out of loadrunner html analysis
 *                        (not used by this method)
 */
protected void parseSummaryFile(String htmlSummaryFile, String summaryFile) {
    try {

        File input = new File(htmlSummaryFile);
        Document document = Jsoup.parse(input, "UTF-8");
        Document parse = Jsoup.parse(document.html());
        Elements table = parse.select("table").select("[summary=Transactions statistics summary table]");
        Elements rows = table.select("tr");

        getLog().info("number of rows in summary file=" + rows.size());

        for (Element row : rows) {

            String name = row.select("td[headers=LraTransaction Name]").select("span").text();

            // rows without a transaction name (e.g. the header row) carry no statistics
            if (!name.isEmpty()) {

                float avgRT = Float.valueOf(row.select("td[headers=LraAverage]").select("span").text());
                float minRT = Float.valueOf(row.select("td[headers=LraMinimum]").select("span").text());
                float maxRT = Float.valueOf(row.select("td[headers=LraMaximum]").select("span").text());
                // counts may contain thousands separators, strip them before parsing
                int passed = Integer.valueOf(row.select("td[headers=LraPass]").select("span").text()
                        .replace(".", "").replace(",", ""));
                int failed = Integer.valueOf(row.select("td[headers=LraFail]").select("span").text()
                        .replace(".", "").replace(",", ""));

                // multiply before dividing: the former "failed / total * 100" truncated to 0 for
                // every partial failure rate and divided by zero for a transaction that never ran;
                // long arithmetic keeps failed * 100 from overflowing
                long total = (long) failed + passed;
                int failedPrecentage = (total == 0) ? 0 : (int) (failed * 100L / total);

                getLog().info("Saving Transaction [" + name + "]");
                this.transactions.add(
                        new LoadRunnerTransaction(name, minRT, avgRT, maxRT, passed, failed, failedPrecentage));
            }
        }

    } catch (IOException e) {
        getLog().error("Can't read LoadRunner Analysis html report " + e.getMessage());
    }

}

From source file:perflab.loadrunnerwrapperjenkins.LoadRunnerWrapper.java

/**
 * @param htmlSummaryFile - load runner analysis html report file to parse
 * @param summaryFile     - location of summary file to be generated out of loadrunner
 *                        html analysis/*ww w . j av  a  2 s  . co  m*/
 */
protected void parseSummaryFile(String htmlSummaryFile, String summaryFile) {
    try {

        File input = new File(htmlSummaryFile);
        Document document = Jsoup.parse(input, "UTF-8");
        Document parse = Jsoup.parse(document.html());
        Elements table = parse.select("table").select("[summary=Transactions statistics summary table]");
        Elements rows = table.select("tr");

        logger.println("number of rows in summary file=" + rows.size());

        for (Element row : rows) {

            // logger.println("table element = " + row.toString());

            String name = row.select("td[headers=LraTransaction Name]").select("span").text();

            if (!name.isEmpty()) {

                float avgRT = Float.valueOf(row.select("td[headers=LraAverage]").select("span").text());
                float minRT = Float.valueOf(row.select("td[headers=LraMinimum]").select("span").text());
                float maxRT = Float.valueOf(row.select("td[headers=LraMaximum]").select("span").text());
                int passed = Integer.valueOf(row.select("td[headers=LraPass]").select("span").text()
                        .replace(".", "").replace(",", ""));
                int failed = Integer.valueOf(row.select("td[headers=LraFail]").select("span").text()
                        .replace(".", "").replace(",", ""));

                // logger.println("Saving Transaction [" + name + "]");
                this.transactions.add(new LoadRunnerTransaction(name, minRT, avgRT, maxRT, passed, failed));
            }
        }

    } catch (IOException e) {
        logger.println("Can't read LoadRunner Analysis html report " + e.getMessage());
    }

}

From source file:poe.trade.assist.UniquesListSearchGenerator.java

/**
 * Generates {@code uniqueslist.txt}: a header line followed by one tab-separated line per item
 * (name, image formula, required level, base, mods formula, four search links and a wiki link).
 *
 * <p>For every entry of {@code lists} the page {@code http://pathofexile.gamepedia.com/<entry>}
 * is downloaded, and each row of every {@code table.wikitable.sortable} on it is turned into one
 * output line. The first row of each table is treated as the header row and is only inspected
 * for a "Required Level" column.
 *
 * @param args ignored
 * @throws Exception any failure while fetching, parsing or writing is propagated; an
 *                   {@link IllegalStateException} is thrown when an item has more than two mod
 *                   groups
 */
public static void main(String[] args) throws Exception {
    List<String> outputLines = new LinkedList<>();
    outputLines.add(
            "Name   Art   Req.Level   Base   Mods   TaslismanSC   TalismanHC   Standard   Hardcore   poewiki");
    for (String list : lists) {
        HttpResponse<String> response = Unirest.get("http://pathofexile.gamepedia.com/" + list)
                .header("User-Agent",
                        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0")
                .asString();
        Document doc = Jsoup.parse(response.getBody());
        for (Element table : doc.select("table.wikitable.sortable")) {
            boolean headerSeen = false;
            boolean hasRequiredLevel = false;
            for (Element row : table.select("tr")) {
                if (!headerSeen) { // first row is headers
                    hasRequiredLevel = !row.select("abbr[title=\"Required Level\"]").isEmpty();
                    headerSeen = true;
                    continue;
                }
                String name = row.child(0).child(0).attr("title");
                System.out.println("Now processing: " + name);
                String imgurl = "=IMAGE(\"" + row.select("img").attr("src") + "\", 3)";
                String base = row.child(1).child(0).attr("title");
                String reqLvl = hasRequiredLevel ? row.child(2).text() : "0";
                if (reqLvl.equalsIgnoreCase("n/a")) {
                    reqLvl = "0";
                }
                String mod = buildModsFormula(name, row.select("span.itemboxstatsgroup.text-mod"));

                String nameenc = URLEncoder.encode(name, "UTF-8");
                String sc = hyperlink(getSearchURL("Standard", nameenc));
                String hc = hyperlink(getSearchURL("Hardcore", nameenc));
                String tsc = hyperlink(getSearchURL("Talisman", nameenc));
                String thc = hyperlink(getSearchURL("Talisman+Hardcore", nameenc));
                String poewikiurl = hyperlink("http://pathofexile.gamepedia.com/" + (name.replace(' ', '_')));

                String s = format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", name, imgurl, reqLvl, base, mod,
                        tsc, thc, sc, hc, poewikiurl);
                outputLines.add(s);
                // One-second pause after every row; presumably throttles whatever
                // getSearchURL/hyperlink do per item - TODO confirm.
                Thread.sleep(1000);
            }
        }
    }
    FileUtils.writeLines(new File("uniqueslist.txt"), outputLines);
}

/**
 * Builds the spreadsheet string formula for the mods column of one item.
 *
 * <p>With two mod groups the first is emitted as-is, followed by a dashed divider line; the
 * last group is emitted as its direct text nodes followed by the text of its child elements,
 * all joined with {@code "&CHAR(10)&"}. No leading separator is produced when the group has
 * child text but no direct text nodes.
 *
 * @param name item name, used only in the error message
 * @param mods the item's mod groups (zero, one or two elements)
 * @return the formula, {@code =""} when there are no mod groups
 * @throws IllegalStateException if there are more than two mod groups
 */
private static String buildModsFormula(String name, Elements mods) {
    if (mods.isEmpty()) {
        return "=\"\"";
    }
    if (mods.size() > 2) {
        throw new IllegalStateException("mods.size() is > 2. " + name + " - " + mods.toString());
    }

    String lineSeparator = "\"&CHAR(10)&\"";
    boolean hasImplicit = mods.size() > 1;
    Element explicit = mods.get(hasImplicit ? 1 : 0);

    String exp = explicit.textNodes().stream().map(n -> n.text().trim())
            .filter(s -> !s.isEmpty()).collect(Collectors.joining(lineSeparator));
    String additionalExp = explicit.children().stream().filter(e -> e.hasText())
            .map(e -> e.text().trim()).collect(Collectors.joining(lineSeparator));
    if (!additionalExp.isEmpty()) {
        // Only insert a separator when there is something in front of it.
        exp = exp.isEmpty() ? additionalExp : exp + lineSeparator + additionalExp;
    }

    StringBuilder mod = new StringBuilder("=\"");
    if (hasImplicit) {
        mod.append(mods.get(0).text()).append(lineSeparator).append("--------------").append(lineSeparator);
    }
    mod.append(exp).append('"');
    return mod.toString();
}

From source file:reportestrimestrales.Documento.java

/**
 * Downloads the organization page of www.ine.gob.gt, extracts board members into {@code junta}
 * and management names into {@code gerencias}, and appends a LaTeX "AUTORIDADES" sheet built
 * from those lists to {@code tex}.
 *
 * <p>An {@link IOException} from the download or from writing the file is logged and not
 * rethrown. Index errors caused by an unexpected page layout or missing list entries are not
 * caught here.
 */
protected void juntaDirectiva() {
    try {
        Document doc = Jsoup.connect("http://www.ine.gob.gt/index.php/institucion/organizacion").get();
        Elements tables = doc.select("tbody");

        // First tbody: every row contributes the text before "Suplente"; odd-indexed rows also
        // contribute the "Suplente..." remainder as a separate entry.
        Elements miembros = tables.get(0).select("tr");
        for (int i = 0; i < miembros.size(); i++) {
            String[] partes = miembros.get(i).text().split("Suplente");
            junta.add(partes[0]);
            if (i % 2 != 0) {
                junta.add("Suplente" + partes[1]);
            }
        }

        // Second tbody: the text after "Gerente" in its first row.
        Elements gerente = tables.get(1).select("tr");
        gerencias.add(gerente.get(0).text().split("Gerente")[1]);

        // Third tbody: the text after each sub-management title in its first two cells.
        Elements subgerencias = tables.get(2).select("td");
        gerencias.add(subgerencias.get(0).text().split("[Ss]ubgerencia [Aa]dministrativa [Ff]inanciera")[1]);
        gerencias.add(subgerencias.get(1).text().split("[Ss]ubgerencia [Tt][e]cnica")[1]);
        System.out.println(gerencias.get(1));
        System.out.println(gerencias.get(2));
    } catch (IOException ex) {
        Logger.getLogger(Documento.class.getName()).log(Level.SEVERE, null, ex);
    }

    // try-with-resources closes the writers even when building the sheet throws, so the file
    // handle is never leaked; the sheet is written in one call so it is appended all-or-nothing.
    try (FileWriter escritora = new FileWriter(tex, true);
            BufferedWriter buffer = new BufferedWriter(escritora)) {
        buffer.write(construirHojaAutoridades());
    } catch (IOException ex) {
        Logger.getLogger(Documento.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/**
 * Builds the complete LaTeX text of the "AUTORIDADES" sheet from {@code junta} and
 * {@code gerencias}.
 *
 * <p>NOTE(review): several literals below (e.g. "Economa", "Pblicas", "Tcnico") look like they
 * lost accented characters somewhere; they are kept exactly as found - confirm against the
 * text the page actually delivers.
 */
private String construirHojaAutoridades() {
    final String economia = "Ministerio de Economa";
    final String finanzas = "Ministerio de Finanzas Pblicas";
    final String energia = "Ministerio de Energa y Minas";
    final String agricultura = "Ministerio de Agricultura, Ganadera y Alimentacin";
    final String banco = "Banco de Guatemala";
    final String secretaria = "Secretara de Planificacin y Programacin de la Presidencia";
    final String universidad = "Universidad de San Carlos de Guatemala";
    final String comite = "Comit Coordinador de Asociaciones Agrcolas, Comerciales, Industriales y Financieras";
    final String privadas = "Universidades Privadas";

    StringBuilder hoja = new StringBuilder();
    hoja.append("\\hoja{\n" + "   $\\ $\n" + "   \\vspace{0.3cm}\n" + "   \n" + "   \\begin{center}\n"
            + "      {\\Bold \\LARGE AUTORIDADES}\\\\[0.7cm]\n" + "      \n" + "      \n"
            + "      {\\Bold \\large \\color{color1!89!black} JUNTA  DIRECTIVA} \\\\[0.5cm]\n" + "      \n"
            + "      \\begin{center}\n" + "         \\begin{tabular}{x{7.0cm}x{7.0cm}}\n");
    hoja.append(
            "\t \t \t { \\Bold Ministerio de Economa}   &       {\\Bold Ministerio de Finanzas}\\\\ \n");
    hoja.append("\t \t \t " + miembroDeJunta(economia, 1) + " & " + miembroDeJunta(finanzas, 1)
            + "  \\\\ \n");
    hoja.append("\t \t \t " + miembroDeJunta(economia, 2) + " & " + miembroDeJunta(finanzas, 2)
            + "  \\\\ \n");
    hoja.append("\t \t \t & \\\\\n"
            + "            {\\Bold Ministerio de Agricultura,} & {\\Bold Ministerio de Energa y Minas}\\\\ \n"
            + "            {\\Bold Ganadera y Alimentacin} & " + miembroDeJunta(energia, 1) + "\\\\ \n");
    hoja.append("\t \t \t " + miembroDeJunta(agricultura, 1) + " & " + miembroDeJunta(energia, 2)
            + "  \\\\ \n");
    hoja.append("\t \t \t " + miembroDeJunta(agricultura, 2) + " &   \\\\ \n");
    hoja.append("\t \t \t & {\\Bold Banco de Guatemala} \\\\ \n");
    hoja.append("\t \t \t {\\Bold Secretara de Planificacin y} &" + miembroDeJunta(banco, 1) + "\\\\\n"
            + "            {\t \t \t \\Bold Programacin de la Presidencia} & " + miembroDeJunta(banco, 2)
            + " \\\\\n \t \t \t " + miembroDeJunta(secretaria, 1) + " & \\\\ \n");
    hoja.append("\t \t \t " + miembroDeJunta(secretaria, 2)
            + " & {\\Bold Universidad de San Carlos de Guatemala} \\\\ \n");
    hoja.append("&" + miembroDeJunta(universidad, 1) + "  \\\\\n"
            + "            {\\Bold Comit Coordinador de } &" + miembroDeJunta(universidad, 2) + " \\\\  \n"
            + "            {\\Bold Asociaciones  Agrcolas, Comerciales, } & \\\\\n"
            + "            {\\Bold Industriales y Financieras} & {\\Bold Universidades Privadas} \\\\\n"
            + miembroDeJunta(comite, 1) + " & " + miembroDeJunta(privadas, 1) + "\\\\\n"
            + miembroDeJunta(comite, 2) + " & " + miembroDeJunta(privadas, 2) + "\\\\\n"
            + "         \\end{tabular}    \n" + "      \\end{center} \n   ");
    hoja.append("   \n" + "      {\\Bold \\large \\color{color1!89!black} GERENCIA}\\\\[0.2cm]\n"
            + "Gerente: " + gerencias.get(0) + "      \\\\\n" + "Subgerente Tcnico: " + gerencias.get(2)
            + "\\\\\n" + "Subgerente Administrativo Financiero: " + gerencias.get(1) + "\\\\ \n");
    hoja.append("\t \t \t \\end{center}\n");
    hoja.append("\t \t } \n");
    return hoja.toString();
}

/**
 * Returns the {@code junta} entry located {@code desplazamiento} positions after the entry equal
 * to {@code entidad}. When {@code entidad} is not in the list, {@code indexOf} yields -1 and the
 * lookup is relative to that.
 */
private String miembroDeJunta(String entidad, int desplazamiento) {
    return String.valueOf(junta.get(junta.indexOf(entidad) + desplazamiento));
}