Example usage for org.jsoup.nodes Element hasClass

List of usage examples for org.jsoup.nodes Element hasClass

Introduction

In this page you can find the example usage for org.jsoup.nodes Element hasClass.

Prototype


public boolean hasClass(String className) 

Source Link

Document

Tests if this element has a class.

Usage

From source file:de.geeksfactory.opacclient.apis.Heidi.java

@Override
public DetailledItem getResultById(String id, final String homebranch) throws IOException {

    if (sessid == null) {
        start();/*from  w ww.  java2s  .  c o m*/
    }

    // Homebranch
    if (homebranch != null && !"".equals(homebranch)) {
        cookieStore.addCookie(new BasicClientCookie("zweig", homebranch));
    }

    String html = httpGet(opac_url + "/titel.cgi?katkey=" + id + "&sess=" + sessid, ENCODING, false,
            cookieStore);
    Document doc = Jsoup.parse(html);

    DetailledItem item = new DetailledItem();
    item.setId(id);

    Elements table = doc.select(".titelsatz tr");
    for (Element tr : table) {
        if (tr.select("th").size() == 0 || tr.select("td").size() == 0) {
            continue;
        }
        String d = tr.select("th").first().text();
        String c = tr.select("td").first().text();
        if (d.equals("Titel:")) {
            item.setTitle(c);
        } else if ((d.contains("URL") || d.contains("Link")) && tr.select("td a").size() > 0) {
            item.addDetail(new Detail(d, tr.select("td a").first().attr("href")));
        } else {
            item.addDetail(new Detail(d, c));
        }
    }

    if (doc.select(".ex table tr").size() > 0) {
        table = doc.select(".ex table tr");
        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        for (Element tr : table) {
            if (tr.hasClass("exueber") || tr.select(".exsig").size() == 0 || tr.select(".exso").size() == 0
                    || tr.select(".exstatus").size() == 0) {
                continue;
            }
            Copy copy = new Copy();
            copy.setShelfmark(tr.select(".exsig").first().text());
            copy.setBranch(tr.select(".exso").first().text());
            String status = tr.select(".exstatus").first().text();
            if (status.contains("entliehen bis")) {
                copy.setReturnDate(fmt.parseLocalDate(status.replaceAll("entliehen bis ([0-9.]+) .*", "$1")));
                copy.setReservations(status.replaceAll(".*\\(.*Vormerkungen: ([0-9]+)\\)", "$1"));
                copy.setStatus("entliehen");
            } else {
                copy.setStatus(status);
            }
            item.addCopy(copy);
        }
    }

    for (Element a : doc.select(".status1 a")) {
        if (a.attr("href").contains("bestellung.cgi")) {
            item.setReservable(true);
            item.setReservation_info(id);
            break;
        }
    }
    for (Element a : doc.select(".titelsatz a")) {
        if (a.text().trim().matches("B.+nde")) {
            Map<String, String> volumesearch = new HashMap<>();
            volumesearch.put("query", getQueryParamsFirst(a.attr("href")).get("query"));
            item.setVolumesearch(volumesearch);
        }
    }

    return item;
}

From source file:com.weavers.duqhan.business.impl.ProductServiceImpl.java

@Override
public void loadTempProducts(List<StatusBean> statusBeans) {
    boolean isSuccess = true;
    String startDate = new Date().toString();
    Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE,
            "(==I==)DATE: " + startDate + "Store product details in temp product table start.....");
    try {/*w  w w . j a va  2s. co  m*/
        String status = "";
        for (StatusBean statusBean : statusBeans) {
            status = "Link duplicate";
            Temtproductlinklist temtproductlinklist = temtproductlinklistDao.loadById(statusBean.getId());
            if (temtproductlinklist != null && temtproductlinklist.getStatus() == 0) {
                Product testProduct = productDao.getProductByExternelLink(temtproductlinklist.getLink());
                if (testProduct == null) {
                    String value = "";
                    Elements detailMain;
                    Elements detailSub;
                    Elements specifics;
                    double votes = 0.0;
                    double stars = 0.0;
                    double feedback = 0.0;
                    String url = temtproductlinklist.getLink();
                    try {
                        testProduct = new Product();
                        Product savedTestProduct;

                        //=================== Random sleep START ===================//
                        //                            TimeUnit.SECONDS.sleep(30 + (int) (Math.random() * 100));
                        Random randomObj = new Random();
                        TimeUnit.SECONDS.sleep(randomObj.ints(30, 60).findFirst().getAsInt());
                        //=================== Random sleep END =====================//

                        Document doc = Jsoup.connect(url).get();
                        detailMain = doc.select("#j-detail-page");
                        if (!detailMain.isEmpty()) {

                            //=================== Criteria Block START==================//
                            detailMain = doc.select(".rantings-num");
                            if (!detailMain.isEmpty()) {
                                votes = Double.valueOf(detailMain.text().split(" votes")[0].split("\\(")[1]);
                            }
                            detailMain = doc.select(".percent-num");
                            if (!detailMain.isEmpty()) {
                                stars = Double.valueOf(detailMain.text());
                            }
                            detailMain = doc.select("ul.ui-tab-nav li[data-trigger='feedback'] a");
                            if (!detailMain.isEmpty()) {
                                feedback = Double.valueOf(detailMain.text().split("\\(")[1].split("\\)")[0]);
                            }
                            //=================== Criteria Block END==================//

                            if (votes > 10.0 && stars > 4.0 && feedback > 4.0) {
                                detailMain = doc.select(".detail-wrap .product-name");
                                testProduct.setName(detailMain
                                        .text());/*.substring(0, Math.min(detailMain.text().length(), 50))*/
                                detailMain = doc.select(".detail-wrap .product-name");
                                testProduct.setDescription(detailMain.text());
                                testProduct.setExternalLink(url);
                                testProduct.setVendorId(1l);//??????????????????????

                                //=================== Packaging block START==================//
                                Double weight = 1.0;
                                Double width = 1.0;
                                Double height = 1.0;
                                Double length = 1.0;
                                detailMain = doc.select(
                                        "div#j-product-desc div.pnl-packaging-main ul li.packaging-item");
                                for (Element element : detailMain) {
                                    String packagingTitle = element.select("span.packaging-title").text();
                                    String packagingDesc = element.select("span.packaging-des").text();
                                    if (packagingTitle.trim().equals("Package Weight:")) {
                                        String str = packagingDesc;
                                        str = str.replaceAll("[^.?0-9]+", " ");
                                        if (Arrays.asList(str.trim().split(" ")) != null) {
                                            if (!Arrays.asList(str.trim().split(" ")).isEmpty()) {
                                                try {
                                                    weight = Double.parseDouble(
                                                            Arrays.asList(str.trim().split(" ")).get(0));
                                                } catch (Exception e) {
                                                    weight = 1.0;
                                                }
                                            }
                                        }
                                        System.out.println("weight == " + weight);
                                    } else if (packagingTitle.trim().equals("Package Size:")) {
                                        String str = packagingDesc;
                                        str = str.replaceAll("[^.?0-9]+", " ");
                                        if (Arrays.asList(str.trim().split(" ")) != null) {
                                            if (!Arrays.asList(str.trim().split(" ")).isEmpty()) {
                                                try {
                                                    width = Double.parseDouble(
                                                            Arrays.asList(str.trim().split(" ")).get(0));
                                                    height = Double.parseDouble(
                                                            Arrays.asList(str.trim().split(" ")).get(1));
                                                    length = Double.parseDouble(
                                                            Arrays.asList(str.trim().split(" ")).get(2));
                                                } catch (Exception e) {
                                                    width = 1.0;
                                                    height = 1.0;
                                                    length = 1.0;
                                                }
                                            }
                                        }
                                        System.out.println("width == " + width);
                                        System.out.println("height == " + height);
                                        System.out.println("length == " + length);
                                    }
                                }
                                //=================== Packaging block END==================//

                                //=================== Category block START==================//
                                detailMain = doc.select("div.ui-breadcrumb div.container a");
                                Long productCategoryId = 0L;
                                String parentPath = "";
                                String thisCategory = detailMain.last().text().trim();
                                System.out.println("thisCategory == " + thisCategory);
                                Category parentCategory = new Category();
                                parentCategory.setId(0L);
                                parentCategory.setParentPath("");
                                for (Element element : detailMain) {
                                    String newCategory;
                                    newCategory = element.text().trim();
                                    System.out.println("newCategory======" + newCategory);
                                    if (newCategory.equals("Home") || newCategory.equals("All Categories")) {
                                    } else {
                                        Category category = categoryDao.getCategoryByName(newCategory);
                                        if (category != null) {
                                            if (category.getName().equals(thisCategory)) {
                                                productCategoryId = category.getId();
                                                parentPath = category.getParentPath();
                                            }
                                            parentCategory = category;
                                        } else {
                                            category = new Category();
                                            category.setId(null);
                                            category.setName(newCategory);
                                            category.setParentId(parentCategory.getId());
                                            category.setParentPath(parentCategory.getParentPath()
                                                    + parentCategory.getId() + "=");
                                            category.setQuantity(0);
                                            category.setImgUrl("-");
                                            category.setDisplayText(newCategory);
                                            Category category2 = categoryDao.save(category);
                                            if (category.getName().equals(thisCategory)) {
                                                productCategoryId = category2.getId();
                                                parentPath = category2.getParentPath();
                                            }
                                            parentCategory = category2;
                                        }
                                    }
                                }
                                //=================== Category block END==================//

                                //=============== Specifications block START==============//
                                detailMain = doc.select(".product-property-list .property-item");
                                String specifications = "";
                                for (Element element : detailMain) {
                                    specifications = specifications
                                            + element.select(".propery-title").get(0).text().replace(",", "/")
                                                    .replace(":", "-")
                                            + ":" + element.select(".propery-des").get(0).text()
                                                    .replace(",", "/").replace(":", "-")
                                            + ",";//TODO:, check
                                }
                                //=============== Specifications Block END==============//

                                //=============== Shipping Time Block START==============//
                                String shippingTime = "";
                                detailMain = doc.select(".shipping-days[data-role='delivery-days']");
                                System.out.println("value detailMain" + detailMain.toString());
                                shippingTime = detailMain.text();
                                //=============== Shipping Time Block END==============//

                                //=============== Shipping Cost Block START==============//
                                detailMain = doc.select(".logistics-cost");
                                value = detailMain.text();
                                if (!value.equalsIgnoreCase("Free Shipping")) {
                                    //                                        f = 0.00;
                                } else {
                                    //                                        f = Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1"));
                                }
                                //=============== Shipping Cost Block END==============//

                                //=================Product save 1st START==============//
                                testProduct.setCategoryId(productCategoryId);
                                testProduct.setLastUpdate(new Date());
                                testProduct.setParentPath(parentPath);
                                testProduct.setImgurl("-");
                                testProduct.setProperties("-");
                                testProduct.setProductWidth(width);
                                testProduct.setProductLength(length);
                                testProduct.setProductWeight(weight);
                                testProduct.setProductHeight(height);
                                testProduct.setShippingRate(0.0);
                                testProduct.setShippingTime("45");
                                testProduct.setSpecifications(specifications);
                                savedTestProduct = productDao.save(testProduct);
                                //====================Product save 1st END==============//

                                //========= Property, Property Value, Property Product Map Block START ========//
                                double discountPrice = 0.0;
                                double actualPrice = 0.0;
                                double markupPrice = 0.0;
                                String id = "";
                                String allProperties = "";
                                //------------------------Read Color css START---------------------//
                                specifics = doc.select("#j-product-info-sku dl.p-property-item");
                                Elements cssdetailMain = doc.select("link[href]");
                                Document cssdoc = new Document("");
                                System.out.println(
                                        "====================================================cssdetailMain"
                                                + cssdetailMain.size());
                                for (Element element : cssdetailMain) {
                                    String cssurl = element.attr("abs:href");
                                    if (cssurl.contains("??main-detail")) {
                                        try {
                                            cssdoc = Jsoup.connect(cssurl).get();
                                        } catch (IOException ex) {

                                        }
                                        break;
                                    }
                                }
                                //-----------------------Read Color css END--------------------------//

                                //-----------Product Property, Property Value START--------//
                                Map<String, ProductPropertyvalues> propertyValuesMap = new HashMap<>();
                                if (!specifics.isEmpty()) {
                                    ProductProperties testPorperties;
                                    ProductProperties saveTestPorperties;
                                    ProductPropertyvalues testPropertyValues;
                                    for (Element specific : specifics) {
                                        System.out.println("head  ==== " + specific.select("dt").text());
                                        testPorperties = productPropertiesDao
                                                .loadByName(specific.select("dt").text());
                                        if (testPorperties == null) {
                                            testPorperties = new ProductProperties();
                                            testPorperties.setPropertyName(specific.select("dt").text());
                                            saveTestPorperties = productPropertiesDao.save(testPorperties);
                                        } else {
                                            saveTestPorperties = testPorperties;
                                        }
                                        allProperties = allProperties + saveTestPorperties.getId().toString()
                                                + "-";
                                        detailSub = specific.select("dd ul li");
                                        String valu = "-";
                                        for (Element element : detailSub) {
                                            testPropertyValues = new ProductPropertyvalues();
                                            id = element.select("a[data-sku-id]").attr("data-sku-id").trim();
                                            testPropertyValues.setRefId(id);
                                            if (element.hasClass("item-sku-image")) {
                                                valu = element.select("a img[src]").get(0).absUrl("src")
                                                        .split(".jpg")[0] + ".jpg";
                                                String title = element.select("a img").get(0).attr("title");
                                                String imgUrl = GoogleBucketFileUploader
                                                        .uploadProductImage(valu, savedTestProduct.getId());
                                                valu = "<img src='" + imgUrl + "' title='" + title
                                                        + "' style='height:40px; width:40px;'/>";
                                            } else if (element.hasClass("item-sku-color")) {
                                                String style = cssdoc.html().split("sku-color-" + id)[1]
                                                        .split("}")[0].substring(1);
                                                valu = "<span style='" + style
                                                        + "' ; height:40px; width:40px; display:block;'></span>";
                                            } else {
                                                valu = element.select("a span").toString();
                                            }
                                            System.out.println("valu === " + valu);
                                            testPropertyValues.setProductId(savedTestProduct.getId());
                                            testPropertyValues.setPropertyId(saveTestPorperties.getId());
                                            testPropertyValues.setValueName(valu);
                                            propertyValuesMap.put(id,
                                                    productPropertyvaluesDao.save(testPropertyValues));
                                        }
                                    }
                                    savedTestProduct.setProperties(allProperties);
                                }
                                //-----------Product Property, Property Value END--------//

                                //----------------------Read json START------------------//
                                List<AxpProductDto> axpProductDtos = new ArrayList<>();
                                Elements scripts = doc.select("script"); // Get the script part
                                for (Element script : scripts) {
                                    if (script.html().contains("var skuProducts=")) {
                                        String jsonData = "";
                                        jsonData = script.html().split("var skuProducts=")[1]
                                                .split("var GaData")[0].trim();
                                        jsonData = jsonData.substring(0, jsonData.length() - 1);
                                        Gson gsonObj = new Gson();
                                        axpProductDtos = Arrays
                                                .asList(gsonObj.fromJson(jsonData, AxpProductDto[].class));
                                        break;
                                    }
                                }
                                //----------------------Read json END------------------//

                                //-------------Product Properties Map START------------//
                                for (AxpProductDto thisAxpProductDto : axpProductDtos) {
                                    SkuVal skuVal = thisAxpProductDto.getSkuVal();
                                    if (skuVal.getActSkuCalPrice() != null) {
                                        value = skuVal.getActSkuCalPrice().trim();
                                        discountPrice = CurrencyConverter.usdTOinr(
                                                Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1")));
                                        value = skuVal.getSkuCalPrice().trim();
                                        actualPrice = CurrencyConverter.usdTOinr(
                                                Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1")));
                                        markupPrice = discountPrice * 0.15 + 100;
                                        discountPrice = Math.ceil((discountPrice + markupPrice) / 10) * 10;
                                        actualPrice = Math.round(actualPrice + markupPrice);
                                    } else {
                                        discountPrice = 0.0;
                                        value = skuVal.getSkuCalPrice().trim();
                                        actualPrice = CurrencyConverter.usdTOinr(
                                                Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1")));
                                        markupPrice = actualPrice * 0.15 + 100;
                                        discountPrice = Math.round(actualPrice + markupPrice);
                                        actualPrice = Math.round(actualPrice + markupPrice);
                                    }

                                    ProductPropertiesMap productPropertyMap = new ProductPropertiesMap();
                                    String myPropValueIds = "";
                                    if (thisAxpProductDto.getSkuAttr() != null) {
                                        String[] skuPropIds = thisAxpProductDto.getSkuPropIds().split(",");
                                        for (String skuPropId : skuPropIds) {
                                            myPropValueIds = myPropValueIds
                                                    + propertyValuesMap.get(skuPropId).getId().toString() + "_";
                                        }

                                        productPropertyMap.setPropertyvalueComposition(myPropValueIds);
                                    } else {
                                        productPropertyMap.setPropertyvalueComposition("_");
                                    }
                                    productPropertyMap.setDiscount(discountPrice);
                                    productPropertyMap.setPrice(actualPrice);
                                    productPropertyMap.setProductId(savedTestProduct);
                                    productPropertyMap.setQuantity(5l);
                                    productPropertiesMapDao.save(productPropertyMap);
                                }
                                //-------------Product Properties Map START------------//
                                //========= Property, Property Value, Property Product Map Block END ========//

                                //============= Multiple Image Block START =============//
                                detailMain = doc.select("ul.image-thumb-list span.img-thumb-item img[src]");
                                int flg = 0;
                                String imgUrl = "";
                                for (Element element : detailMain) {
                                    imgUrl = GoogleBucketFileUploader.uploadProductImage(
                                            element.absUrl("src").split(".jpg")[0] + ".jpg",
                                            savedTestProduct.getId());
                                    if (flg == 0) {
                                        flg++;
                                        savedTestProduct.setImgurl(imgUrl);
                                    } else {
                                        ProductImg productImg = new ProductImg();
                                        productImg.setId(null);
                                        productImg.setImgUrl(imgUrl);
                                        productImg.setProductId(savedTestProduct.getId());
                                        productImgDao.save(productImg);
                                    }
                                }
                                //============= Multiple Image Block END =============//

                                //=================Product save final START==============//
                                if (productDao.save(savedTestProduct) != null) {
                                    temtproductlinklist.setStatus(1);//
                                    temtproductlinklistDao.save(temtproductlinklist);
                                    status = "Success";
                                }
                                //=================Product save final START==============//
                            } else {
                                temtproductlinklist.setStatus(2);//
                                temtproductlinklistDao.save(temtproductlinklist);
                                status = "criteria mismatch";
                            }
                        } else {
                            status = "Page not found";
                        }
                    } catch (Exception ex) {
                        System.out.println(
                                "=============================================================Exception1" + ex);
                        temtproductlinklist.setStatus(4);//
                        temtproductlinklistDao.save(temtproductlinklist);
                        System.out.println("Exception === " + ex);
                        status = "Failure";
                        Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, "(==E==)DATE: "
                                + new Date().toString()
                                + "Store product details in temp product table get error in sub process.....\n Link Id: "
                                + statusBean.getId() + "\n Started on" + startDate, ex);
                    }
                } else {
                    temtproductlinklist.setStatus(3);//
                    temtproductlinklistDao.save(temtproductlinklist);
                    status = "Product exsist";
                }
            }
            //                String body = "Id: " + temtproductlinklist.getId() + "<br/> Status: " + status;
            //                MailSender.sendEmail("krisanu.nandi@pkweb.in", "Product captured", body, "subhendu.sett@pkweb.in");
            statusBean.setStatus(status);
        }
        System.out.println("=============================================================status" + status);
    } catch (Exception e) {
        System.out.println("=============================================================Exception2" + e);
        isSuccess = false;
        String body = "(==E==)DATE: " + new Date().toString()
                + "Store product details in temp product table get error.....<br/> Started on" + startDate
                + "<br/>";
        Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, body, e);
        //            MailSender.sendEmail("krisanu.nandi@pkweb.in", "Stopped store product details", body + e.getLocalizedMessage(), "subhendu.sett@pkweb.in");
    }
    if (isSuccess) {
        String body = "(==I==)DATE: " + new Date().toString()
                + "Store product details in temp product table end.....<br/> Started on" + startDate;
        Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, body);
        /*ObjectMapper mapper = new ObjectMapper();
        try {
        MailSender.sendEmail("krisanu.nandi@pkweb.in", "Completed store product details", body + "=============<br/><br/>" + mapper.writeValueAsString(statusBeans), "subhendu.sett@pkweb.in");
        } catch (JsonProcessingException ex) {
        Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, null, ex);
        }*/
    }
    //        return statusBeans;
    System.out.println("=============================================================end");
}

From source file:org.sakaiproject.nakamura.files.migrator.PageMigrator.java

void extractWidget(JSONObject originalStructure, String contentId, Set<String> widgetsUsed, String ref,
        JSONObject currentPage, JSONObject currentRow, int leftSideColumn, Element widgetElement)
        throws JSONException {
    String[] widgetIdParts = widgetElement.attr("id").split("_");
    String widgetType = widgetIdParts[1];
    String widgetId = widgetIdParts.length > 2 ? widgetIdParts[2] : generateWidgetId();
    int columnIndex;
    if (widgetElement.hasClass("block_image_left")) {
        columnIndex = 0;//from  w  w w .j  a v a 2  s  .c o  m
    } else if (widgetElement.hasClass("block_image_right")) {
        columnIndex = leftSideColumn > 0 ? 2 : 1;
    } else {
        columnIndex = leftSideColumn > 0 ? 1 : 0;
    }
    generateNewCell(widgetId, widgetType, currentPage, currentRow, columnIndex,
            getJSONObjectOrNull(originalStructure, widgetId));
    widgetsUsed.add(widgetId);
    if ("discussion".equals(widgetType)) {
        migrateDiscussionWidget(contentId, ref, currentPage, widgetId);
    }
    widgetElement.remove();
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java

@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() " + options.toString());
    // check if there is a md in the result
    if (options.getResult() != null && options.getResult().getMetadata() != null) {
        LOGGER.debug("IMDB: getMetadata from cache: " + options.getResult());
        return options.getResult().getMetadata();
    }//from   w  ww.  j av a  2s .co m

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String imdbId = "";

    // imdbId from searchResult
    if (options.getResult() != null) {
        imdbId = options.getResult().getIMDBId();
    }

    // imdbid from scraper option
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        imdbId = options.getImdbId();
    }

    if (!MetadataUtil.isValidImdbId(imdbId)) {
        return md;
    }

    LOGGER.debug("IMDB: getMetadata(imdbId): " + imdbId);
    md.setId(MediaMetadata.IMDBID, imdbId);

    ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<Document>(executor);
    ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<MediaMetadata>(
            executor);

    // worker for imdb request (/combined) (everytime from akas.imdb.com)
    // StringBuilder sb = new StringBuilder(imdbSite.getSite());
    StringBuilder sb = new StringBuilder(ImdbSiteDefinition.IMDB_COM.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/combined");
    Callable<Document> worker = new ImdbWorker(sb.toString(), options.getLanguage().name(),
            options.getCountry().getAlpha2());
    Future<Document> futureCombined = compSvcImdb.submit(worker);

    // worker for imdb request (/plotsummary) (from chosen site)
    Future<Document> futurePlotsummary = null;
    sb = new StringBuilder(imdbSite.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/plotsummary");

    worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2());
    futurePlotsummary = compSvcImdb.submit(worker);

    // worker for tmdb request
    Future<MediaMetadata> futureTmdb = null;
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry());
        futureTmdb = compSvcTmdb.submit(worker2);
    }

    Document doc;
    doc = futureCombined.get();

    /*
     * title and year have the following structure
     * 
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // parse title and year
    Element title = doc.getElementById("tn15title");
    if (title != null) {
        Element element = null;
        // title
        Elements elements = title.getElementsByTag("h1");
        if (elements.size() > 0) {
            element = elements.first();
            String movieTitle = cleanString(element.ownText());
            md.storeMetadata(MediaMetadata.TITLE, movieTitle);
        }

        // year
        elements = title.getElementsByTag("span");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();

            // search year
            Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");
            Matcher matcher = yearPattern.matcher(content);
            while (matcher.find()) {
                if (matcher.group(1) != null) {
                    String movieYear = matcher.group(1);
                    md.storeMetadata(MediaMetadata.YEAR, movieYear);
                    break;
                }
            }
        }

        // original title
        elements = title.getElementsByAttributeValue("class", "title-extra");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();
            content = content.replaceAll("\\(original title\\)", "").trim();
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, content);
        }
    }

    // poster
    Element poster = doc.getElementById("primary-poster");
    if (poster != null) {
        String posterUrl = poster.attr("src");
        posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
        posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
        processMediaArt(md, MediaArtworkType.POSTER, "Poster", posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementById("tn15rating");
    if (ratingElement != null) {
        Elements elements = ratingElement.getElementsByClass("starbar-meta");
        if (elements.size() > 0) {
            Element div = elements.get(0);

            // rating comes in <b> tag
            Elements b = div.getElementsByTag("b");
            if (b.size() == 1) {
                String ratingAsString = b.text();
                Pattern ratingPattern = Pattern.compile("([0-9]\\.[0-9])/10");
                Matcher matcher = ratingPattern.matcher(ratingAsString);
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        float rating = 0;
                        try {
                            rating = Float.valueOf(matcher.group(1));
                        } catch (Exception e) {
                        }
                        md.storeMetadata(MediaMetadata.RATING, rating);
                        break;
                    }
                }
            }

            // count
            Elements a = div.getElementsByAttributeValue("href", "ratings");
            if (a.size() == 1) {
                String countAsString = a.text().replaceAll("[.,]|votes", "").trim();
                int voteCount = 0;
                try {
                    voteCount = Integer.parseInt(countAsString);
                } catch (Exception e) {
                }
                md.storeMetadata(MediaMetadata.VOTE_COUNT, voteCount);
            }
        }

        // top250
        elements = ratingElement.getElementsByClass("starbar-special");
        if (elements.size() > 0) {
            Elements a = elements.get(0).getElementsByTag("a");
            if (a.size() > 0) {
                Element anchor = a.get(0);
                Pattern topPattern = Pattern.compile("Top 250: #([0-9]{1,3})");
                Matcher matcher = topPattern.matcher(anchor.ownText());
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        int top250 = 0;
                        try {
                            top250 = Integer.parseInt(matcher.group(1));
                        } catch (Exception e) {
                        }
                        md.storeMetadata(MediaMetadata.TOP_250, top250);
                    }
                }
            }
        }
    }

    // parse all items coming by <div class="info">
    Elements elements = doc.getElementsByClass("info");
    for (Element element : elements) {
        // only parse divs
        if (!"div".equals(element.tag().getName())) {
            continue;
        }

        // elements with h5 are the titles of the values
        Elements h5 = element.getElementsByTag("h5");
        if (h5.size() > 0) {
            Element firstH5 = h5.first();
            String h5Title = firstH5.text();

            // release date
            /*
             * <div class="info"><h5>Release Date:</h5><div class="info-content">5 January 1996 (USA)<a class="tn15more inline"
             * href="/title/tt0114746/releaseinfo"
             * onclick="(new Image()).src='/rg/title-tease/releasedates/images/b.gif?link=/title/tt0114746/releaseinfo';"> See more</a>&nbsp;</div></div>
             */
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getReleaseDate() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element releaseDateElement = div.first();
                    String releaseDate = cleanString(releaseDateElement.ownText().replaceAll("", ""));
                    Pattern pattern = Pattern.compile("(.*)\\(.*\\)");
                    Matcher matcher = pattern.matcher(releaseDate);
                    if (matcher.find()) {
                        try {
                            SimpleDateFormat sdf = new SimpleDateFormat("d MMM yyyy");
                            Date parsedDate = sdf.parse(matcher.group(1));
                            sdf = new SimpleDateFormat("dd-MM-yyyy");
                            md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(parsedDate));
                        } catch (Exception e) {
                        }
                    }
                }
            }

            /*
             * <div class="info"><h5>Tagline:</h5><div class="info-content"> (7) To Defend Us... <a class="tn15more inline"
             * href="/title/tt0472033/taglines" onClick= "(new Image()).src='/rg/title-tease/taglines/images/b.gif?link=/title/tt0472033/taglines';" >See
             * more</a>&nbsp;&raquo; </div></div>
             */
            // tagline
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getTagline() + ".*")
                    && !options.isScrapeImdbForeignLanguage()) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    String tagline = cleanString(taglineElement.ownText().replaceAll("", ""));
                    md.storeMetadata(MediaMetadata.TAGLINE, tagline);
                }
            }

            /*
             * <div class="info-content"><a href="/Sections/Genres/Animation/">Animation</a> | <a href="/Sections/Genres/Action/">Action</a> | <a
             * href="/Sections/Genres/Adventure/">Adventure</a> | <a href="/Sections/Genres/Fantasy/">Fantasy</a> | <a
             * href="/Sections/Genres/Mystery/">Mystery</a> | <a href="/Sections/Genres/Sci-Fi/">Sci-Fi</a> | <a
             * href="/Sections/Genres/Thriller/">Thriller</a> <a class="tn15more inline" href="/title/tt0472033/keywords" onClick=
             * "(new Image()).src='/rg/title-tease/keywords/images/b.gif?link=/title/tt0472033/keywords';" > See more</a>&nbsp;&raquo; </div>
             */
            // genres are only scraped from akas.imdb.com
            if (h5Title.matches("(?i)" + imdbSite.getGenre() + "(.*)")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Elements a = div.first().getElementsByTag("a");
                    for (Element anchor : a) {
                        if (anchor.attr("href").matches("/Sections/Genres/.*")) {
                            md.addGenre(getTmmGenre(anchor.ownText()));
                        }
                    }
                }
            }
            // }

            /*
             * <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) | 178 min (extended cut)</div></div>
             */
            // runtime
            // if (h5Title.matches("(?i)" + imdbSite.getRuntime() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getRuntime() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    String first = taglineElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (Exception e) {
                        // try to filter out the first number we find
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.storeMetadata(MediaMetadata.RUNTIME, runtime);
                }
            }

            /*
             * <div class="info"><h5>Country:</h5><div class="info-content"><a href="/country/fr">France</a> | <a href="/country/es">Spain</a> | <a
             * href="/country/it">Italy</a> | <a href="/country/hu">Hungary</a></div></div>
             */
            // country
            if (h5Title.matches("(?i)Country.*")) {
                Elements a = element.getElementsByTag("a");
                String countries = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/country/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String country = matcher.group(1);
                        if (StringUtils.isNotEmpty(countries)) {
                            countries += ", ";
                        }
                        countries += country.toUpperCase();
                    }
                }
                md.storeMetadata(MediaMetadata.COUNTRY, countries);
            }

            /*
             * <div class="info"><h5>Language:</h5><div class="info-content"><a href="/language/en">English</a> | <a href="/language/de">German</a> | <a
             * href="/language/fr">French</a> | <a href="/language/it">Italian</a></div>
             */
            // Spoken languages
            if (h5Title.matches("(?i)Language.*")) {
                Elements a = element.getElementsByTag("a");
                String spokenLanguages = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/language/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String langu = matcher.group(1);
                        if (StringUtils.isNotEmpty(spokenLanguages)) {
                            spokenLanguages += ", ";
                        }
                        spokenLanguages += langu;
                    }
                }
                md.storeMetadata(MediaMetadata.SPOKEN_LANGUAGES, spokenLanguages);
            }

            /*
             * <div class="info"><h5>Certification:</h5><div class="info-content"><a href="/search/title?certificates=us:pg">USA:PG</a> <i>(certificate
             * #47489)</i> | <a href="/search/title?certificates=ca:pg">Canada:PG</a> <i>(Ontario)</i> | <a
             * href="/search/title?certificates=au:pg">Australia:PG</a> | <a href="/search/title?certificates=in:u">India:U</a> | <a
             * href="/search/title?certificates=ie:pg">Ireland:PG</a> ...</div></div>
             */
            // certification
            // if (h5Title.matches("(?i)" + imdbSite.getCertification() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getCertification() + ".*")) {
                Elements a = element.getElementsByTag("a");
                for (Element anchor : a) {
                    // certification for the right country
                    if (anchor.attr("href").matches(
                            "(?i)/search/title\\?certificates=" + options.getCountry().getAlpha2() + ".*")) {
                        Pattern certificationPattern = Pattern.compile(".*:(.*)");
                        Matcher matcher = certificationPattern.matcher(anchor.ownText());
                        Certification certification = null;
                        while (matcher.find()) {
                            if (matcher.group(1) != null) {
                                certification = Certification.getCertification(options.getCountry(),
                                        matcher.group(1));
                            }
                        }

                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }
            }
        }

        /*
         * <div id="director-info" class="info"> <h5>Director:</h5> <div class="info-content"><a href="/name/nm0000416/" onclick=
         * "(new Image()).src='/rg/directorlist/position-1/images/b.gif?link=name/nm0000416/';" >Terry Gilliam</a><br/> </div> </div>
         */
        // director
        if ("director-info".equals(element.id())) {
            Elements a = element.getElementsByTag("a");
            for (Element anchor : a) {
                if (anchor.attr("href").matches("/name/nm.*")) {
                    MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR);
                    cm.setName(anchor.ownText());
                    md.addCastMember(cm);
                }
            }
        }
    }

    /*
     * <table class="cast"> <tr class="odd"><td class="hs"><a href="http://pro.imdb.com/widget/resume_redirect/" onClick=
     * "(new Image()).src='/rg/resume/prosystem/images/b.gif?link=http://pro.imdb.com/widget/resume_redirect/';" ><img src=
     * "http://i.media-imdb.com/images/SF9113d6f5b7cb1533c35313ccd181a6b1/tn15/no_photo.png" width="25" height="31" border="0"></td><td class="nm"><a
     * href="/name/nm0577828/" onclick= "(new Image()).src='/rg/castlist/position-1/images/b.gif?link=/name/nm0577828/';" >Joseph Melito</a></td><td
     * class="ddd"> ... </td><td class="char"><a href="/character/ch0003139/">Young Cole</a></td></tr> <tr class="even"><td class="hs"><a
     * href="/name/nm0000246/" onClick= "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0000246/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BMjA0MjMzMTE5OF5BMl5BanBnXkFtZTcwMzQ2ODE3Mw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0000246/" onclick=
     * "(new Image()).src='/rg/castlist/position-2/images/b.gif?link=/name/nm0000246/';" >Bruce Willis</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003139/">James Cole</a></td></tr> <tr class="odd"><td class="hs"><a href="/name/nm0781218/" onClick=
     * "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0781218/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BODI1MTA2MjkxM15BMl5BanBnXkFtZTcwMTcwMDg2Nw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0781218/" onclick=
     * "(new Image()).src='/rg/castlist/position-3/images/b.gif?link=/name/nm0781218/';" >Jon Seda</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003143/">Jose</a></td></tr>...</table>
     */
    // cast
    elements = doc.getElementsByClass("cast");
    if (elements.size() > 0) {
        Elements tr = elements.get(0).getElementsByTag("tr");
        for (Element row : tr) {
            Elements td = row.getElementsByTag("td");
            MediaCastMember cm = new MediaCastMember();
            for (Element column : td) {
                // actor thumb
                if (column.hasClass("hs")) {
                    Elements img = column.getElementsByTag("img");
                    if (img.size() > 0) {
                        String thumbUrl = img.get(0).attr("src");
                        if (thumbUrl.contains("no_photo.png")) {
                            cm.setImageUrl("");
                        } else {
                            thumbUrl = thumbUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                            thumbUrl = thumbUrl.replaceAll("SY[0-9]{2,4}_", "");
                            cm.setImageUrl(thumbUrl);
                        }
                    }
                }
                // actor name
                if (column.hasClass("nm")) {
                    cm.setName(cleanString(column.text()));
                }
                // character
                if (column.hasClass("char")) {
                    cm.setCharacter(cleanString(column.text()));
                }
            }
            if (StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    Element content = doc.getElementById("tn15content");
    if (content != null) {
        elements = content.getElementsByTag("table");
        for (Element table : elements) {
            // writers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getWriter())) {
                Elements anchors = table.getElementsByTag("a");
                for (Element anchor : anchors) {
                    if (anchor.attr("href").matches("/name/nm.*")) {
                        MediaCastMember cm = new MediaCastMember(CastType.WRITER);
                        cm.setName(anchor.ownText());
                        md.addCastMember(cm);
                    }
                }
            }

            // producers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                Elements rows = table.getElementsByTag("tr");
                for (Element row : rows) {
                    if (row.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                        continue;
                    }
                    Elements columns = row.children();
                    if (columns.size() == 0) {
                        continue;
                    }
                    MediaCastMember cm = new MediaCastMember(CastType.PRODUCER);
                    String name = cleanString(columns.get(0).text());
                    if (StringUtils.isBlank(name)) {
                        continue;
                    }
                    cm.setName(name);
                    if (columns.size() >= 3) {
                        cm.setPart(cleanString(columns.get(2).text()));
                    }
                    md.addCastMember(cm);
                }
            }
        }
    }

    // Production companies
    elements = doc.getElementsByClass("blackcatheader");
    for (Element blackcatheader : elements) {
        if (blackcatheader.ownText().equals(ImdbSiteDefinition.IMDB_COM.getProductionCompanies())) {
            Elements a = blackcatheader.nextElementSibling().getElementsByTag("a");
            StringBuilder productionCompanies = new StringBuilder();
            for (Element anchor : a) {
                if (StringUtils.isNotEmpty(productionCompanies)) {
                    productionCompanies.append(", ");
                }
                productionCompanies.append(anchor.ownText());
            }
            md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, productionCompanies.toString());
            break;
        }
    }

    /*
     * plot from /plotsummary
     */
    // build the url
    doc = null;
    doc = futurePlotsummary.get();

    // imdb.com has another site structure
    if (imdbSite == ImdbSiteDefinition.IMDB_COM) {
        Elements zebraList = doc.getElementsByClass("zebraList");
        if (zebraList != null && !zebraList.isEmpty()) {
            Elements odd = zebraList.get(0).getElementsByClass("odd");
            if (odd.isEmpty()) {
                odd = zebraList.get(0).getElementsByClass("even"); // sometimes imdb has even
            }
            if (odd.size() > 0) {
                Elements p = odd.get(0).getElementsByTag("p");
                if (p.size() > 0) {
                    String plot = cleanString(p.get(0).ownText());
                    md.storeMetadata(MediaMetadata.PLOT, plot);
                }
            }
        }
    } else {
        Element wiki = doc.getElementById("swiki.2.1");
        if (wiki != null) {
            String plot = cleanString(wiki.ownText());
            md.storeMetadata(MediaMetadata.PLOT, plot);
        }
    }

    // title also from chosen site if we are not scraping akas.imdb.com
    if (imdbSite != ImdbSiteDefinition.IMDB_COM) {
        title = doc.getElementById("tn15title");
        if (title != null) {
            Element element = null;
            // title
            elements = title.getElementsByClass("main");
            if (elements.size() > 0) {
                element = elements.first();
                String movieTitle = cleanString(element.ownText());
                md.storeMetadata(MediaMetadata.TITLE, movieTitle);
            }
        }
    }
    // }

    // get data from tmdb?
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        MediaMetadata tmdbMd = futureTmdb.get();
        if (options.isScrapeImdbForeignLanguage() && tmdbMd != null
                && StringUtils.isNotBlank(tmdbMd.getStringValue(MediaMetadata.PLOT))) {
            // tmdbid
            md.setId(MediaMetadata.TMDBID, tmdbMd.getId(MediaMetadata.TMDBID));
            // title
            md.storeMetadata(MediaMetadata.TITLE, tmdbMd.getStringValue(MediaMetadata.TITLE));
            // original title
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, tmdbMd.getStringValue(MediaMetadata.ORIGINAL_TITLE));
            // tagline
            md.storeMetadata(MediaMetadata.TAGLINE, tmdbMd.getStringValue(MediaMetadata.TAGLINE));
            // plot
            md.storeMetadata(MediaMetadata.PLOT, tmdbMd.getStringValue(MediaMetadata.PLOT));
            // collection info
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
        }
        if (options.isScrapeCollectionInfo() && tmdbMd != null) {
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
        }
    }

    // if we have still no original title, take the title
    if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
        md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
    }

    return md;
}

From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java

private void findRecommendations(@NotNull Document doc, @NotNull BilingualQueryResultBuilder resultBuilder) {
    // Determine all candidate nodes:
    Elements alternativeNodes = doc.select("div.cc > p > *");

    Language currentLanguage = null;//from   w w  w.j  ava 2s  . c  om

    for (Element node : alternativeNodes) {
        // If the next node is a flagicon, try to determine the language for the next entries from the class name
        if (node.tagName().equals("span") && node.hasClass("flagicon")) {
            Set<String> classNames = node.classNames();
            classNames.remove("flagicon");
            for (String className : classNames) {
                Language candidate = Language.getExistingLanguageById(className);
                if (candidate != null) {
                    currentLanguage = candidate;
                    break;
                }
            }
        } else if (node.tagName().equals("a")) {
            String recommendationText = node.text();

            DictionaryObjectBuilder objectBuilder = ImmutableDictionaryObject.builder();
            objectBuilder.setLanguage(currentLanguage).setGeneralForm(recommendationText);

            resultBuilder.addSimilarRecommendation(objectBuilder.build());
        }
    }
}