Example usage for org.jsoup.nodes Element tagName

List of usage examples for org.jsoup.nodes Element tagName

Introduction

This page collects example usages of the tagName() method of org.jsoup.nodes.Element.

Prototype

public String tagName() 

Source Link

Document

Get the name of the tag for this element.

Usage

From source file:org.opens.tanaguru.rules.rgaa30.Rgaa30Rule110102.java

/**
 * Links each label that contains a form control to its closest enclosing
 * form element in {@code labelFormMap}.
 * <p>
 * Labels that contain no element matching
 * {@code FORM_ELEMENT_WITH_ID_CSS_LIKE_QUERY}, and labels that have no
 * enclosing form, are not recorded.
 */
private void putLabelElementHandlerIntoTheMap() {
    for (Element el : labelElementHandler.get()) {
        // The selection does not depend on the ancestor being inspected, so
        // evaluate it once and skip labels without a matching control.
        if (el.select(FORM_ELEMENT_WITH_ID_CSS_LIKE_QUERY).isEmpty()) {
            continue;
        }
        Element tmpElement = el.parent();
        // parent() yields null once the top of the tree is passed; without the
        // null check a label outside any form ends in a NullPointerException.
        while (tmpElement != null && StringUtils.isNotBlank(tmpElement.tagName())) {
            if (tmpElement.tagName().equals(FORM_TAG)) {
                if (labelFormMap.containsKey(tmpElement)) {
                    labelFormMap.get(tmpElement).add(el);
                } else {
                    ElementHandler<Element> labelElement = new ElementHandlerImpl();
                    labelElement.add(el);
                    labelFormMap.put(tmpElement, labelElement);
                }
                // Only the nearest enclosing form is associated with the label.
                break;
            }
            tmpElement = tmpElement.parent();
        }
    }
}

From source file:org.opens.tanaguru.rules.rgaa30.Rgaa30Rule110103.java

/**
 * Links each input element to its closest enclosing form element in
 * {@code inputFormMap}.
 * <p>
 * Inputs that have no enclosing form are not recorded.
 */
private void putInputElementHandlerIntoTheMap() {
    for (Element el : inputElementHandler.get()) {
        Element tmpElement = el.parent();
        // parent() yields null once the top of the tree is passed; without the
        // null check an input outside any form ends in a NullPointerException.
        while (tmpElement != null && StringUtils.isNotBlank(tmpElement.tagName())) {
            if (tmpElement.tagName().equals(FORM_TAG)) {
                if (inputFormMap.containsKey(tmpElement)) {
                    inputFormMap.get(tmpElement).add(el);
                } else {
                    ElementHandler<Element> inputElement = new ElementHandlerImpl();
                    inputElement.add(el);
                    inputFormMap.put(tmpElement, inputElement);
                }
                // Only the nearest enclosing form is associated with the input.
                break;
            }
            tmpElement = tmpElement.parent();
        }
    }
}

From source file:org.sbs.goodcrawler.extractor.selector.action.string.ActionFactory.java

/**
 * Builds the {@link SelectorAction} described by an action configuration
 * element.
 * <p>
 * The action family is chosen from {@code c}: {@code "integer"},
 * {@code "int"} and {@code "numerica"} use the integer operations,
 * {@code "file"} uses the file operations, {@code "date"} has no operations,
 * and every other value (including {@code "string"} and {@code null}) uses
 * the string operations. The concrete operation is read from the element's
 * {@code operation} attribute.
 *
 * @param element the action configuration element
 * @param c       the value type of the selector that owns the action
 * @return the configured action, or {@code null} when the operation is
 *         missing, unknown or cannot be constructed
 */
public static SelectorAction create(Element element, String c) {
    String operation = element.attr("operation");
    if ("integer".equals(c) || "int".equals(c) || "numerica".equals(c)) {
        return newIntegerAction(element, EnumUtils.getEnum(IntegerActionType.class, operation));
    }
    if ("date".equals(c)) {
        // No date operations are implemented.
        return null;
    }
    if ("file".equals(c)) {
        return newFileAction(element, EnumUtils.getEnum(FileActionType.class, operation));
    }
    // "string" and every unrecognised type are handled as string actions.
    return newStringAction(element, EnumUtils.getEnum(StringActionType.class, operation));
}

/** Builds a string action; returns {@code null} (after reporting) when {@code type} is null. */
private static SelectorAction newStringAction(Element element, StringActionType type) {
    if (type == null) {
        reportUnknownOperation(element);
        return null;
    }
    switch (type) {
    case after:
        return new StringAfterAction(element.attr("split"));
    case afterLast:
        return new StringAfterLastAction(element.attr("split"));
    case before:
        return new StringBeforeAction(element.attr("split"));
    case beforeLast:
        return new StringBeforeLastAction(element.attr("split"));
    case between:
        return new StringBetweenAction(element.attr("exp"));
    case filter:
        return new StringFilterAction(element.attr("filter"), element.attr("charType"));
    case replace:
        return new StringReplaceAction(element.attr("search"), element.attr("replacement"));
    case split:
        return new StringSplitAction(element.attr("split"), element.attr("index"));
    case sub:
        return new StringSubAction(element.attr("exp"));
    case suffix:
        return new StringSuffixAction(element.attr("suffix"));
    case perfix:
        return new StringPerfixAction(element.attr("perfix"));
    default:
        return null;
    }
}

/** Builds an integer action; returns {@code null} (after reporting) when {@code type} is null. */
private static SelectorAction newIntegerAction(Element element, IntegerActionType type) {
    if (type == null) {
        reportUnknownOperation(element);
        return null;
    }
    switch (type) {
    case abs:
        return new IntegerAbsAction();
    case between:
        try {
            return new IntegerBetweenAction(element.attr("exp"), element.attr("default"));
        } catch (IntegerBetweenExpressionException e) {
            // An invalid expression yields no action, as before.
            e.printStackTrace();
            return null;
        }
    default:
        return null;
    }
}

/** Builds a file download action; returns {@code null} (after reporting) when {@code type} is null. */
private static SelectorAction newFileAction(Element element, FileActionType type) {
    if (type == null) {
        reportUnknownOperation(element);
        return null;
    }
    switch (type) {
    case download:
        return new DownLoadFileAction(element.attr("dir"), usesMd5FileName(element),
                isAsynchronous(element));
    case download_resize:
        DownLoadImageResizeAction resizeAction = new DownLoadImageResizeAction(element.attr("dir"),
                usesMd5FileName(element), isAsynchronous(element));
        String width = element.attr("width");
        if (StringUtils.isNotBlank(width)) {
            resizeAction.setW(Integer.parseInt(width));
        }
        String height = element.attr("height");
        if (StringUtils.isNotBlank(height)) {
            resizeAction.setH(Integer.parseInt(height));
        }
        String quality = element.attr("quality");
        if (StringUtils.isNotBlank(quality)) {
            resizeAction.setQuality(Float.parseFloat(quality));
        }
        String deleteOld = element.attr("del");
        if (StringUtils.isNotBlank(deleteOld)) {
            resizeAction.setDeleteOldFile(Boolean.parseBoolean(deleteOld));
        }
        return resizeAction;
    default:
        return null;
    }
}

/** Returns true when {@code fileName} is blank or is the literal <code>{md5}</code>. */
private static boolean usesMd5FileName(Element element) {
    String fileName = element.attr("fileName");
    return StringUtils.isBlank(fileName) || "{md5}".equals(fileName);
}

/** Returns the parsed {@code asyn} attribute, defaulting to true when it is blank. */
private static boolean isAsynchronous(Element element) {
    String asyn = element.attr("asyn");
    return StringUtils.isBlank(asyn) || Boolean.parseBoolean(asyn);
}

/**
 * Prints a stack trace flagging an element whose operation could not be
 * resolved. The message text is kept exactly as the original code emitted it.
 */
private static void reportUnknownOperation(Element element) {
    new Exception("?" + element.tagName() + "operation").printStackTrace();
}

From source file:org.sbs.goodcrawler.extractor.selector.factory.ElementCssSelectorFactory.java

/**
 * <b>Element??Element??select/*from ww w  .j  a  va  2  s . co m*/
 * @param element
 * @return
 */
@SuppressWarnings("rawtypes")
public static AbstractElementCssSelector create(Element element) {
    String name = element.attr("name");
    String value = element.attr("value");
    String type = element.attr("type");
    String attr = element.attr("attr");
    String pattern = element.attr("pattern");
    String regex = element.attr("regex");
    String required = element.attr("required");
    String sIndex = element.attr("index");
    boolean isRequired = false;
    if (StringUtils.isNotBlank(required)) {
        isRequired = Boolean.parseBoolean(required);
    }
    int index = 0;
    if (StringUtils.isNotBlank(sIndex)) {
        index = Integer.parseInt(sIndex);
    }
    AbstractElementCssSelector selector = ElementCssSelectorFactory.create(name, type, value, attr, isRequired,
            index, regex, pattern);
    // ?
    Elements children = element.children();
    for (Element e : children) {
        if ("action".equals(e.tagName())) {
            SelectorAction action = ActionFactory.create(e, element.attr("type"));
            if (action != null)
                selector.addAction(action);
        }
        // ?Url
        else if ("element".equals(e.tagName())) {
            ((PageElementSelector) selector).addSelector(create(e));
        }
    }
    return selector;
}

From source file:org.sbs.goodcrawler.jobconf.ExtractConfig.java

/**
 * ????/*  ww  w. ja v  a 2  s.  co m*/
 * @param doc
 * @return
 * @throws ConfigurationException
 */
public ExtractConfig loadConfig(Document doc) throws ConfigurationException {
    Elements extractElement = doc.select("extract");
    super.jobName = doc.select("job").attr("name");
    super.indexName = doc.select("job").attr("indexName");
    String temp = extractElement.select("threadNum").text();
    if (StringUtils.isNotBlank(temp)) {
        this.threadNum = Integer.parseInt(temp);
    }

    Elements templateElement = extractElement.select("extract").select("template");
    Iterator<Element> it = templateElement.iterator();
    while (it.hasNext()) {
        Element template = it.next();
        ExtractTemplate extractTemplate = new ExtractTemplate();
        // ?Url????
        Elements urlPatternElement = template.select("url");
        List<Pattern> patterns = Lists.newArrayList();
        for (Element urlElement : urlPatternElement) {
            patterns.add(Pattern.compile(urlElement.text()));
        }
        extractTemplate.setUrlPattern(patterns);
        extractTemplate.setName(template.attr("name"));
        // ???
        Elements selectElement = template.select("elements").first().children();
        for (Element element : selectElement) {
            if ("element".equals(element.tagName())) {
                AbstractElementCssSelector<?> selector = ElementCssSelectorFactory.create(element);
                extractTemplate.addCssSelector(selector);
            } else if ("if".equals(element.tagName())) {
                IFConditions ifConditions = IFConditions.create(element);
                extractTemplate.addConditions(ifConditions);
            }
        }
        this.templates.add(extractTemplate);
    }
    return this;
}

From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

/**
 * Adds one actor cast member to {@code md} for every child of {@code e}.
 * <p>
 * Within each child, a {@code name} element supplies the character name and
 * a {@code seiyuu} element supplies the actor name and, when its
 * {@code picture} attribute is not blank, the image URL. Tag names are
 * compared case-insensitively.
 *
 * @param md the metadata that receives the cast members
 * @param e  the element whose children describe the characters
 */
private void getActors(MediaMetadata md, Element e) {
    for (Element characterNode : e.children()) {
        MediaCastMember castMember = new MediaCastMember(CastType.ACTOR);
        for (Element detail : characterNode.children()) {
            String tag = detail.tagName();
            if ("name".equalsIgnoreCase(tag)) {
                castMember.setCharacter(detail.text());
            } else if ("seiyuu".equalsIgnoreCase(tag)) {
                castMember.setName(detail.text());
                String picture = detail.attr("picture");
                if (StringUtils.isNotBlank(picture)) {
                    castMember.setImageUrl("http://img7.anidb.net/pics/anime/" + picture);
                }
            }
        }
        md.addCastMember(castMember);
    }
}

From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

/**
 * Stores the rating and vote count taken from the first parseable
 * {@code temporary} child of {@code e} (tag compared case-insensitively).
 * <p>
 * The rating comes from the child's text and the vote count from its
 * {@code count} attribute. If a number cannot be parsed, the next
 * {@code temporary} child is tried; a rating stored before a failing vote
 * count is left in place.
 *
 * @param md the metadata that receives the values
 * @param e  the element whose children hold the ratings
 */
private void getRating(MediaMetadata md, Element e) {
    for (Element candidate : e.children()) {
        if (!"temporary".equalsIgnoreCase(candidate.tagName())) {
            continue;
        }
        try {
            md.storeMetadata(MediaMetadata.RATING, Float.parseFloat(candidate.text()));
            md.storeMetadata(MediaMetadata.VOTE_COUNT, Integer.parseInt(candidate.attr("count")));
            // Both values were stored; nothing further to look for.
            return;
        } catch (NumberFormatException ignored) {
            // Unparseable value: fall through and try the next candidate.
        }
    }
}

From source file:org.tinymediamanager.scraper.anidb.AniDBMetadataProvider.java

/**
 * Extracts the episodes listed in an anime document.
 * <p>
 * The {@code episodes} child of the document's first child is located
 * (case-insensitively) and each of its {@code episode} children
 * (case-sensitive match) becomes one {@code Episode}. Numeric fields that
 * cannot be parsed are left at their initial values.
 *
 * @param doc the document whose first child holds the {@code episodes} element
 * @return the parsed episodes; empty when there is no {@code episodes} element
 */
private List<Episode> parseEpisodes(Document doc) {
    List<Episode> episodes = new ArrayList<Episode>();

    // Locate the "episodes" container below the root element.
    Element eps = null;
    for (Element child : doc.child(0).children()) {
        if ("episodes".equalsIgnoreCase(child.tagName())) {
            eps = child;
            break;
        }
    }
    if (eps == null) {
        return episodes;
    }

    for (Element node : eps.children()) {
        // Anything that is not an episode entry is ignored.
        if (!"episode".equals(node.tagName())) {
            continue;
        }

        Episode episode = new Episode();
        try {
            episode.id = Integer.parseInt(node.attr("id"));
        } catch (NumberFormatException ignored) {
            // keep the initial id
        }

        for (Element info : node.children()) {
            String tag = info.tagName();
            if ("epno".equalsIgnoreCase(tag)) {
                try {
                    episode.episode = Integer.parseInt(info.text());
                    // Type 1 entries are put into season 1; every other type
                    // is treated as a "special" (season 0).
                    episode.season = "1".equals(info.attr("type")) ? 1 : 0;
                } catch (NumberFormatException ignored) {
                    // keep the initial episode number and season
                }
            } else if ("length".equalsIgnoreCase(tag)) {
                try {
                    episode.runtime = Integer.parseInt(info.text());
                } catch (NumberFormatException ignored) {
                    // keep the initial runtime
                }
            } else if ("airdate".equalsIgnoreCase(tag)) {
                episode.airdate = info.text();
            } else if ("rating".equalsIgnoreCase(tag)) {
                try {
                    episode.rating = Float.parseFloat(info.text());
                } catch (NumberFormatException ignored) {
                    // keep the initial rating
                }
            } else if ("title".equalsIgnoreCase(tag)) {
                try {
                    episode.titles.put(info.attr("xml:lang").toLowerCase(), info.text());
                } catch (Exception ignored) {
                    // a title that cannot be stored is skipped
                }
            } else if ("summary".equalsIgnoreCase(tag)) {
                episode.summary = info.text();
            }
        }
        episodes.add(episode);
    }

    return episodes;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java

/**
 * Searches IMDb for titles matching the given query.
 * <p>
 * The search term is the first non-empty value of the IMDBID, QUERY and
 * TITLE search parameters. If the response carries a {@code rel="canonical"}
 * link containing an IMDb id and the metadata scraped for that id has a
 * title, a single result with score 1 is returned. Otherwise the rows with
 * class {@code findResult} are parsed, entries marked as TV Series,
 * TV Episode, Short or Video Game are skipped, at most 40 results are
 * collected, and the list is sorted and then reversed.
 *
 * @param query the search options
 * @return the results found; empty when no search term is available or the
 *         search page could not be fetched
 * @throws Exception declared by the signature; may propagate from the calls
 *         made while scraping
 */
@Override
public List<MediaSearchResult> search(MediaSearchOptions query) throws Exception {
    LOGGER.debug("search() " + query.toString());
    /*
     * IMDb matches seem to come in several "flavours".
     * 
     * Firstly, if there is one exact match it returns the matching IMDb page.
     * 
     * If that fails to produce a unique hit then a list of possible matches are returned categorised as: Popular Titles (Displaying ? Results) Titles
     * (Exact Matches) (Displaying ? Results) Titles (Partial Matches) (Displaying ? Results)
     * 
     * We should check the Exact match section first, then the popular titles and finally the partial matches.
     * 
     * Note: That even with exact matches there can be more than 1 hit, for example "Star Trek"
     */

    // matches the IMDb id (tt + 7 digits) inside a /title/<id>/ link
    Pattern imdbIdPattern = Pattern.compile("/title/(tt[0-9]{7})/");

    List<MediaSearchResult> result = new ArrayList<MediaSearchResult>();

    // search term precedence: IMDb id, then free query, then title
    String searchTerm = "";

    if (StringUtils.isNotEmpty(query.get(SearchParam.IMDBID))) {
        searchTerm = query.get(SearchParam.IMDBID);
    }

    if (StringUtils.isEmpty(searchTerm)) {
        searchTerm = query.get(SearchParam.QUERY);
    }

    if (StringUtils.isEmpty(searchTerm)) {
        searchTerm = query.get(SearchParam.TITLE);
    }

    // nothing to search for
    if (StringUtils.isEmpty(searchTerm)) {
        return result;
    }

    // parse out language and country from the scraper options
    // NOTE(review): language and country are dereferenced further down without a null check - confirm the query always carries them
    String language = query.get(SearchParam.LANGUAGE);
    String myear = query.get(SearchParam.YEAR);
    String country = query.get(SearchParam.COUNTRY); // for passing the country to the scrape

    searchTerm = MetadataUtil.removeNonSearchCharacters(searchTerm);

    // build the search URL: <site>find?q=<encoded term><category suffix>
    StringBuilder sb = new StringBuilder(imdbSite.getSite());
    sb.append("find?q=");
    try {
        // the search site always expects UTF-8
        sb.append(URLEncoder.encode(searchTerm, "UTF-8"));
    } catch (UnsupportedEncodingException ex) {
        // Failed to encode the movie name for some reason! Fall back to the raw term.
        LOGGER.debug("Failed to encode search term: " + searchTerm);
        sb.append(searchTerm);
    }

    // we need to search for all - otherwise we do not find TV movies
    sb.append(CAT_TITLE);

    LOGGER.debug("========= BEGIN IMDB Scraper Search for: " + sb.toString());
    Document doc;
    try {
        CachedUrl url = new CachedUrl(sb.toString());
        url.addHeader("Accept-Language", getAcceptLanguage(language, country));
        doc = Jsoup.parse(url.getInputStream(), "UTF-8", "");
    } catch (Exception e) {
        LOGGER.debug("tried to fetch search response", e);

        // clear Cache
        CachedUrl.removeCachedFileForUrl(sb.toString());

        // fetching failed: report no results instead of propagating
        return result;
    }

    // check if it was directly redirected to the site
    Elements elements = doc.getElementsByAttributeValue("rel", "canonical");
    for (Element element : elements) {
        MediaMetadata md = null;
        // we have been redirected to the movie site
        String movieName = null;
        String movieId = null;

        // the last id matched in the canonical link wins
        String href = element.attr("href");
        Matcher matcher = imdbIdPattern.matcher(href);
        while (matcher.find()) {
            if (matcher.group(1) != null) {
                movieId = matcher.group(1);
            }
        }

        // get full information
        if (!StringUtils.isEmpty(movieId)) {
            MediaScrapeOptions options = new MediaScrapeOptions();
            options.setImdbId(movieId);
            options.setLanguage(MediaLanguages.valueOf(language));
            options.setCountry(CountryCode.valueOf(country));
            options.setScrapeCollectionInfo(Boolean.parseBoolean(query.get(SearchParam.COLLECTION_INFO)));
            options.setScrapeImdbForeignLanguage(
                    Boolean.parseBoolean(query.get(SearchParam.IMDB_FOREIGN_LANGUAGE)));
            md = getMetadata(options);
            if (!StringUtils.isEmpty(md.getStringValue(MediaMetadata.TITLE))) {
                movieName = md.getStringValue(MediaMetadata.TITLE);
            }
        }

        // if a movie name/id was found - return it
        if (StringUtils.isNotEmpty(movieName) && StringUtils.isNotEmpty(movieId)) {
            MediaSearchResult sr = new MediaSearchResult(providerInfo.getId());
            sr.setTitle(movieName);
            sr.setIMDBId(movieId);
            sr.setYear(md.getStringValue(MediaMetadata.YEAR));
            sr.setMetadata(md);
            sr.setScore(1);

            // and parse out the poster
            String posterUrl = "";
            Element td = doc.getElementById("img_primary");
            if (td != null) {
                Elements imgs = td.getElementsByTag("img");
                for (Element img : imgs) {
                    // rewrite the SX/SY size tokens to 400 and drop the CR token from the image URL
                    posterUrl = img.attr("src");
                    posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                    posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
                    posterUrl = posterUrl.replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", "");
                }
            }
            if (StringUtils.isNotBlank(posterUrl)) {
                sr.setPosterUrl(posterUrl);
            }

            // a direct hit is the only result
            result.add(sr);
            return result;
        }
    }

    // parse results
    // elements = doc.getElementsByClass("result_text");
    elements = doc.getElementsByClass("findResult");
    for (Element tr : elements) {
        // we only want the tr's
        if (!"tr".equalsIgnoreCase(tr.tagName())) {
            continue;
        }

        // find the id / name
        String movieName = "";
        String movieId = "";
        String year = "";
        Elements tds = tr.getElementsByClass("result_text");
        for (Element element : tds) {
            // we only want the td's
            if (!"td".equalsIgnoreCase(element.tagName())) {
                continue;
            }

            // filter out unwanted results
            Pattern unwanted = Pattern.compile(".*\\((TV Series|TV Episode|Short|Video Game)\\).*"); // stripped out .*\\(Video\\).*|
            Matcher matcher = unwanted.matcher(element.text());
            if (matcher.find()) {
                continue;
            }

            // is there a localized name? (aka)
            String localizedName = "";
            Elements italics = element.getElementsByTag("i");
            if (italics.size() > 0) {
                localizedName = italics.text().replace("\"", "");
            }

            // get the name inside the link; only the first anchor with text is used
            Elements anchors = element.getElementsByTag("a");
            for (Element a : anchors) {
                if (StringUtils.isNotEmpty(a.text())) {
                    // movie name
                    if (StringUtils.isNotBlank(localizedName) && !language.equals("en")) {
                        // take AKA as title, but only if not EN
                        movieName = localizedName;
                    } else {
                        movieName = a.text();
                    }

                    // parse id
                    String href = a.attr("href");
                    matcher = imdbIdPattern.matcher(href);
                    while (matcher.find()) {
                        if (matcher.group(1) != null) {
                            movieId = matcher.group(1);
                        }
                    }

                    // try to parse out the year (first "(" followed by four digits)
                    Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");
                    matcher = yearPattern.matcher(element.text());
                    while (matcher.find()) {
                        if (matcher.group(1) != null) {
                            year = matcher.group(1);
                            break;
                        }
                    }
                    break;
                }
            }
        }

        // parse the poster image of this row (done before the name/id check below)
        String posterUrl = "";
        tds = tr.getElementsByClass("primary_photo");
        for (Element element : tds) {
            Elements imgs = element.getElementsByTag("img");
            for (Element img : imgs) {
                // same URL rewriting as for the direct hit above
                posterUrl = img.attr("src");
                posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
                posterUrl = posterUrl.replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", "");
            }
        }

        // if no movie name/id was found - continue
        if (StringUtils.isEmpty(movieName) || StringUtils.isEmpty(movieId)) {
            continue;
        }

        MediaSearchResult sr = new MediaSearchResult(providerInfo.getId());
        sr.setTitle(movieName);
        sr.setIMDBId(movieId);
        sr.setYear(year);
        sr.setPosterUrl(posterUrl);

        // populate extra args
        MetadataUtil.copySearchQueryToSearchResult(query, sr);

        if (movieId.equals(query.get(SearchParam.IMDBID))) {
            // perfect match
            sr.setScore(1);
        } else {
            // compare score based on names
            float score = MetadataUtil.calculateScore(searchTerm, movieName);
            if (posterUrl.isEmpty() || posterUrl.contains("nopicture")) {
                LOGGER.debug("no poster - downgrading score by 0.01");
                score = score - 0.01f;
            }
            if (myear != null && !myear.isEmpty() && !myear.equals("0") && !myear.equals(year)) {
                LOGGER.debug("parsed year does not match search result year - downgrading score by 0.01");
                score = score - 0.01f;
            }
            sr.setScore(score);
        }

        result.add(sr);

        // only get 40 results
        if (result.size() >= 40) {
            break;
        }
    }
    // sort by the results' natural ordering, then reverse it
    Collections.sort(result);
    Collections.reverse(result);

    return result;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java

/**
 * do the search according to the type/*from  w w w . j  a  va2 s. co  m*/
 * 
 * @param query
 *          the search params
 * @return the found results
 */
protected List<MediaSearchResult> search(MediaSearchOptions query) throws Exception {
    List<MediaSearchResult> result = new ArrayList<>();

    /*
     * IMDb matches seem to come in several "flavours".
     *
     * Firstly, if there is one exact match it returns the matching IMDb page.
     *
     * If that fails to produce a unique hit then a list of possible matches is returned, categorised as: Popular Titles (Displaying ? Results),
     * Titles (Exact Matches) (Displaying ? Results), Titles (Partial Matches) (Displaying ? Results)
     *
     * We should check the Exact match section first, then the popular titles and finally the partial matches.
     *
     * Note: That even with exact matches there can be more than 1 hit, for example "Star Trek"
     */

    // an IMDb id in the query takes precedence over the free-text query string
    String searchTerm = "";
    if (StringUtils.isNotEmpty(query.getImdbId())) {
        searchTerm = query.getImdbId();
    }
    if (StringUtils.isEmpty(searchTerm)) {
        searchTerm = query.getQuery();
    }
    if (StringUtils.isEmpty(searchTerm)) {
        // nothing to search for - hand back the (empty) result list
        return result;
    }

    // parse out language and country from the scraper query
    String language = query.getLanguage().getLanguage();
    int myear = query.getYear();
    String country = query.getCountry().getAlpha2(); // for passing the country to the scrape

    searchTerm = MetadataUtil.removeNonSearchCharacters(searchTerm);

    StringBuilder sb = new StringBuilder(getImdbSite().getSite());
    sb.append("find?q=");
    try {
        // search site was everytime in UTF-8
        sb.append(URLEncoder.encode(searchTerm, "UTF-8"));
    } catch (UnsupportedEncodingException ex) {
        // Failed to encode the movie name for some reason! Fall back to the raw term.
        getLogger().debug("Failed to encode search term: " + searchTerm);
        sb.append(searchTerm);
    }

    // we need to search for all - otherwise we do not find TV movies
    sb.append(getSearchCategory());

    getLogger().debug("========= BEGIN IMDB Scraper Search for: " + sb.toString());
    Document doc;
    try {
        Url url = new Url(sb.toString());
        url.addHeader("Accept-Language", getAcceptLanguage(language, country));
        // close the response stream ourselves; not every jsoup version closes it after parsing
        try (java.io.InputStream in = url.getInputStream()) {
            doc = Jsoup.parse(in, "UTF-8", "");
        }
    } catch (Exception e) {
        // any failure while fetching/parsing yields an empty result rather than an exception
        getLogger().debug("tried to fetch search response", e);
        return result;
    }

    // check if it was directly redirected to the site
    Elements elements = doc.getElementsByAttributeValue("rel", "canonical");
    for (Element element : elements) {
        MediaMetadata md = null;
        // we have been redirected to the movie site
        String movieName = null;
        String movieId = null;

        // the last id found in the canonical href wins
        String href = element.attr("href");
        Matcher matcher = IMDB_ID_PATTERN.matcher(href);
        while (matcher.find()) {
            if (matcher.group(1) != null) {
                movieId = matcher.group(1);
            }
        }

        // get full information
        if (!StringUtils.isEmpty(movieId)) {
            MediaScrapeOptions options = new MediaScrapeOptions(type);
            options.setImdbId(movieId);
            options.setLanguage(query.getLanguage());
            options.setCountry(CountryCode.valueOf(country));
            md = getMetadata(options);
            // guard against a missing metadata object; in that case fall through to the result list parsing
            if (md != null && !StringUtils.isEmpty(md.getTitle())) {
                movieName = md.getTitle();
            }
        }

        // if a movie name/id was found - return it as the single, perfect match
        if (StringUtils.isNotEmpty(movieName) && StringUtils.isNotEmpty(movieId)) {
            MediaSearchResult sr = new MediaSearchResult(ImdbMetadataProvider.providerInfo.getId(),
                    query.getMediaType());
            sr.setTitle(movieName);
            sr.setIMDBId(movieId);
            sr.setYear(md.getYear());
            sr.setMetadata(md);
            sr.setScore(1);

            // and parse out the poster (only the first element with class "poster" is inspected)
            String posterUrl = "";
            Elements posters = doc.getElementsByClass("poster");
            if (posters != null && !posters.isEmpty()) {
                for (Element img : posters.get(0).getElementsByTag("img")) {
                    posterUrl = normalizePosterUrl(img.attr("src"));
                }
            }
            if (StringUtils.isNotBlank(posterUrl)) {
                sr.setPosterUrl(posterUrl);
            }

            result.add(sr);
            return result;
        }
    }

    // compiled once for all rows: group 1 captures four digits following an opening parenthesis
    final Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");

    // parse results
    // elements = doc.getElementsByClass("result_text");
    elements = doc.getElementsByClass("findResult");
    for (Element tr : elements) {
        // we only want the tr's
        if (!"tr".equalsIgnoreCase(tr.tagName())) {
            continue;
        }

        // find the id / name
        String movieName = "";
        String movieId = "";
        int year = 0;
        Elements tds = tr.getElementsByClass("result_text");
        for (Element element : tds) {
            // we only want the td's
            if (!"td".equalsIgnoreCase(element.tagName())) {
                continue;
            }

            // filter out unwanted results
            Pattern unwantedSearchResultPattern = getUnwantedSearchResultPattern();
            if (unwantedSearchResultPattern != null) {
                Matcher matcher = unwantedSearchResultPattern.matcher(element.text());
                if (matcher.find()) {
                    continue;
                }
            }

            // is there a localized name? (aka)
            String localizedName = "";
            Elements italics = element.getElementsByTag("i");
            if (italics.size() > 0) {
                localizedName = italics.text().replace("\"", "");
            }

            // get the name inside the link; only the first anchor with text is used
            Elements anchors = element.getElementsByTag("a");
            for (Element a : anchors) {
                if (StringUtils.isNotEmpty(a.text())) {
                    // movie name
                    if (StringUtils.isNotBlank(localizedName) && !"en".equals(language)) {
                        // take AKA as title, but only if not EN
                        movieName = localizedName;
                    } else {
                        movieName = a.text();
                    }

                    // parse id
                    String href = a.attr("href");
                    Matcher matcher = IMDB_ID_PATTERN.matcher(href);
                    while (matcher.find()) {
                        if (matcher.group(1) != null) {
                            movieId = matcher.group(1);
                        }
                    }

                    // try to parse out the year (first parenthesised four-digit number in the cell text)
                    matcher = yearPattern.matcher(element.text());
                    while (matcher.find()) {
                        if (matcher.group(1) != null) {
                            try {
                                year = Integer.parseInt(matcher.group(1));
                                break;
                            } catch (Exception ignored) {
                                // not a parseable number - keep looking at further matches
                            }
                        }
                    }
                    break;
                }
            }
        }

        // parse the poster image of this row (the last img found wins)
        String posterUrl = "";
        tds = tr.getElementsByClass("primary_photo");
        for (Element element : tds) {
            for (Element img : element.getElementsByTag("img")) {
                posterUrl = normalizePosterUrl(img.attr("src"));
            }
        }

        // if no movie name/id was found - continue
        if (StringUtils.isEmpty(movieName) || StringUtils.isEmpty(movieId)) {
            continue;
        }

        MediaSearchResult sr = new MediaSearchResult(ImdbMetadataProvider.providerInfo.getId(),
                query.getMediaType());
        sr.setTitle(movieName);
        sr.setIMDBId(movieId);
        sr.setYear(year);
        sr.setPosterUrl(posterUrl);

        if (movieId.equals(query.getImdbId())) {
            // perfect match
            sr.setScore(1);
        } else {
            // compare score based on names
            float score = MetadataUtil.calculateScore(searchTerm, movieName);
            if (posterUrl.isEmpty() || posterUrl.contains("nopicture")) {
                getLogger().debug("no poster - downgrading score by 0.01");
                score = score - 0.01f;
            }
            if (yearDiffers(myear, year)) {
                // penalty grows with the distance between the years: 0.01 per year
                float diff = (float) Math.abs(year - myear) / 100;
                getLogger()
                        .debug("parsed year does not match search result year - downgrading score by " + diff);
                score -= diff;
            }
            sr.setScore(score);
        }

        result.add(sr);

        // only get 40 results
        if (result.size() >= 40) {
            break;
        }
    }

    // sort ascending, then flip, so the list ends up in descending natural order
    Collections.sort(result);
    Collections.reverse(result);

    return result;
}

/**
 * Rewrites the tokens embedded in an IMDb image URL: every {@code UX<2-4 digits>_} / {@code UY<2-4 digits>_} token becomes {@code UX200_} /
 * {@code UY200_}, and every {@code CR<n>,<n>,<n>,<n>_} token is removed (presumably size and crop directives - verify against the IMDb image
 * URL scheme).
 *
 * @param src
 *          the raw {@code src} attribute of the image element
 * @return the rewritten URL
 */
private static String normalizePosterUrl(String src) {
    String posterUrl = src.replaceAll("UX[0-9]{2,4}_", "UX200_");
    posterUrl = posterUrl.replaceAll("UY[0-9]{2,4}_", "UY200_");
    return posterUrl.replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", "");
}