Example usage for org.jsoup.nodes Document getElementsByTag

List of usage examples for org.jsoup.nodes Document getElementsByTag

Introduction

On this page you can find example usage for org.jsoup.nodes Document getElementsByTag.

Prototype

public Elements getElementsByTag(String tagName) 

Source Link

Document

Finds elements, including and recursively under this element, with the specified tag name.

Usage

From source file:org.loklak.api.search.WordpressCrawlerService.java

/**
 * Crawls a WordPress blog page and returns its posts as a {@link SusiThought}.
 *
 * <p>Every {@code <article>} element of the page becomes one JSON object with the keys
 * {@code blog_url}, {@code title}, {@code posted_on}, {@code author} and {@code content}.
 * The values are the text of the elements carrying the classes {@code entry-title},
 * {@code posted-on}, {@code byline} and {@code entry-content} inside that article. When a
 * class occurs several times the last occurrence wins; when it does not occur the value
 * passed to the JSON object is {@code null}.
 *
 * <p>If the page cannot be fetched, the stack trace is printed and a thought holding an
 * empty array is returned instead of failing on a missing document.
 *
 * @param blogURL address of the blog page to fetch; also stored in every post as {@code blog_url}
 * @return a thought whose data is the array of extracted posts (possibly empty)
 */
public static SusiThought crawlWordpress(String blogURL) {
    JSONArray blog = new JSONArray();

    Document blogHTML = null;
    try {
        blogHTML = Jsoup.connect(blogURL).get();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Without a document there is nothing to extract; fall through to the empty result.
    if (blogHTML != null) {
        // Posts are appended directly to the array, so the number of articles is not capped.
        for (Element article : blogHTML.getElementsByTag("article")) {
            JSONObject blogpost = new JSONObject();
            blogpost.put("blog_url", blogURL);
            blogpost.put("title", lastTextByClass(article, "entry-title"));
            blogpost.put("posted_on", lastTextByClass(article, "posted-on"));
            blogpost.put("author", lastTextByClass(article, "byline"));
            blogpost.put("content", lastTextByClass(article, "entry-content"));
            blog.put(blogpost);
        }
    }

    SusiThought json = new SusiThought();
    json.setData(blog);
    return json;
}

/**
 * Returns the text of the last element with the given class inside {@code article},
 * or {@code null} when no such element exists.
 */
private static String lastTextByClass(Element article, String className) {
    Elements matches = article.getElementsByClass(className);
    return matches.isEmpty() ? null : matches.get(matches.size() - 1).text();
}

From source file:org.openhab.tools.analysis.checkstyle.AboutHtmlCheck.java

/**
 * Reports a problem when the processed about.html document does not contain the
 * expected license header.
 *
 * <p>The {@code HEADER_3_TAG} elements of the document are handed to
 * {@code isElementProvided} together with {@code LICENSE_HEADER}; if that check fails,
 * a message is logged for line 0.
 *
 * @param processedAboutHtmlFileDocument the parsed about.html file under inspection
 * @throws CheckstyleException declared by the original signature
 */
private void checkLicenseHeader(Document processedAboutHtmlFileDocument) throws CheckstyleException {
    Elements headerTags = processedAboutHtmlFileDocument.getElementsByTag(HEADER_3_TAG);
    if (isElementProvided(headerTags, LICENSE_HEADER)) {
        return;
    }
    String message = "Invalid or missing license header in the about.html file. "
            + VALID_ABOUT_HTML_FILE_LINK_MSG + validAboutHtmlFileURL;
    log(0, message);
}

From source file:org.openhab.tools.analysis.checkstyle.AboutHtmlCheck.java

/**
 * Reports a problem when the processed about.html document does not contain the
 * license paragraph of the valid about.html file.
 *
 * <p>The reference text is the inner HTML of the paragraph with index 1 in the valid
 * file, which is the license paragraph. If {@code isElementProvided} does not find it
 * among the {@code PARAGRAPH_TAG} elements of the processed document, a message is
 * logged for line 0.
 *
 * @param processedAboutHtmlFileDocument the parsed about.html file under inspection
 */
private void checkLicenseParagraph(Document processedAboutHtmlFileDocument) {
    // Index 1 of the valid file's paragraphs is the license paragraph.
    String expectedLicenseHtml = Jsoup.parse(validAboutHtmlFileContent)
            .getElementsByTag(PARAGRAPH_TAG)
            .get(1)
            .html();

    Elements candidateParagraphs = processedAboutHtmlFileDocument.getElementsByTag(PARAGRAPH_TAG);
    if (isElementProvided(candidateParagraphs, expectedLicenseHtml)) {
        return;
    }
    String message = "Invalid or missing license paragraph in the about.html file. "
            + VALID_ABOUT_HTML_FILE_LINK_MSG + validAboutHtmlFileURL;
    log(0, message);
}

From source file:org.openmrs.module.radiology.report.template.DefaultMrrtReportTemplateFileParser.java

/**
 * Validates the given template text, parses it with jsoup and builds an
 * {@code MrrtReportTemplate} from it. The markup of the first {@code script} element is
 * passed to {@code addTermsToTemplate}.
 *
 * @see MrrtReportTemplateFileParser#parse(String)
 */
@Override
public MrrtReportTemplate parse(String mrrtTemplate) throws IOException {

    validator.validate(mrrtTemplate);

    final Document parsedTemplate = Jsoup.parse(mrrtTemplate, "");
    final MrrtReportTemplate template = new MrrtReportTemplate();
    initializeTemplate(template, parsedTemplate);

    try {
        // Only the first script element is consulted.
        final String scriptMarkup = parsedTemplate.getElementsByTag("script").get(0).toString();
        addTermsToTemplate(template, scriptMarkup);
    } catch (ParserConfigurationException | SAXException e) {
        throw new APIException("radiology.report.template.parser.error", null, e);
    }
    return template;
}

From source file:org.openmrs.module.radiology.report.template.DefaultMrrtReportTemplateFileParser.java

/**
 * Fills {@code template} with the metadata found in the parsed document.
 *
 * <p>The path is taken from the document's base URI and the charset from the
 * {@code charset} attribute of the {@code meta} elements. Each {@code meta} element whose
 * {@code name} matches one of the {@code DCTERMS_*} constants has its {@code content}
 * copied into the corresponding template property; any other name is logged at debug level.
 *
 * @param template the template to populate
 * @param doc the parsed template document
 */
private final void initializeTemplate(MrrtReportTemplate template, Document doc) {
    final Elements metaElements = doc.getElementsByTag("meta");

    template.setPath(doc.baseUri());
    template.setCharset(metaElements.attr("charset"));

    for (Element meta : metaElements) {
        final String metaName = meta.attr("name");
        final String metaContent = meta.attr("content");

        switch (metaName) {
        case DCTERMS_TITLE:
            template.setDcTermsTitle(metaContent);
            break;
        case DCTERMS_DESCRIPTION:
            template.setDcTermsDescription(metaContent);
            break;
        case DCTERMS_IDENTIFIER:
            template.setDcTermsIdentifier(metaContent);
            break;
        case DCTERMS_TYPE:
            template.setDcTermsType(metaContent);
            break;
        case DCTERMS_LANGUAGE:
            template.setDcTermsLanguage(metaContent);
            break;
        case DCTERMS_PUBLISHER:
            template.setDcTermsPublisher(metaContent);
            break;
        case DCTERMS_RIGHTS:
            template.setDcTermsRights(metaContent);
            break;
        case DCTERMS_LICENSE:
            template.setDcTermsLicense(metaContent);
            break;
        case DCTERMS_DATE:
            template.setDcTermsDate(metaContent);
            break;
        case DCTERMS_CREATOR:
            template.setDcTermsCreator(metaContent);
            break;
        default:
            // Not one of the known Dublin Core names; just record that it was seen.
            log.debug("Unhandled meta tag " + metaName);
            break;
        }
    }
}

From source file:org.openmrs.module.radiology.report.template.XsdMrrtReportTemplateValidator.java

/**
 * Validates the template text in two steps: the {@code meta} elements are run through the
 * meta tag validation engine, then the whole text is validated against the XML schema
 * returned by {@code getSchemaFile()}. Every warning, error and fatal error reported by
 * the schema validator is added to the validation result, which is asserted afterwards.
 *
 * @see MrrtReportTemplateValidator#validate(String)
 */
@Override
public void validate(String mrrtTemplate) throws IOException {

    final Elements metaElements = Jsoup.parse(mrrtTemplate, "").getElementsByTag("meta");
    final ValidationResult validationResult = metaTagsValidationEngine.run(metaElements);

    final SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
    try (InputStream templateStream = IOUtils.toInputStream(mrrtTemplate)) {
        final Schema schema = schemaFactory.newSchema(getSchemaFile());
        final Validator xsdValidator = schema.newValidator();
        xsdValidator.setErrorHandler(new ErrorHandler() {

            @Override
            public void warning(SAXParseException exception) throws SAXException {
                collect(exception);
            }

            @Override
            public void error(SAXParseException exception) throws SAXException {
                collect(exception);
            }

            @Override
            public void fatalError(SAXParseException exception) throws SAXException {
                collect(exception);
            }

            // All severities are treated alike: log at debug level and record as an error.
            private void collect(SAXParseException problem) {
                log.debug(problem.getMessage(), problem);
                validationResult.addError(problem.getMessage(), "");
            }
        });
        xsdValidator.validate(new StreamSource(templateStream));
        validationResult.assertOk();
    } catch (SAXException e) {
        log.error(e.getMessage(), e);
        throw new APIException("radiology.report.template.validation.error", null, e);
    }
}

From source file:org.opens.tanaguru.processing.ProcessRemarkServiceImplTest.java

/**
 * Test of getSnippetFromElement method, of class ProcessRemarkServiceImpl.
 *
 * Each scenario parses a raw HTML fragment with jsoup, picks the first element with a
 * given tag, passes it to getSnippetFromElement and compares the HTML-unescaped result
 * with an expected snippet string.
 */
public void testGetSnippetFromElement() {
    ProcessRemarkServiceImpl instance = new ProcessRemarkServiceImpl(null, null, null, null);

    //--------------------------------------------------------------------//
    //-----------------------Test1----------------------------------------//
    //--------------------------------------------------------------------//
    // <label> wrapping a <span> and an <input>; the expected snippet ends with
    // "[...]" before the closing label tag.
    String rawHtml = "<label> <span>Rechercher:</span> "
            + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" "
            + "class=\"text\" id=\"searchfield\" " + "name=\"search&qudsqqqssqdsqdsqdo\" /></label>";
    Document document = Jsoup.parse(rawHtml);
    Element element = document.getElementsByTag("label").iterator().next();
    String snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element));
    String expectedSnippet = "<label> <span>Rechercher:</span> "
            + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" "
            + "class=\"text\" id=\"searchfield\" " + "name=\"search&amp;qudsqqqssqdsqdsqdo\" />[...]</label>";
    assertEquals(expectedSnippet, snippet);

    //--------------------------------------------------------------------//
    //-----------------------Test2----------------------------------------//
    //--------------------------------------------------------------------//
    // <label> wrapping a <span> and a <p> with text content; the expected snippet
    // replaces the paragraph text and the tail of the label with "[...]".
    rawHtml = "<label> <span>New Rechercher:</span> "
            + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\" "
            + " id=\"searchfield\" class=\"myclass other-class1 other-class2\" > " + "anything</p></label>";
    document = Jsoup.parse(rawHtml);
    element = document.getElementsByTag("label").iterator().next();
    snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element));
    expectedSnippet = "<label> <span>New Rechercher:</span> "
            + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\""
            + " id=\"searchfield\" class=\"myclass other-class1 other-class2\">" + "[...]</p>[...]</label>";
    assertEquals(expectedSnippet, snippet);

    //--------------------------------------------------------------------//
    //-----------------------Test3----------------------------------------//
    //--------------------------------------------------------------------//
    // Empty <iframe> with a long src attribute; the expected snippet is the
    // trimmed input itself.
    rawHtml = "<iframe align=\"left\" width=\"315px\" " + "scrolling=\"no\" height=\"160px\" frameborder=\"0\" "
            + "id=\"link-meteo\" src=\"http://www.anyUrl.com/module/onelocationsearch?ShowSearch=true&amp;StartDate=2012-06-01&amp;Days=2&amp;location=bruxelles&amp;url=http://meteo1.lavenir.net&amp;cssfile=http://lavenir.net/extra/weather/styles.css\">"
            + "</iframe> ";
    document = Jsoup.parse(rawHtml);
    element = document.getElementsByTag("iframe").iterator().next();
    snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element));
    expectedSnippet = rawHtml.trim();
    assertEquals(expectedSnippet, snippet);

    //--------------------------------------------------------------------//
    //-----------------------Test4----------------------------------------//
    //--------------------------------------------------------------------//
    // <center> holding several scripts and nested ad markup; the expected snippet
    // keeps the first two script tags and replaces everything after them with "[...]".
    rawHtml = " <center>  <script type=\"text/javascript\">    if (articledetail == false) initAdhese('IMU.SUPER.WIDE');     </script> "
            + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\"></script> "
            + " <div class=\"adhese_300x250\">  <script src=\"http://1.adhesecdn.be/pool/lib/68641.js?t=1371729603000\"></script> "
            + "<script src=\"http://anyUrl.com/pagead/show_ads.js\" type=\"text/javascript\"></script>"
            + "<ins style=\"display:inline-table;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\">"
            + "<ins style=\"display:block;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\" id=\"aswift_1_anchor\">"
            + "<iframe width=\"300\" scrolling=\"no\" height=\"250\" frameborder=\"0\" style=\"left:0;position:absolute;top:0;\" name=\"aswift_1\" id=\"aswift_1\" onload=\"var i=this.id,s=window.google_iframe_oncopy,H=s&amp;&amp;s.handlers,h=H&amp;&amp;H[i],w=this.contentWindow,d;try{d=w.document}catch(e){}if(h&amp;&amp;d&amp;&amp;(!d.body||!d.body.firstChild)){if(h.call){setTimeout(h,0)}else if(h.match){w.location.replace(h)}}\" allowtransparency=\"true\" hspace=\"0\" vspace=\"0\" marginheight=\"0\" marginwidth=\"0\"></iframe>"
            + "</ins>" + "</ins>" + "</div> " + "</center> ";
    document = Jsoup.parse(rawHtml);
    element = document.getElementsByTag("center").iterator().next();
    snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element));
    expectedSnippet = "<center> <script type=\"text/javascript\"> if (articledetail == false) initAdhese('IMU.SUPER.WIDE'); </script> "
            + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\">[...]</script>"
            + "[...]</center>";
    assertEquals(expectedSnippet, snippet);
}

From source file:org.sbs.goodcrawler.plugin.extract.ExtractorDytt8.java

/**
 * Extracts movie information from a crawled page and queues the links found on it.
 *
 * Returns {@code null} when {@code page} is null, when the page URL contains "game/",
 * when the "#Zoom" selection is empty, when one of the configured selectors matches
 * nothing, or after an UnsupportedEncodingException. Otherwise the collected values are
 * stored in a new ExtractedPage, which is handed to {@code pendingStore} and returned.
 *
 * NOTE(review): several string literals below are empty ("") or "?"; presumably they held
 * non-ASCII text that was lost in an encoding conversion - confirm against the original
 * source before relying on this method.
 */
@Override
public ExtractedPage<?, ?> onExtract(Page page) {
    if (null != page) {
        try {

            Document doc = Jsoup.parse(new String(page.getContentData(), page.getContentCharset()),
                    urlUtils.getBaseUrl(page.getWebURL().getURL()));
            if (null != page.getWebURL().getURL() && page.getWebURL().getURL().contains("game/"))
                return null;
            // Queue every non-blank absolute link on the page that passes filterUrls
            Elements links = doc.getElementsByTag("a");
            if (!links.isEmpty()) {
                for (Element link : links) {
                    String linkHref = link.absUrl("href");
                    if (StringUtils.isNotBlank(linkHref) && filterUrls(linkHref)) {
                        try {
                            WebURL url = new WebURL();

                            url.setURL(linkHref);
                            url.setJobName(conf.jobName);
                            pendingUrls.addUrl(url);
                        } catch (QueueException e) {
                            log.error(e.getMessage());
                        } catch (Exception e) {
                            log.error(e.getMessage());
                        }
                    }
                }
            }
            // Extract the page content
            // NOTE(review): with the lookup below commented out, selects is always null, so
            // the loop over selects.entrySet() further down throws a NullPointerException
            // that the surrounding catch (UnsupportedEncodingException) does not handle.
            //            Map<String, String> selects = conf.getSelects();
            Map<String, String> selects = null;
            ExtractedPage<String, Object> epage = pendingStore.new ExtractedPage<String, Object>();
            epage.setUrl(page.getWebURL());
            HashMap<String, Object> result = new HashMap<>();
            Elements text = doc.select("#Zoom");
            if (null == text || text.size() == 0) {
                return null;
            }
            String name = doc.select("h1").text();
            // NOTE(review): replace("", "") leaves the string unchanged; only "<<" and ">>" are stripped here.
            name = name.replace("", "").replace("<<", "").replace("", "").replace(">>", "");
            result.put("movie", name);
            //            result.put("_id", name);
            // The second space-separated token of the "h2 a" text is used as the type
            String ts[] = doc.select("h2 a").text().split(" ");
            if (ts.length >= 2) {
                result.put("type", ts[1].trim());
            } else {
                result.put("type", "unknow");
            }
            result.put("url", page.getWebURL().getURL());
            for (Entry<String, String> entry : selects.entrySet()) {
                Elements elements = doc.select(entry.getValue());
                if (elements.isEmpty())
                    return null;
                else {
                    if ("content".equals(entry.getKey())) {

                        for (Element element : elements) {
                            // Collect the image sources, separated by ';'
                            Elements imgs = element.select("img[src]");
                            StringBuilder sb = new StringBuilder();
                            for (Element img : imgs) {
                                sb.append(img.attr("src")).append(";");
                            }
                            result.put("img", sb.toString());
                            // Parse the paragraphs that carry the movie details
                            Elements movieInfos = element.select("p");
                            for (Element info : movieInfos) {
                                String infotext = info.text();
                                try {
                                    String infotext_ = info.html();
                                    int start, end = 0;
                                    // NOTE(review): indexOf("") returns 0, so "start > 0" is never
                                    // true and the "jq" value is never stored as written.
                                    start = infotext_.indexOf("");
                                    if (start > 0) {
                                        end = infotext_.lastIndexOf("");
                                        if (end > 0 && start < end) {
                                            result.put("jq", infotext_.substring(start, end));
                                        } else {
                                            end = infotext_.lastIndexOf(".");
                                            if (end > 0 && start < end) {
                                                result.put("jq", infotext_.substring(start, end));
                                            }
                                        }
                                    }
                                    infotext_ = null;
                                } catch (Exception e) {
                                    e.printStackTrace();
                                }

                                // NOTE(review): startsWith("") is always true, so the else-if
                                // branches below are unreachable as written. In every branch the
                                // result of s.trim() is discarded, so getInfoName receives the
                                // untrimmed piece.
                                if (infotext.startsWith("")) {
                                    String ss[] = infotext.split("");
                                    for (String s : ss) {
                                        s.trim();
                                        result = getInfoName(s, result);
                                    }
                                } else if (infotext.startsWith("?")) {
                                    // NOTE(review): "?" alone is not a valid regular expression;
                                    // split("?") would throw PatternSyntaxException if reached.
                                    String ss[] = infotext.split("?");
                                    for (String s : ss) {
                                        s.trim();
                                        result = getInfoName(s, result);
                                    }
                                } else if (infotext.contains("")) {
                                    infotext = info.html();
                                    String[] ss = infotext.split("<br />");
                                    for (String s : ss) {
                                        s.trim();
                                        result = getInfoName(s, result);
                                    }
                                } else if (infotext.contains(":")) {
                                    infotext = info.html();
                                    String[] ss = infotext.split("<br />");
                                    for (String s : ss) {
                                        s.trim();
                                        result = getInfoName(s, result);
                                    }
                                }
                            }

                            //                        if(result.size()<5){
                            //                           result.put("content", value)
                            //                        }

                            // Collect the text of the table cells as download entries, separated by ';'
                            // NOTE(review): this selects from "elements" (all matches), not from
                            // the current "element" - confirm that is intended.
                            Elements elements2 = elements.select("td");
                            sb.setLength(0);
                            for (Element download : elements2) {
                                sb.append(download.text()).append(";");
                            }
                            result.put("download", sb.toString());
                        }
                    }
                }
                //               result.put(entry.getKey(), elements.html());
            }
            // Store "nd" as an Integer when a value is present
            if (StringUtils.isNotBlank((String) result.get("nd"))) {
                result.put("nd", Integer.parseInt((String) result.get("nd")));
            }
            epage.setMessages(result);
            try {
                pendingStore.addExtracedPage(epage);
            } catch (QueueException e) {
                log.error(e.getMessage());
            }
            return epage;
        } catch (UnsupportedEncodingException e) {
            log.error(e.getMessage());
            e.printStackTrace();
        }
    }
    return null;
}

From source file:org.sbs.goodcrawler.plugin.extract.ExtractYouku.java

/**
 * Extracts video information from a crawled page.
 *
 * <p>All links of the page that pass {@code filterUrls} are submitted for crawling. If the
 * page URL contains "/show_page/", the detail fields (title, category, year, score,
 * translation, thumbnail, area, type, director, duration, hot, summary and the online
 * play list) are read from the document. If it contains "/v_show/", the link of the
 * {@code h1.title a} element is followed and the result of
 * {@link #getInformation(String)} is returned.
 *
 * @param page the crawled page to analyse
 * @return the extracted fields; {@code null} when a required field is blank. An empty or
 *         partial map is returned when the URL matches neither pattern or an I/O error
 *         interrupts extraction. When more than four fields were collected without a
 *         year, the year defaults to 1800.
 */
public HashMap<String, Object> getInformation(Page page) {
    HashMap<String, Object> map = Maps.newHashMap();
    String url = page.getWebURL().getURL();
    try {
        ExtractedPage<String, Object> epage = pendingStore.new ExtractedPage<String, Object>();
        epage.setUrl(page.getWebURL());
        Document doc = Jsoup.parse(new String(page.getContentData(), page.getContentCharset()),
                urlUtils.getBaseUrl(page.getWebURL().getURL()));
        // Submit every link on the page that passes the URL filter
        Elements links = doc.getElementsByTag("a");
        if (!links.isEmpty()) {
            for (Element link : links) {
                String linkHref = link.absUrl("href");
                if (filterUrls(linkHref)) {
                    WebURL weburl = new WebURL();
                    weburl.setURL(linkHref);
                    weburl.setJobName(conf.jobName);
                    Submitor.submitUrl(weburl);
                }
            }
        }
        if (url.contains("/show_page/")) {

            String title = doc.select(".title .name").text();
            if (StringUtils.isBlank(title))
                return null;
            map.put("title", title);
            String category = doc.select(".title .type a").text();
            if (StringUtils.isBlank(category))
                return null;
            map.put("category", category);

            String _year = CharMatcher.DIGIT.retainFrom(doc.select(".title .pub").text());
            if (StringUtils.isNotBlank(_year)) {
                int year = Integer.parseInt(_year);
                map.put("year", year);
            }

            String score = CharMatcher.DIGIT.retainFrom(doc.select(".ratingstar .num").text());
            map.put("score", score);
            String alias = doc.select(".alias").text();

            if (alias.contains(":")) {
                // split() drops trailing empty parts, so "name:" yields a single element;
                // only read index 1 when something actually follows the colon.
                String[] aliasParts = alias.split(":");
                if (aliasParts.length > 1) {
                    map.put("translation", aliasParts[1]);
                }
            }
            String img = doc.select(".thumb img").attr("src");
            if (StringUtils.isBlank(img))
                return null;
            map.put("thumbnail", img);
            String area = doc.select(".row2 .area a").text();
            if (StringUtils.isBlank(area))
                return null;
            map.put("area", area);
            String[] type = doc.select(".row2 .type a").text().split(" ");
            if (null == type || type.length == 0)
                return null;
            map.put("type", Sets.newHashSet(type));
            String director = doc.select(".row2 .director a").text();
            map.put("director", director);

            String _duration = CharMatcher.DIGIT.retainFrom(doc.select(".row2 .duration").text());
            if (StringUtils.isNotBlank(_duration)) {
                int duration = Integer.parseInt(_duration);
                map.put("duration", duration);
            }
            String _hot = CharMatcher.anyOf(",").removeFrom(doc.select(".row2 .vr .num").text());
            _hot = CharMatcher.DIGIT.retainFrom(_hot);
            if (StringUtils.isNotBlank(_hot)) {
                int hot = Integer.parseInt(_hot);
                map.put("hot", hot);
            }

            String sumary = doc.select(".detail .long").text();
            map.put("summary", sumary);
            // Resolve the play link behind every action entry
            Elements elements = doc.select(".baseaction a");
            HashMap<String, String> playList = Maps.newHashMap();
            for (Element element : elements) {
                String n = element.text();
                String urlString = element.attr("href");
                if (StringUtils.isBlank(urlString))
                    return null;
                Document d2 = Jsoup.parse(new URL(urlString), 10000);
                if (null != d2) {
                    String x = d2.select("#link2").attr("value");
                    if (StringUtils.isBlank(x))
                        return null;
                    playList.put(n, x);
                }
            }
            map.put("online", playList);
        } else if (url.contains("/v_show/")) {
            Document d3 = Jsoup.parse(new String(page.getContentData(), page.getContentCharset()),
                    urlUtils.getBaseUrl(page.getWebURL().getURL()));
            // Queue every link on the page that passes the URL filter
            Elements links2 = d3.getElementsByTag("a");
            if (!links2.isEmpty()) {
                for (Element link : links2) {
                    String linkHref = link.absUrl("href");
                    if (filterUrls(linkHref)) {
                        WebURL weburl = new WebURL();
                        weburl.setURL(linkHref);
                        weburl.setJobName(conf.jobName);
                        try {
                            pendingUrls.addUrl(weburl);
                        } catch (QueueException e) {
                            log.error(e.getMessage());
                        }
                    }
                }
            }
            // Follow the title link to the detail page
            String p = d3.select("h1.title a").attr("href");
            if (StringUtils.isBlank(p))
                return null;
            return getInformation(p);
        }
    } catch (IOException e) {
        // MalformedURLException is an IOException, so one handler covers both cases.
        e.printStackTrace();
    }
    // Give sufficiently complete records a placeholder year when none was found
    if (map.size() > 4 && null == map.get("year")) {
        map.put("year", 1800);
    }
    return map;
}

From source file:org.sbs.goodcrawler.plugin.extract.ExtractYouku.java

/**
 * Downloads the detail page at {@code p} and extracts the video information from it.
 *
 * <p>All links of the downloaded page that pass {@code filterUrls} are queued in
 * {@code pendingUrls}. The detail fields (title, category, year, score, translation,
 * thumbnail, area, type, director, duration, hot, summary and the online play list) are
 * then read from the document.
 *
 * @param p address of the page; only addresses containing "/show_page/" are processed
 * @return the extracted fields; {@code null} when {@code p} is not a "/show_page/" address
 *         or a required field is blank; the fields collected so far when an exception
 *         interrupts extraction
 */
private HashMap<String, Object> getInformation(String p) {
    HashMap<String, Object> map = Maps.newHashMap();
    try {
        if (p.contains("/show_page/")) {
            Document doc = Jsoup.parse(new URL(p), 15000);
            // Queue every link on the page that passes the URL filter
            Elements links = doc.getElementsByTag("a");
            if (!links.isEmpty()) {
                for (Element link : links) {
                    String linkHref = link.absUrl("href");
                    if (filterUrls(linkHref)) {
                        WebURL weburl = new WebURL();
                        weburl.setURL(linkHref);
                        weburl.setJobName(conf.jobName);
                        try {
                            pendingUrls.addUrl(weburl);
                        } catch (QueueException e) {
                            log.error(e.getMessage());
                        }
                    }
                }
            }

            String title = doc.select(".title .name").text();
            if (StringUtils.isBlank(title))
                return null;
            map.put("title", title);
            String category = doc.select(".title .type a").text();
            if (StringUtils.isBlank(category))
                return null;
            map.put("category", category);

            String _year = CharMatcher.DIGIT.retainFrom(doc.select(".title .pub").text());
            if (StringUtils.isNotBlank(_year)) {
                int year = Integer.parseInt(_year);
                map.put("year", year);
            }

            String score = CharMatcher.DIGIT.retainFrom(doc.select(".ratingstar .num").text());
            map.put("score", score);
            String alias = doc.select(".alias").text();
            if (alias.contains(":")) {
                // split() drops trailing empty parts, so "name:" yields a single element;
                // only read index 1 when something actually follows the colon.
                String[] aliasParts = alias.split(":");
                if (aliasParts.length > 1) {
                    map.put("translation", aliasParts[1]);
                }
            }
            String img = doc.select(".thumb img").attr("src");
            if (StringUtils.isBlank(img))
                return null;
            map.put("thumbnail", Lists.newArrayList(img));
            String area = doc.select(".row2 .area a").text();
            if (StringUtils.isBlank(area))
                return null;
            map.put("area", area);
            String[] type = doc.select(".row2 .type a").text().split(" ");
            if (null == type || type.length == 0)
                return null;
            map.put("type", Lists.newArrayList(type));
            String director = doc.select(".row2 .director a").text();
            map.put("director", director);

            String _duration = CharMatcher.DIGIT.retainFrom(doc.select(".row2 .duration").text());
            if (StringUtils.isNotBlank(_duration)) {
                int duration = Integer.parseInt(_duration);
                map.put("duration", duration);
            }
            String _hot = CharMatcher.anyOf(",").removeFrom(doc.select(".row2 .vr .num").text());
            // Keep digits only, as the Page overload does; any other character in the
            // text would make Integer.parseInt throw and abort the extraction.
            _hot = CharMatcher.DIGIT.retainFrom(_hot);
            if (StringUtils.isNotBlank(_hot)) {
                int hot = Integer.parseInt(_hot);
                map.put("hot", hot);
            }

            String sumary = doc.select(".detail .long").text();
            map.put("summary", sumary);
            // Resolve the play link behind every action entry
            Elements elements = doc.select(".baseaction a");
            HashMap<String, String> playList = Maps.newHashMap();
            for (Element element : elements) {
                String n = element.text();
                String urlString = element.attr("href");
                if (StringUtils.isBlank(urlString))
                    return null;
                Document d2 = Jsoup.parse(new URL(urlString), 10000);
                if (null != d2) {
                    String x = d2.select("#link2").attr("value");
                    if (StringUtils.isBlank(x))
                        return null;
                    playList.put(n, x);
                }
            }
            map.put("online", playList);
        } else
            return null;
    } catch (Exception e) {
        // Best effort: keep whatever was collected, but leave a trace of the failure.
        log.error(e.getMessage());
        return map;
    }
    return map;
}