Example usage for org.jsoup.nodes Document toString

List of usage examples for org.jsoup.nodes Document toString

Introduction

On this page you can find example usages of the org.jsoup.nodes.Document toString() method.

Prototype

public String toString() 

Source Link

Document

Gets this node's outer HTML.

Usage

From source file:ExtractorContentTest.java

/**
 * Computes value-pattern statistics for every table of a Wikipedia page.
 *
 * <p>The wiki content of the page is fetched, converted to HTML and parsed with jsoup;
 * the raw content and the parsed HTML are both dumped under {@code output/}. Every
 * {@code <table>} element is handed to {@code treatTable}, and each value of the
 * resulting catalogs is counted under exactly one pattern category (the first
 * matching test wins).
 *
 * @param wikiPageName name of the Wikipedia page; also used to name the dump files
 * @return the number of catalogs together with one {@code CatalogStat} per catalog
 * @throws Exception propagated from the extraction or file-writing steps
 */
private PCMStatistic computeStatistic(String wikiPageName) throws Exception {
    // 1. fetch the wiki content and keep a copy on disk
    WikiPageContentExtractor pageExtractor = new WikiPageContentExtractor();
    String wikiText = pageExtractor.getContent(wikiPageName);
    assertNotNull(wikiText);
    File wikiDump = new File("output/" + wikiPageName + ".wikipedia");
    FileUtils.writeStringToFile(wikiDump, wikiText);

    // 2. turn it into HTML
    WikiTabularExtractor tabularExtractor = new WikiTabularExtractor();
    String html = tabularExtractor.run(wikiText, String.valueOf(wikiPageName));
    assertNotNull(html);

    // 3. parse the HTML and keep a copy of the parsed document on disk
    Document parsed = Jsoup.parse(html);
    File htmlDump = new File("output/" + wikiPageName + ".html");
    FileUtils.writeStringToFile(htmlDump, parsed.toString());

    // 4. one treatTable call per <table> element; treatTable fills the catalog list
    List<Catalog> catalogs = new ArrayList<Catalog>();
    Elements tables = parsed.select("table");
    for (Element table : tables) {
        treatTable(table, catalogs);
    }

    // 5. per-catalog statistics
    Collection<CatalogStat> stats = new ArrayList<CatalogStat>();
    for (Catalog catalog : catalogs) {
        int headerCount = catalog.getHeaders().size();
        int productCount = catalog.size();
        CatalogStat stat = new CatalogStat();
        stat.setNHeaders(headerCount);
        stat.setNProduct(productCount);

        int uncertain = 0;
        int booleans = 0;
        int empty = 0;
        int multi = 0;
        int single = 0;
        int unknowns = 0;
        int constrained = 0;

        // classify every value of every product; the order of the tests matters
        for (Product product : catalog) {
            for (String cell : product.getAllValues()) {
                if (VariabilityPatternsUtils.isUncertain(cell)) {
                    uncertain++;
                    continue;
                }
                if (VariabilityPatternsUtils.isYes(cell) || VariabilityPatternsUtils.isNot(cell)) {
                    booleans++; // pattern #1
                    continue;
                }
                if (VariabilityPatternsUtils.isBlanked(cell)) {
                    empty++; // pattern #6
                    continue;
                }
                if (VariabilityPatternsUtils.isMultiValues(cell)) {
                    multi++; // pattern #4
                    continue;
                }
                if (VariabilityPatternsUtils.isUnknowns(cell)) {
                    unknowns++; // pattern #5
                    continue;
                }
                if (VariabilityPatternsUtils.isConstrained(cell)) {
                    constrained++; // pattern #2
                    continue;
                }
                single++; // pattern #3
            }
        }

        stat.setnConstrained(constrained);
        stat.setnUnknowns(unknowns);
        stat.setnSingleV(single);
        stat.setnMultiValues(multi);
        stat.setnEmpty(empty);
        stat.setnBooleans(booleans);
        stat.setnUncertains(uncertain);
        stats.add(stat);
    }

    return new PCMStatistic(catalogs.size(), stats);
}

From source file:ExtractorContentTest.java

/**
 * Builds one merged feature model from the tables of a Wikipedia page.
 *
 * <p>Steps, in order: fetch the wiki content, convert it to HTML, parse it with jsoup
 * (the parsed document is dumped to {@code output/<page>.html}), turn every
 * {@code <table>} into catalogs through {@code treatTable}, drop the excluded columns,
 * build one feature model per product, aggregate the models sharing an identifier when
 * there is more than one catalog, dump the per-product models to an {@code .fml} file,
 * merge them, and finally mark the catalog-named features as mandatory.
 *
 * <p>Side effects visible here: two files are written, progress is printed on
 * {@code System.err} and {@code _shell.setVerbose(true)} is called.
 *
 * @param wikiPageName name of the Wikipedia page; used for the dump file names and as
 *        identifier of the returned model
 * @param excludeColumnNames header names removed from every catalog that has them
 * @param excludeProductNames identifiers of the product feature models that are discarded
 * @param excludeSectionNames names of the catalogs that are skipped
 * @param renamings maps a feature name to its new name; applied to every product model
 * @return the merged feature model, with its identifier set to {@code wikiPageName}
 * @throws Exception propagated from the extraction, file-writing or merging steps
 */
private FeatureModelVariable executeWikipediaToFML(String wikiPageName, String[] excludeColumnNames,
        String[] excludeProductNames, String[] excludeSectionNames, Map<String, String> renamings)
        throws Exception {

    WikiPageContentExtractor wikipediaExtractor = new WikiPageContentExtractor();

    String content = wikipediaExtractor.getContent(wikiPageName);

    assertNotNull(content);
    //System.err.println("content = " + content);

    WikiTabularExtractor wikiTabExtractor = new WikiTabularExtractor();

    //content = "'''Video converters''' are [[computer program]]s" ; 
    // NOTE(review): the second argument is the literal "video", whereas computeStatistic
    // passes the page name here -- confirm this is intended.
    String htmlContent = wikiTabExtractor.run(content, "video");

    assertNotNull(htmlContent);

    //Document doc = Jsoup.connect("http://en.wikipedia.org/w/index.php?title=" + wikiPageName).get();
    Document doc = Jsoup.parse(htmlContent);
    // keep a copy of the parsed document for debugging
    FileUtils.writeStringToFile(new File("output/" + wikiPageName + ".html"), doc.toString());

    //Element docContentEntryPoint = doc ; // doc.getElementsByClass("article-content").first(); 
    //Elements sections = docContentEntryPoint.getElementsByClass("section") ; 
    // FIXME what about no section ?
    //treatSection(doc.body());

    Elements tabs = doc.select("table");

    // treatTable receives the catalog list and is expected to fill it
    List<Catalog> catalogs = new ArrayList<Catalog>();
    for (Element section : tabs) {
        treatTable(section, catalogs);

    }
    /*for (Element section : sections) {
       treatSection (section, catalogs);
               
    }*/

    // set the "ID" / names
    // clean up

    // FIXME here it is specific 

    // remove the excluded columns from every catalog that actually has them
    for (Catalog catalog : catalogs) {
        for (String columnName : excludeColumnNames) {
            if (!catalog.hasHeader(columnName))
                continue;
            if (!catalog.removeColumn(columnName)) {
                System.err.println("Unable to remove the column " + columnName);
            }
        }

    }

    Set<String> excludeProductIDs = new HashSet<String>(Arrays.asList(excludeProductNames));
    Set<String> excludeSections = new HashSet<String>(Arrays.asList(excludeSectionNames));

    // one feature model per product of every non-excluded catalog
    List<FeatureModelVariable> fmvs = new ArrayList<FeatureModelVariable>();
    for (Catalog catalog : catalogs) {
        String catalogName = catalog.getName();
        if (excludeSections.contains(catalogName))
            continue;
        System.err.println("***" + catalogName + "****");
        /*
        if (!catalog.getName().equals("General information"))
           continue ; */
        for (Product product : catalog) {
            FeatureModelVariable fmv = product.toFeatureDiagram();
            /*
             * POST
             */

            // renaming

            Set<String> oFts = renamings.keySet(); // features to rename
            for (String oFt : oFts) {
                fmv.renameFeature(oFt, renamings.get(oFt));
            }

            // the identifier is read after the renamings have been applied
            String id = fmv.getIdentifier();
            if (!excludeProductIDs.contains(id))
                fmvs.add(fmv);

        }
        //System.err.println("\n\nfmvs=" + fmvs);
    }

    List<FeatureModelVariable> fmvsToMerge = new ArrayList<FeatureModelVariable>();

    // single catalog: nothing to aggregate, the list is reused as is (same instance).
    // NOTE(review): the test counts all catalogs, including the excluded sections.
    if (catalogs.size() == 1) {
        fmvsToMerge = fmvs;
    }
    // aggregate feature models with same identifiers when there are numerous catalogs (dimensions)
    else {
        Set<String> idsDone = new HashSet<String>();
        for (FeatureModelVariable fmv : fmvs) {
            String id1 = fmv.getIdentifier();
            if (idsDone.contains(id1))
                continue;
            //System.err.println("Aggregating..." + id1) ; // + " = " + fmv);

            List<FeatureModelVariable> toAggreagte = new ArrayList<FeatureModelVariable>();

            // collect every model (fmv included) carrying the same identifier
            for (FeatureModelVariable fmv2 : fmvs) {
                String id2 = fmv2.getIdentifier();
                if (id1.equals(id2)) {
                    toAggreagte.add(fmv2);
                }
            }
            if (!toAggreagte.isEmpty()) {
                fmvsToMerge.add(new AggregatorFM().build(toAggreagte, new HashSet<Expression<String>>(),
                        _interop(wikiPageName)));
            } else {
                // NOTE(review): presumably unreachable -- fmv itself is part of fmvs, so the
                // list should contain at least that element; confirm before relying on it.
                System.err.println("Didn't find another for " + id1);
                continue;
            }

            idsDone.add(id1);
        }
    }

    // serialize product by product (for debug)
    StringBuffer sb = new StringBuffer();
    int i = 0;
    for (FeatureModelVariable fmv : fmvsToMerge) {
        sb.append("fmProduct" + i++ + " = FM (" + fmv + "\n)\n\n");
    }
    File f = new File(OUTPUT_DIRECTORY + wikiPageName + "_FMLMergingScript" + ".fml");
    FileUtils.writeStringToFile(f, sb.toString());

    FMLMergerBDD fmlMerger = new FMLMergerBDD(fmvsToMerge, _builder); // 

    FeatureModelVariable fmMerged = null;

    _shell.setVerbose(true);
    // both flags below are hard-coded to false: the two SAT-based branches are disabled
    boolean _SAT_EVALUATION = false;
    if (_SAT_EVALUATION) {
        fmMerged = new FMLMergerDisjunctiveSAT(fmvsToMerge).union();
        fmMerged.setIdentifier(wikiPageName);
        return fmMerged;
    }

    boolean _SAT_EVALUATION_2 = false;
    if (_SAT_EVALUATION_2) {
        Collection<Expression<String>> exprs = new TseitinTransformationDisjunctive(
                fmvsToMerge.toArray(new FeatureModelVariable[] {})).compute(); //new TseitinTransformation(_z3, b12).compute();
        //System.err.println("exprs:" + exprs);
        // SMT bridges
        System.err.println("" + new FeatureModelVariableSATFormula("",
                new SATFMLFormula(ExpressionUtility.mkConjunction(exprs))).computeImplicationGraph());
        return null;
    }

    //Formula<String> flaMerged = fmlMerger.calculateFormula(Mode.StrictUnion);
    //System.err.println("#fla=" + flaMerged.getDomain().size());

    // effective path: union of the product models, with a configuration that answers
    // false to both isAddingCrossTreeConstraints() and hasOrGroupSupport()
    fmMerged = fmlMerger.union(new KSynthesisConfiguration() {

        @Override
        public boolean isAddingCrossTreeConstraints() {
            return false; //false;
        }

        @Override
        public boolean hasOrGroupSupport() {
            return false;
        }

    });

    // post-process: the feature named after each non-excluded catalog becomes mandatory,
    // and the same feature is conjoined to the formula of the merged model
    for (Catalog catalog : catalogs) {
        String catalogName = catalog.getName();
        if (excludeSections.contains(catalogName))
            continue;
        if (fmMerged.features().names().contains(catalogName)) {
            fmMerged.setMandatory(fmMerged.getFeature(catalogName));
            //  fmMerged.addConstraint(new Expression<String>(catalogName));
            fmMerged.getFormula()
                    .andWith(new Formula<String>(_builder.mkExpression(new Expression<String>(catalogName)),
                            Arrays.asList(catalogName), _builder));
        }
    }

    fmMerged.setIdentifier(wikiPageName);
    return fmMerged;
}

From source file:com.pagecrumb.proxy.util.filter.HtmlProxyTransformParser.java

/**
 * Rewrites the URLs of an HTML page so that they point to the proxy, then re-serializes
 * the page with jsoup.
 *
 * <p>A {@code NodeVisitor} walks the parsed page and rewrites the {@code href} of
 * {@code link} and {@code a} tags, the {@code src} of {@code script} and {@code img}
 * tags and the {@code action} of {@code form} tags by prefixing them with
 * {@code hostServlet} (and {@code targetServer} for non-absolute values). The rewritten
 * markup is stored in {@code this.html} after a jsoup parse / {@code toString()} round trip.
 *
 * @param html the HTML source to transform
 * @param targetServer address of the proxied server, prepended to non-absolute URLs
 * @throws ParserException presumably raised by the HTML parser while parsing {@code html}
 */
public HtmlProxyTransformParser(String html, final String targetServer) throws ParserException {

    log.debug("Creating Html Parser Object.");

    // TODO Using targetServer directly is dangerous
    // because there might be URL passed which is not absolute URL.
    // its either the URL is decoded using some utilities to get
    // absolute domain

    this.targetServer = targetServer;

    log.info(this.getClass().toString() + " " + "Requested URL: " + this.targetServer);

    NodeVisitor linkvisitor = new NodeVisitor() {

        @Override
        public void visitTag(Tag tag) {
            String name = tag.getTagName();

            // <link href="...">
            if ("link".equalsIgnoreCase(name)) {
                String hrefValue = tag.getAttribute("href");
                if (hrefValue != null && !hrefValue.startsWith("/proxy")) {
                    if (hrefValue.startsWith("http://") || hrefValue.startsWith("https://")) { // add more protocols later
                        log.info("Rewriting with targetServer: " + hrefValue);
                        hrefValue = hostServlet + hrefValue;
                    }
                    // NOTE(review): '&&' binds tighter than '||' and a value cannot start with
                    // both "http://" and "https://", so this condition is true for every value;
                    // it also runs on the value possibly rewritten just above.
                    if (!hrefValue.startsWith("http://")
                            || !hrefValue.startsWith("https://") && !hrefValue.startsWith("/proxy")) { // add more protocols later

                        if (!hrefValue.startsWith("/")) {
                            hrefValue = "/" + hrefValue;
                        }

                        if (hrefValue.startsWith("/") && !hrefValue.startsWith("/proxy")) {
                            log.info("Rewriting with targetServer: " + hrefValue);
                            hrefValue = hostServlet + targetServer + hrefValue;
                        }
                        // escape ampersands before writing the attribute back
                        hrefValue = hrefValue.replaceAll("&", "&amp;");
                        tag.setAttribute("href", hrefValue);
                        log.debug("hrefValue=" + hrefValue);
                    }
                }
            }

            /**
             * Anchor 
             */
            if ("a".equalsIgnoreCase(name)) {
                String hrefValue = tag.getAttribute("href");

                if (hrefValue != null && !hrefValue.startsWith("/proxy")) // Prevent over re-writing the proxy strings
                {
                    log.debug("hrefValue=" + hrefValue);

                    // protocol-relative URL: made absolute with the http scheme
                    if (hrefValue.startsWith("//")) {
                        hrefValue = "http:" + hrefValue;
                    }

                    if (hrefValue.startsWith("http://") || hrefValue.startsWith("https://")) { // add more protocols later
                        log.info("Rewriting with targetServer: " + hrefValue);
                        hrefValue = hostServlet + hrefValue;
                    }

                    // TODO Check if the href value is just a filename e.g "home.html"

                    /**
                     * Mail Protocol
                     */
                    else if (hrefValue.startsWith("mailto:")) {
                        // mailto links are left untouched (apart from the '&' escaping below)
                    }
                    /**
                     * HTTP Protocol
                     */
                    // NOTE(review): as written this condition is always true when reached
                    // (operator precedence), so it acts as a plain 'else'.
                    else if (!hrefValue.startsWith("http://")
                            || !hrefValue.startsWith("https://") && !hrefValue.startsWith("/proxy")) { // add more protocols later
                        // TODO Must run hrefValue in malformed URL fix, to fix problems with the URL
                        // i.e. "double slash" http://127.0.0.1:8888/proxy?http://localhost:8080/docs//introduction.html
                        // reason could be that target server end with "/"
                        if (!hrefValue.startsWith("/")) {
                            hrefValue = "/" + hrefValue;
                            hrefValue = hostServlet + targetServer + hrefValue;
                        } else if (hrefValue.startsWith("/") && !hrefValue.startsWith("/proxy")) {
                            log.info("Rewriting with targetServer: " + hrefValue);
                            hrefValue = hostServlet + targetServer + hrefValue;
                        }

                    }

                    hrefValue = hrefValue.replaceAll("&", "&amp;");
                    tag.setAttribute("href", hrefValue);
                    log.debug("hrefValue=" + hrefValue);

                }
            }

            // TODO hostServletNoFilter is intended to be used for 
            // non page documents, like .js or .css
            // this way it will not run through the filter URL rewriting

            // <script src="...">
            if ("script".equalsIgnoreCase(name)) {
                String srcValue = tag.getAttribute("src");
                // NOTE(review): absolute URLs ("http://...") do not start with "/" either, so
                // they also take this branch -- confirm this is intended.
                if (srcValue != null && !srcValue.startsWith("/")) {
                    srcValue = "/" + srcValue;
                    srcValue = /*hostServer +*/ hostServlet + targetServer + srcValue;
                }
                // NOTE(review): this test runs on the value possibly rewritten just above
                if (srcValue != null && srcValue.startsWith("//")) { // special case (see YouTube)
                    //srcValue = /*hostServer +*/ hostServletNoFilter + targetServer + srcValue;
                    srcValue = "http:" + srcValue;
                    srcValue = hostServlet + srcValue;
                }
                if (srcValue != null) {
                    tag.setAttribute("src", srcValue);
                }
                log.debug("srcValue=" + srcValue);
            }

            // <form action="...">
            if ("form".equalsIgnoreCase(name)) {
                String actionValue = tag.getAttribute("action");
                if (actionValue != null && !actionValue.startsWith("/")) {
                    actionValue = "/" + actionValue;
                    actionValue = hostServlet + targetServer + actionValue;
                }
                // NOTE(review): evaluated on the value possibly rewritten just above; if
                // hostServlet starts with "/", a relative action is prefixed twice -- confirm.
                if (actionValue != null && actionValue.startsWith("/")) {
                    actionValue = hostServlet + targetServer + actionValue;
                }
                if (actionValue != null) {
                    tag.setAttribute("action", actionValue);
                }
                log.debug("actionValue=" + actionValue);
            }

            /**
             * Get javascripts
             */
            // placeholder: the script content is not processed yet (body is commented out)
            if ("script".equalsIgnoreCase(name)) {
                ScriptTag script = (ScriptTag) tag;
                if (script != null) {
                    //String text = script.getStringText();
                    //final AstNode astRoot = new org.mozilla.javascript.Parser().parse(text, "", 1);
                    //log.info("Script_from_parser="+astRoot.toSource());
                    //log.info("script="+text);
                    // Parse the script? based on the documented activities.
                }
            }

            // <img src="...">
            if ("img".equalsIgnoreCase(name)) {
                String srcValue = tag.getAttribute("src");
                if (srcValue != null && !srcValue.startsWith("/proxy")) {
                    if (srcValue.startsWith("http://") || srcValue.startsWith("https://")) { // add more protocols later
                        log.info("Rewriting with targetServer: " + srcValue);
                        srcValue = hostServlet + srcValue;
                    }
                    // NOTE(review): always true as written (operator precedence), and evaluated
                    // on the value possibly rewritten just above.
                    if (!srcValue.startsWith("http://")
                            || !srcValue.startsWith("https://") && !srcValue.startsWith("/proxy")) { // add more protocols later

                        if (!srcValue.startsWith("/")) {
                            srcValue = "/" + srcValue;
                            srcValue = hostServlet + targetServer + srcValue;
                        }

                        if (srcValue.startsWith("/") && !srcValue.startsWith("/proxy")) {
                            log.info("Rewriting with targetServer: " + srcValue);
                            srcValue = hostServlet + targetServer + srcValue;
                        }
                        srcValue = srcValue.replaceAll("&", "&amp;");
                        tag.setAttribute("src", srcValue);
                        log.debug("srcValue=" + srcValue);
                    }
                }
            }

        }
    };

    // parse the page, apply the rewriting visitor to every node and serialize the result
    Parser parser = new Parser(html, null);

    NodeList nl = parser.parse(null);

    nl.visitAllNodesWith(linkvisitor);

    this.html = nl.toHtml();

    Document doc = Jsoup.parse(this.html);

    //Element bScriptElement = new Element(org.jsoup.parser.Tag.valueOf("script"), "");
    //bScriptElement.attr("src", "/browz.js");
    //bScriptElement.attr("type", "text/javascript");
    //bScriptElement.attr("language", "javascript");

    // NOTE: the two script elements below are built but never attached to the document,
    // because the insertion code further down is commented out.
    Element jqEl = new Element(org.jsoup.parser.Tag.valueOf("script"), "");
    jqEl.attr("src", "/jquery.min.js");
    jqEl.attr("type", "text/javascript");
    jqEl.attr("language", "javascript");

    Element bzEl = new Element(org.jsoup.parser.Tag.valueOf("script"), "");
    bzEl.attr("src", "/browz.js");
    bzEl.attr("type", "text/javascript");
    bzEl.attr("language", "javascript");

    //doc.select("head").first().children().first().before("<script type=\"text/javascript\" language=\"javascript\">" 
    //      + readFileAsString("browz.js") + "</script>");

    // Important! Removed to satisfy error, must be reviewed

    //      doc.select("head").first().children().first().before(bzEl);
    //      doc.select("head").first().children().first().before(jqEl);
    /*
    for (Element el : doc.getElementsByTag("html")) {
       jqEl = el.appendElement("script");
       jqEl.attr("src", "/jquery.min.js");
       jqEl.attr("type", "text/javascript");
       jqEl.attr("language", "javascript");
               
       bzEl = el.appendElement("script");
       bzEl.attr("src", "/browz.js");
       bzEl.attr("type", "text/javascript");
       bzEl.attr("language", "javascript");
    }
    */
    // the stored html is the jsoup serialization of the rewritten page
    this.html = doc.toString();

}

From source file:tr.edu.gsu.nerwip.retrieval.reader.wikipedia.WikipediaReader.java

/**
 * Loads the html source code from the cached file,
 * or fetches it from the web server if needed.
 * <br/>
 * When the page is fetched from the server, its jsoup
 * serialization is written to the cache file. The logger
 * offset increased on entry is always restored on exit,
 * even when an exception is thrown.
 * 
 * @param name
 *       Name of the concerned article.
 * @param url
 *       URL of the concerned article.
 * @return
 *       The DOM representation of the original page.
 * 
 * @throws IOException
 *       Problem while accessing the cache or web page.
 */
private Document retrieveSourceCode(String name, URL url) throws IOException {
    Document result = null;
    logger.increaseOffset();
    try {
        logger.log("Retrieve HTML source code");

        // check if the cache can/must be used
        String folderPath = FileNames.FO_OUTPUT + File.separator + name;
        File originalFile = new File(folderPath + File.separator + FileNames.FI_ORIGINAL_PAGE);
        if (cache && originalFile.exists()) {
            logger.log("Cache enabled and HTML already retrieved >> we use the cached file ("
                    + originalFile.getName() + ")");
            String sourceCode = FileTools.readTextFile(originalFile);
            result = Jsoup.parse(sourceCode);
        }

        // otherwise, load and cache the html file
        else {
            logger.log("Cache disabled or HTML never retrieved before>> we get it from the web server");

            // use jsoup page loader (timeout expressed in ms)
            int timeOut = 5000;
            result = Jsoup.parse(url, timeOut);
            String sourceCode = result.toString();

            // cache html source code
            FileTools.writeTextFile(originalFile, sourceCode);
        }
    } finally {
        // keep the logger indentation balanced, whatever happened above
        logger.decreaseOffset();
    }

    return result;
}

From source file:net.acesinc.convergentui.ConvergentUIResponseFilter.java

/**
 * Composes the final response by filling the placeholders of the downstream page.
 *
 * <p>Every {@code div[data-loc]} element of the downstream HTML is a placeholder whose
 * {@code data-*} attributes drive the composition:
 * <ul>
 * <li>{@code data-loc}: URL of the content to fetch;</li>
 * <li>{@code data-fragment-name}: optional; when present only the matching
 * {@code div[data-fragment-name=...]} elements of the fetched document are inserted,
 * otherwise the whole fetched document is;</li>
 * <li>{@code data-cache-name} and {@code data-disable-caching}: handed to the content manager;</li>
 * <li>{@code data-fail-quietly}: when true, failures insert an empty
 * {@code <div class='cui-error'>} (or leave the placeholder untouched when an exception
 * occurs) instead of a visible error message.</li>
 * </ul>
 * An invalid {@code data-loc} is reported inside the placeholder like any other failure.
 * When the page has no replaceable element, or composition yields an empty body, the
 * original downstream body is written unchanged.
 *
 * @return always {@code null}
 */
@Override
public Object run() {

    String origBody = contentManager.getDownstreamResponse();
    if (origBody == null || origBody.isEmpty()) {
        return null;
    }

    String composedBody = null;
    log.trace("Response from downstream server: " + origBody);

    Document doc = Jsoup.parse(origBody);
    if (hasReplaceableElements(doc)) {
        log.debug("We have replaceable elements. Let's get em!");
        Elements elementsToUpdate = doc.select("div[data-loc]");
        for (Element e : elementsToUpdate) {
            String location = e.dataset().get("loc");
            String fragmentName = e.dataset().get("fragment-name");
            String cacheName = e.dataset().get("cache-name");
            boolean useCaching = !Boolean.parseBoolean(e.dataset().get("disable-caching"));
            boolean failQuietly = Boolean.parseBoolean(e.dataset().get("fail-quietly"));

            URL url;
            try {
                url = new URL(location);
            } catch (MalformedURLException ex) {
                log.warn("location was invalid: [ " + location + " ]", ex);
                // the marker must be written into the placeholder, otherwise the failure is invisible
                if (!failQuietly) {
                    e.html("<span class='cui-error'>Failed getting content from remote service. Reason: data-loc was an invalid location.</span>");
                } else {
                    e.html("<div class='cui-error'></div>");
                }
                continue;
            }
            String protocol = url.getProtocol();
            String service = url.getHost();

            log.debug("Fetching content at location [ " + location + " ] with cacheName = [ " + cacheName
                    + " ]");

            StringBuilder content = new StringBuilder();
            try {
                RequestContext context = RequestContext.getCurrentContext();
                ContentResponse response = contentManager.getContentFromService(location, cacheName,
                        useCaching, context);

                log.trace(response.toString());

                if (!response.isError()) {
                    Object resp = response.getContent();
                    // instanceof is null-safe: a null content is reported as "not text"
                    if (resp instanceof String) {
                        String subContentResponse = (String) resp;
                        //TODO You better trust the source of your downstream HTML!
                        Document subDocument = Jsoup.parse(subContentResponse);

                        if (fragmentName != null) {
                            Elements fragments = subDocument
                                    .select("div[data-fragment-name=\"" + fragmentName + "\"]");

                            if (fragments != null && !fragments.isEmpty()) {
                                if (fragments.size() == 1) {
                                    Element frag = fragments.first();

                                    // server-relative image urls are redirected to the originating service
                                    Elements images = frag.select("img");
                                    for (Element i : images) {
                                        String src = i.attr("src");
                                        if (src.startsWith("/") && !src.startsWith("//")) {
                                            i.attr("src", "/cui-req://" + protocol + "://" + service + src);
                                        } //else what do we do about relative urls?
                                    }

                                    content.append(frag.toString());

                                } else {
                                    // NOTE(review): image urls are not rewritten when several fragments match
                                    for (Element frag : fragments) {
                                        content.append(frag.toString()).append("\n\n");
                                    }
                                }
                            } else {
                                log.debug("Found no matching fragments for [ " + fragmentName + " ]");
                                if (failQuietly) {
                                    content.append("<div class='cui-error'></div>");
                                } else {
                                    content.append(
                                            "<span class='cui-error'>Failed getting content from remote service. Possible reason in reponse below</span>");
                                    content.append(subDocument.toString());
                                }
                            }
                        } else {
                            //take the whole thing and cram it in there!
                            content.append(subDocument.toString());
                        }
                    } else {
                        //not text...
                        if (!failQuietly) {
                            content.append(
                                    "<span class='cui-error'>Failed getting content from remote service. Reason: content was not text</span>");
                        } else {
                            content.append("<div class='cui-error'></div>");
                        }
                    }

                } else {
                    if (!failQuietly) {
                        content.append(
                                "<span class='cui-error'>Failed getting content from remote service. Reason: "
                                        + response.getMessage() + "</span>");
                    } else {
                        content.append("<div class='cui-error'></div>");
                    }
                }

                //now append it to the page
                String composed = content.toString();
                if (!composed.isEmpty()) {
                    e.html(composed);
                }
            } catch (Throwable t) {
                // best effort: one failing placeholder must not break the whole page
                if (!failQuietly) {
                    e.html("<span class='cui-error'>Failed getting content from remote service. Reason: "
                            + t.getMessage() + "</span>");
                }
                log.warn("Failed replacing content", t);
            }

        }

        composedBody = doc.toString();
    } else {
        log.debug("Document has no replaeable elements. Skipping");
    }

    try {
        addResponseHeaders();
        // fall back to the untouched downstream body when nothing was composed
        String body = (composedBody != null && !composedBody.isEmpty()) ? composedBody : origBody;
        writeResponse(body, getMimeType(RequestContext.getCurrentContext()));
    } catch (Exception ex) {
        log.error("Error sending response", ex);

    }
    return null;
}

From source file:tr.edu.gsu.nerwip.retrieval.reader.wikipedia.WikipediaReader.java

/**
 * Pulls a text from a Wikipedia URL without images, tags, etc.
 * 
 * @param url
 *       Address of the targetted text.
 * @return
 *       An Article object representing the retrieved object.
 * 
 * @throws ReaderException
 *       Problem while retrieving the text.
 */
@Override
public Article read(URL url) throws ReaderException {
    Article result = null;
    String name = getName(url);

    try { // get the page
        String address = url.toString();
        logger.log("Retrieving page " + address);
        long startTime = System.currentTimeMillis();
        Document document = retrieveSourceCode(name, url);

        // get its title
        Element firstHeadingElt = document.getElementsByAttributeValue(XmlNames.ATT_ID, ID_TITLE).get(0);
        String title = firstHeadingElt.text();
        logger.log("Get title: " + title);

        // get raw and linked texts
        logger.log("Get raw and linked texts.");
        StringBuilder rawStr = new StringBuilder();
        StringBuilder linkedStr = new StringBuilder();
        Element bodyContentElt = document.getElementsByAttributeValue(XmlNames.ATT_ID, ID_CONTENT).get(0);
        // processing each element in the content part
        boolean ignoringSection = false;
        boolean first = true;
        for (Element element : bodyContentElt.children()) {
            String eltName = element.tag().getName();
            String eltClass = element.attr(XmlNames.ATT_CLASS);

            // section headers
            if (eltName.equals(XmlNames.ELT_H2)) {
                first = false;
                // get section name
                StringBuilder fakeRaw = new StringBuilder();
                StringBuilder fakeLinked = new StringBuilder();
                processParagraphElement(element, fakeRaw, fakeLinked);
                String str = fakeRaw.toString().trim().toLowerCase(Locale.ENGLISH);
                // check section name
                if (IGNORED_SECTIONS.contains(str))
                    ignoringSection = true;
                else {
                    ignoringSection = false;
                    rawStr.append("\n-----");
                    linkedStr.append("\n-----");
                    processParagraphElement(element, rawStr, linkedStr);
                }
            }

            else if (!ignoringSection) { // lower sections
                if (eltName.equals(XmlNames.ELT_H3) || eltName.equals(XmlNames.ELT_H4)
                        || eltName.equals(XmlNames.ELT_H5) || eltName.equals(XmlNames.ELT_H6)) {
                    first = false;
                    processParagraphElement(element, rawStr, linkedStr);
                }

                // paragraph
                else if (eltName.equals(XmlNames.ELT_P)) {
                    String str = element.text();
                    // ignore possible initial disambiguation link
                    if (!first || !str.startsWith(PARAGRAPH_FORTHE)) {
                        first = false;
                        processParagraphElement(element, rawStr, linkedStr);
                    }
                }

                // list
                else if (eltName.equals(XmlNames.ELT_UL)) {
                    first = false;
                    processListElement(element, rawStr, linkedStr, false);
                } else if (eltName.equals(XmlNames.ELT_OL)) {
                    first = false;
                    processListElement(element, rawStr, linkedStr, true);
                } else if (eltName.equals(XmlNames.ELT_DL)) {
                    first = false;
                    processDescriptionListElement(element, rawStr, linkedStr);
                }

                // tables
                else if (eltName.equals(XmlNames.ELT_TABLE)) {
                    first = !processTableElement(element, rawStr, linkedStr);
                }

                // divisions
                else if (eltName.equals(XmlNames.ELT_DIV)) { // ignore possible initial picture 
                    if (!first || eltClass == null || !eltClass.contains(CLASS_THUMB))
                        first = !processDivisionElement(element, rawStr, linkedStr);
                }

                // we ignore certain types of span (phonetic trancription, WP buttons...) 
                else if (eltName.equals(XmlNames.ELT_SPAN)) {
                    first = !processSpanElement(element, rawStr, linkedStr);
                }

                // hyperlinks must be included in the linked string, provided they are not external
                else if (eltName.equals(XmlNames.ELT_A)) {
                    first = !processHyperlinkElement(element, rawStr, linkedStr);
                }

                // quotes are just processed recursively
                else if (eltName.equals(XmlNames.ELT_BLOCKQUOTE)) {
                    first = !processQuoteElement(element, rawStr, linkedStr);
                }

                // other tags are ignored
            }
        }

        // create article object
        result = new Article(name);
        result.setTitle(title);
        result.setUrl(url);
        result.initDate();

        // clean text
        String rawText = rawStr.toString();
        rawText = cleanText(rawText);
        //         rawText = ArticleCleaning.replaceChars(rawText);
        result.setRawText(rawText);
        logger.log("Length of the raw text: " + rawText.length() + " chars.");
        String linkedText = linkedStr.toString();
        linkedText = cleanText(linkedText);
        //         linkedText = ArticleCleaning.replaceChars(linkedText);
        result.setLinkedText(linkedText);
        logger.log("Length of the linked text: " + linkedText.length() + " chars.");

        // get original html source code
        logger.log("Get original HTML source code.");
        String originalPage = document.toString();
        result.setOriginalPage(originalPage);
        logger.log("Length of the original page: " + originalPage.length() + " chars.");

        // get the categories of the article 
        List<ArticleCategory> categories = getArticleCategories(result);
        result.setCategories(categories);

        long endTime = System.currentTimeMillis();
        logger.log("Total duration: " + (endTime - startTime) + " ms.");
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (org.json.simple.parser.ParseException e) {
        e.printStackTrace();
    }

    return result;
}

From source file:moose.com.ac.ArticleViewActivity.java

/**
 * Replaces the {@code <img>} elements of an HTML fragment with placeholder
 * markup and stores the resulting document in {@code HtmlBody}.
 * <p>
 * Images with an empty {@code src}, a {@code file} scheme or no path are left
 * untouched. Every other image is removed and replaced either by a text
 * placeholder (mode 1 without Wi-Fi) or by a "loading" image carrying the
 * resolved URL.
 * <p>
 * The document is serialised once, after all images have been handled, rather
 * than once per image; {@code HtmlBody} is left unchanged when no image was
 * replaced.
 *
 * @param str
 *       HTML to parse and rewrite.
 */
private void filterImg(String str) {
    Document mDocument = Jsoup.parse(str);
    // becomes true as soon as one image has actually been replaced
    boolean replacedAny = false;

    Elements imgs = mDocument.select("img");
    for (Element img : imgs) {
        String src = img.attr("src").trim();
        if (TextUtils.isEmpty(src))
            continue;
        Uri parsedUri = Uri.parse(src);
        if ("file".equals(parsedUri.getScheme()))
            continue;
        if (parsedUri.getPath() == null)
            continue;
        // NOTE(review): any scheme other than "http" (including "https") is
        // rewritten to http://www.acfun.tv -- confirm this is intended.
        if (!"http".equals(parsedUri.getScheme())) {
            parsedUri = parsedUri.buildUpon().scheme("http").authority("www.acfun.tv").build();
        }
        // url may have encoded path
        parsedUri = parsedUri.buildUpon().path(parsedUri.getPath()).build();
        src = parsedUri.toString();
        Log.i(TAG, "image src:" + src);
        img.attr("org", src);
        if (CommonUtil.getMode() == 1 && !CommonUtil.isWifiConnected(mContext)) {
            Log.i(TAG, "[?]");
            img.after("<div style=\"width: 100%;text-align: center;\"><br><p>[]</p></div>");
        } else {
            Log.i(TAG, "[?]");
            // NOTE(review): the literal below puts ';' directly in front of
            // "onclick" -- confirm the parsed attribute name is the intended one.
            StringBuilder builder = new StringBuilder();
            builder.append("<div style='text-align: center;'><br>")
                    .append("<img src='file:///android_asset/loading.gif'").append("name = '").append(src)
                    .append("'\n;onclick = window.JsBridge.showImage('").append(src).append("')")
                    .append(" alt=' '/>\n").append("</div>");
            img.after(builder.toString());
            Log.i(TAG, "image:table:-" + builder.toString());
        }

        img.remove();
        replacedAny = true;
    }

    // Serialise once: the document does not change after the last replacement.
    if (replacedAny) {
        HtmlBody = mDocument.toString();
        Log.i(TAG, "??html:" + HtmlBody);
    }
}

From source file:org.craftercms.social.migration.controllers.MainController.java

/**
 * Renders the user log entries of {@code logTable} into the HTML logger
 * template and writes the resulting document to {@code writer}.
 * <p>
 * One {@code <tr>} (source, date, message) is appended to the template element
 * with id {@code logs} for every {@link UserLogEntry} item; other item types
 * are skipped. The writer is always closed before this method returns, whether
 * it succeeds or fails.
 *
 * @param writer
 *       Destination of the generated HTML; closed by this method.
 * @throws TransformerException
 *       Declared for callers; not raised by the current jsoup-based
 *       implementation.
 * @throws IOException
 *       If the template resource cannot be found or read, or if writing fails.
 */
protected void getHtml(final FileWriter writer) throws TransformerException, IOException {
    // try-with-resources guarantees the writer is closed even when rendering fails
    try (FileWriter out = writer) {
        final String templatePath = MigrationTool.systemProperties.getString("crafter.migration.loggerTemplate");
        final URL in = getClass().getResource(templatePath);
        if (in == null) {
            // fail with a clear message instead of a NullPointerException further down
            log.error("Unable to find {}", templatePath);
            throw new IOException("Unable to find logger template " + templatePath);
        }
        final Document loggingDoc = Jsoup.parse(IOUtils.toString(in));
        // NOTE(review): getElementById returns null when the template has no
        // element with id "logs"; appendChild below would then fail.
        final Element logs = loggingDoc.getElementById("logs");
        // NOTE(review): "hh" is the 12-hour clock and the pattern has no AM/PM
        // marker -- confirm whether "HH" was meant.
        final SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy MM dd hh:mm:ss zzz");
        for (Object o : logTable.getItems()) {
            if (o instanceof UserLogEntry) {
                UserLogEntry userLogEntry = (UserLogEntry) o;
                String dateFormat = dateFormatter.format(userLogEntry.getDate());
                final Element tr = loggingDoc.createElement("tr");
                tr.attr("class", userLogEntry.getLevel().getCssClass());
                final Element tmigrator = loggingDoc.createElement("td");
                final Element tdate = loggingDoc.createElement("td");
                final Element tmessage = loggingDoc.createElement("td");
                tmessage.attr("class", "text-center");
                tmessage.text(userLogEntry.getMessage());
                tdate.text(dateFormat);
                tmigrator.text(userLogEntry.getSource());
                tr.appendChild(tmigrator);
                tr.appendChild(tdate);
                tr.appendChild(tmessage);
                logs.appendChild(tr);
            }
        }
        IOUtils.write(loggingDoc.toString(), out);
        out.flush();
    }
}

From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java

/**
 * Manual smoke test: prints the result of {@code removeTags} on a sample
 * sentence, then fetches the "mdn" type content for {@code Array} and prints
 * it after parsing it with jsoup.
 *
 * @param args
 *       Ignored.
 */
public static void main(String[] args) {
    System.out.println(removeTags("Ceci est un test <a href=\"tutu\">slurp</a> hop <code>arlgs</code>.",
            new String[] { "a", "body" }));
    String content = getTypeContent(null, "mdn", JSweetDefTranslatorConfig.LANG_PACKAGE, "Array");
    // The second argument of Jsoup.parse(String, String) is a base URI, not a
    // charset; the content is already a String, so no charset is involved.
    Document doc = Jsoup.parse(content);
    System.out.println(doc.toString());
}

From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java

/**
 * Scrapes the trailers listed for a movie, looked up by its IMDB id.
 * <p>
 * The search page is queried with the IMDB id, the first matching film link is
 * followed, and trailers are extracted from the details page in two formats
 * ("old style" JavaScript snippets and "new style" {@code clips} markup). Both
 * result sets are added to the returned list.
 *
 * @param options
 *       Scrape options; only the IMDB id is read here.
 * @return
 *       The trailers found; empty when the IMDB id is invalid or the search
 *       yields no result.
 * @throws Exception
 *       Any failure while fetching or parsing is logged together with the
 *       current URL and rethrown unchanged.
 */
@Override
public List<MediaTrailer> getTrailers(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getTrailers() " + options.toString());
    List<MediaTrailer> trailers = new ArrayList<>();
    if (!MetadataUtil.isValidImdbId(options.getImdbId())) {
        LOGGER.debug("IMDB id not found");
        return trailers;
    }

    Url url = null;
    String searchString = BASE_URL + "/view.php?page=suchergebnis&Kat=IMDb&SText=" + options.getImdbId();
    try {
        // search with IMDB
        url = new Url(searchString);
        Document doc;
        // try-with-resources: the stream is closed even if parsing throws
        try (InputStream in = url.getInputStream()) {
            doc = Jsoup.parse(in, "UTF-8", "");
        }
        Elements filme = doc.getElementsByAttributeValueMatching("href", "film\\/\\d+,");
        if (filme == null || filme.isEmpty()) {
            LOGGER.debug("found no search results");
            return trailers;
        }
        LOGGER.debug("found " + filme.size() + " search results"); // hopefully only one

        LOGGER.debug("get (trailer) details page");
        url = new Url(BASE_URL + "/" + StrgUtils.substr(filme.first().toString(), "href=\\\"(.*?)\\\""));
        try (InputStream in = url.getInputStream()) {
            doc = Jsoup.parse(in, "UTF-8", "");
        }

        // url + format of a single download link; shared by both styles below
        Pattern linkPattern = Pattern.compile("<a href=\"(.*?)\">(.*?)</a>");

        // OLD STYLE
        // The page embeds a JavaScript function getTrailerData(ci) whose switch
        // returns one HTML snippet per clip, e.g.
        // return '<b>Trailer 1</b><br><i>(xxlarge)</i><br><br>&raquo; 640px<br><br>Download:<br>&raquo; <a href=
        // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.wmv?log_var=72|491100001-1|-" >wmv</a><br>&raquo; <a href=
        // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.mp4?log_var=72|491100001-1|-" >mp4</a><br>&raquo; <a href=
        // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.webm?log_var=72|491100001-1|-" >webm</a><br>';
        Pattern regex = Pattern.compile("return '(.*?)';");
        Matcher m = regex.matcher(doc.toString());
        while (m.find()) {
            String s = m.group(1);
            String tname = StrgUtils.substr(s, "<b>(.*?)</b>");
            String tpix = StrgUtils.substr(s, "raquo; (.*?)x<br>");
            // String tqual = StrgUtils.substr(s, "<i>\\((.*?)\\)</i>");

            Matcher lm = linkPattern.matcher(s);
            while (lm.find()) {
                String turl = lm.group(1);
                // String tformat = lm.group(2);
                MediaTrailer trailer = new MediaTrailer();
                trailer.setName(tname);
                // trailer.setQuality(tpix + " (" + tformat + ")");
                trailer.setQuality(tpix);
                trailer.setProvider("filmtrailer");
                trailer.setUrl(turl);
                LOGGER.debug(trailer.toString());
                trailers.add(trailer);
            }
        }

        // NEW STYLE (additional!)
        // <div class="clips" id="clips2" style="display: none;">
        // <img src="images/flag_de.gif" align="left" vspace="3" width="18" height="12">&nbsp;
        // <img src="images/trailer_6.gif" align="top" vspace="1" width="16" height="16" alt="freigegeben ab 6 Jahren">&nbsp;
        // <i>Trailer 1:</i>
        // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_1.flv?log_var=67|491100001-1|-">&nbsp;small&nbsp;</a> &nbsp;
        // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_2.flv?log_var=67|491100001-1|-">&nbsp;medium&nbsp;</a> &nbsp;
        // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_3.flv?log_var=67|491100001-1|-">&nbsp;large&nbsp;</a> &nbsp;
        // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_4.flv?log_var=67|491100001-1|-">&nbsp;xlarge&nbsp;</a> &nbsp;
        // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_5.flv?log_var=67|491100001-1|-">&nbsp;xxlarge&nbsp;</a> &nbsp;
        // <br>
        // ... (same group repeated for "Trailer 2:", "Trailer 3:", ...)
        // <br>
        // </div>

        // new style size
        // 1 = 160 x 90 = small
        // 2 = 240 x 136 = medium
        // 3 = 320 x 180 = large
        // 4 = 400 x 226 = xlarge
        // 5 = 640 x 360 = xxlarge

        regex = Pattern.compile("<i>(.*?)</i>(.*?)<br>", Pattern.DOTALL); // get them as single trailer line
        m = regex.matcher(doc.getElementsByClass("clips").html());
        while (m.find()) {
            // LOGGER.info(doc.getElementsByClass("clips").html());
            // parse each line with 5 qualities
            String tname = m.group(1).trim();
            tname = tname.replaceFirst(":$", ""); // replace ending colon

            String urls = m.group(2);
            Matcher lm = linkPattern.matcher(urls);
            while (lm.find()) {
                String turl = lm.group(1);
                String tpix = "";
                String tformat = lm.group(2).replaceAll("&nbsp;", "").trim();
                switch (tformat) {
                case "small":
                    tpix = "90p";
                    break;

                case "medium":
                    tpix = "136p";
                    break;

                case "large":
                    tpix = "180p";
                    break;

                case "xlarge":
                    tpix = "226p";
                    break;

                case "xxlarge":
                    tpix = "360p";
                    break;

                default:
                    break;
                }
                MediaTrailer trailer = new MediaTrailer();
                trailer.setName(tname);
                // trailer.setQuality(tpix + " (" + tformat + ")");
                trailer.setQuality(tpix);
                trailer.setProvider("filmtrailer");
                trailer.setUrl(turl);
                LOGGER.debug(trailer.toString());
                trailers.add(trailer);
            }
        }
    } catch (Exception e) {
        if (url != null) {
            LOGGER.error("Error parsing {}", url.toString());
        } else {
            LOGGER.error("Error parsing {}", searchString);
        }

        throw e;
    }
    return trailers;
}