Example usage for org.jsoup.nodes Element id

List of usage examples for org.jsoup.nodes Element id

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element method id().

Prototype

public String id() 

Source Link

Document

Get the id attribute of this element.

Usage

From source file:org.opens.tanaguru.rules.rgaa22.Rgaa22Rule03111.java

/**
 * Selects the form elements of the page, narrows the selection to those whose
 * non-empty id occurs exactly once on the page, and records every remaining
 * element for which no label element references its id.
 *
 * @param sspHandler     handler of the page being tested
 * @param elementHandler receives the selected elements; on return it holds
 *                       only the elements with a unique id
 */
@Override
protected void select(SSPHandler sspHandler, ElementHandler<Element> elementHandler) {
    ELEMENT_SELECTOR.selectElements(sspHandler, elementHandler);
    if (elementHandler.isEmpty()) {
        // nothing was selected, so there is nothing to filter
        return;
    }

    // Keep only the selected elements whose id is non-empty and unique on the page.
    final Elements uniquelyIdentified = new Elements();
    for (Element candidate : elementHandler.get()) {
        final String candidateId = candidate.id();
        if (StringUtils.isEmpty(candidateId.trim())) {
            continue;
        }
        final String idSelector = CssLikeSelectorBuilder.buildSelectorFromId(candidateId);
        if (CssLikeSelectorBuilder.getNumberOfElements(sspHandler, idSelector) == 1) {
            uniquelyIdentified.add(candidate);
        }
    }

    // Replace the global selection with the filtered subset.
    elementHandler.clean().addAll(uniquelyIdentified);

    // An element is reported when no label points at its id through the "for" attribute.
    // (When the subset is empty this loop simply does not run.)
    for (Element identified : uniquelyIdentified) {
        final String labelSelector = CssLikeSelectorBuilder
                .buildSelectorFromElementsAndAttributeValue(LABEL_ELEMENT, FOR_ATTR, identified.id());
        final int matchingLabels = CssLikeSelectorBuilder.getNumberOfElements(sspHandler, labelSelector);
        if (matchingLabels == 0) {
            this.elementsWithoutLabel.add(identified);
        }
    }
}

From source file:org.opens.tanaguru.rules.rgaa30.Rgaa30Rule110102.java

/**
 * Runs the checks of this rule on the previously collected inputs and labels.
 * <ol>
 * <li>every input must carry a non-empty, unique id attribute;</li>
 * <li>every label must carry a non-empty for attribute;</li>
 * <li>every input with an id must either match the "label input" selector or
 * be referenced by a label's for attribute;</li>
 * <li>every label's for attribute must resolve to an element id.</li>
 * </ol>
 * When no input was collected the test is reported as not applicable.
 *
 * @param sspHandler          handler of the page being tested
 * @param testSolutionHandler collects the solutions produced by the checkers
 */
@Override
protected void check(SSPHandler sspHandler, TestSolutionHandler testSolutionHandler) {

    // No input form element at all: the test is not applicable.
    if (inputFormMap.isEmpty()) {
        testSolutionHandler.addTestSolution(TestSolution.NOT_APPLICABLE);
        return;
    }

    // Pass 1: id attribute of the inputs (presence, emptiness, unicity).
    for (Map.Entry<Element, ElementHandler<Element>> inputEntry : inputFormMap.entrySet()) {
        final ElementHandler<Element> inputs = inputEntry.getValue();

        ElementChecker idPresence = new AttributePresenceChecker(ID_ATTR, TestSolution.PASSED,
                TestSolution.FAILED, null, ID_MISSING_MSG);
        idPresence.check(sspHandler, inputs, testSolutionHandler);

        // The emptiness checker keeps its default result, i.e. failed when the attribute is empty.
        ElementChecker idEmptiness = new TextEmptinessChecker(new TextAttributeOfElementBuilder(ID_ATTR),
                ID_MISSING_MSG, null);
        idEmptiness.check(sspHandler, inputs, testSolutionHandler);

        ElementChecker idUnicity = new IdUnicityChecker(ID_NOT_UNIQUE_MSG);
        idUnicity.check(sspHandler, inputs, testSolutionHandler);
    }

    // Pass 2: for attribute of the labels (presence, emptiness).
    for (Map.Entry<Element, ElementHandler<Element>> labelEntry : labelFormMap.entrySet()) {
        final ElementHandler<Element> labels = labelEntry.getValue();

        ElementChecker forPresence = new AttributePresenceChecker(FOR_ATTR, TestSolution.PASSED,
                TestSolution.FAILED, null, FOR_MISSING_MSG);
        forPresence.check(sspHandler, labels, testSolutionHandler);

        // The emptiness checker keeps its default result, i.e. failed when the attribute is empty.
        ElementChecker forEmptiness = new TextEmptinessChecker(new TextAttributeOfElementBuilder(FOR_ATTR),
                FOR_MISSING_MSG, null);
        forEmptiness.check(sspHandler, labels, testSolutionHandler);
    }

    // Pass 3: every input id must be linked to a label.
    for (Map.Entry<Element, ElementHandler<Element>> inputEntry : inputFormMap.entrySet()) {
        final Element scope = inputEntry.getKey();
        ElementHandler<Element> inputOnError = new ElementHandlerImpl();

        for (Element input : inputEntry.getValue().get()) {
            final String inputId = input.id();
            if (StringUtils.isBlank(inputId)) {
                continue;
            }

            ElementHandler<Element> labelsForInput = new ElementHandlerImpl();
            final String inputUnderLabelSelector = LABEL_ELEMENT + " " + CssLikeSelectorBuilder
                    .buildSelectorFromElementsAndAttributeValue(INPUT_ELEMENT, ID_ATTR, inputId);
            if (!scope.select(inputUnderLabelSelector).isEmpty()) {
                // the "label input" selector matched: no explicit for attribute required
                continue;
            }

            final String labelForSelector = CssLikeSelectorBuilder
                    .buildSelectorFromElementsAndAttributeValue(LABEL_ELEMENT, FOR_ATTR, inputId);
            labelsForInput.addAll(scope.select(labelForSelector));
            if (labelsForInput.isEmpty()) {
                inputOnError.add(input);
            }
        }

        ElementChecker orphanInputChecker = new ElementPresenceChecker(TestSolution.FAILED,
                TestSolution.PASSED, INVALID_INPUT_MSG, null);
        orphanInputChecker.check(sspHandler, inputOnError, testSolutionHandler);
    }

    // Pass 4: every label for attribute must point at an existing element id.
    for (Map.Entry<Element, ElementHandler<Element>> labelEntry : labelFormMap.entrySet()) {
        final Element scope = labelEntry.getKey();
        ElementHandler<Element> labelOnError = new ElementHandlerImpl();

        for (Element label : labelEntry.getValue().get()) {
            final String targetId = label.attr(FOR_ATTR);
            if (StringUtils.isBlank(targetId)) {
                continue;
            }

            ElementHandler<Element> targetsOfLabel = new ElementHandlerImpl();
            targetsOfLabel.addAll(scope.select(CssLikeSelectorBuilder.buildSelectorFromId(targetId)));
            if (targetsOfLabel.isEmpty()) {
                labelOnError.add(label);
            }
        }

        ElementChecker orphanLabelChecker = new ElementPresenceChecker(TestSolution.FAILED,
                TestSolution.PASSED, INVALID_LABEL_MSG, null);
        orphanLabelChecker.check(sspHandler, labelOnError, testSolutionHandler);
    }
}

From source file:org.structr.web.importer.Importer.java

/**
 * Recursively converts the child nodes of a parsed jsoup node into Structr DOM
 * nodes.
 * <p>
 * For every child of {@code startNode} this method creates a comment, content,
 * template, shared-component or element node, copies the HTML attributes onto
 * it as properties, applies pending comment instructions, attaches the new node
 * to {@code parent} and then descends into the child's own children.
 *
 * @param startNode           jsoup node whose children are imported
 * @param parent              Structr node the new nodes are appended to; may be
 *                            {@code null}, in which case no child relationship
 *                            is created
 * @param page                page used to create the new nodes; when
 *                            {@code null}, comment, content and plain element
 *                            nodes are not created
 * @param removeHashAttribute whether to strip the {@code data-structr-hash}
 *                            attribute from imported elements
 * @param depth               current recursion depth; only passed on
 *                            (incremented) to the recursive call
 * @return the first non-comment node created on this level, or {@code null}
 *         if none was created
 * @throws FrameworkException if creating or modifying a node fails
 */
private DOMNode createChildNodes(final Node startNode, final DOMNode parent, final Page page,
        final boolean removeHashAttribute, final int depth) throws FrameworkException {

    DOMNode rootElement = null;
    Linkable linkable = null;
    String instructions = null;

    final List<Node> children = startNode.childNodes();
    for (Node node : children) {

        String tag = node.nodeName();

        // clean tag: strip every character that is not a letter, digit, '#', ':', '.', '-' or '_'
        if (tag != null) {
            tag = tag.replaceAll("[^a-zA-Z0-9#:.\\-_]+", "");
        }

        final StringBuilder classString = new StringBuilder();
        final String type = CaseHelper.toUpperCamelCase(tag);
        String comment = null;
        String content = null;
        String id = null;
        boolean isNewTemplateOrComponent = false;

        if (ignoreElementNames.contains(type)) {

            continue;
        }

        if (node instanceof Element) {

            final Element el = ((Element) node);
            final Set<String> classes = el.classNames();

            for (String cls : classes) {

                classString.append(cls).append(" ");
            }

            id = el.id();

            // do not download files when called from DeployCommand!
            if (!isDeployment) {

                String downloadAddressAttr = srcElements.contains(tag) ? "src"
                        : hrefElements.contains(tag) ? "href" : null;

                if (originalUrl != null && downloadAddressAttr != null
                        && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) {

                    String downloadAddress = node.attr(downloadAddressAttr);
                    linkable = downloadFile(downloadAddress, originalUrl);
                } else {
                    // make sure a download of a previous sibling is not reused
                    linkable = null;
                }
            }

            if (removeHashAttribute) {

                // Remove data-structr-hash attribute
                node.removeAttr("data-structr-hash");
            }
        }

        // Comment nodes: keep the comment data and remember it for later use
        if (type.equals("#comment")) {

            comment = ((Comment) node).getData();
            tag = "";

            // Don't add content node for whitespace
            if (StringUtils.isBlank(comment)) {

                continue;
            }

            // store for later use
            commentSource.append(comment).append("\n");

            // check if comment contains instructions
            if (commentHandler != null && commentHandler.containsInstructions(comment)) {

                if (instructions != null) {

                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);
                }

                // instructions are applied to the next node created on this level
                instructions = comment;
                continue;
            }

        } else if (type.equals("#data")) {

            // Data nodes: put the whole data into the "content" field without changes
            tag = "";
            content = ((DataNode) node).getWholeData();

            // Don't add content node for whitespace
            if (StringUtils.isBlank(content)) {

                continue;
            }

        } else if (type.equals("#text")) {

            // Text-only nodes: put the text (minus a trailing newline) into the "content" field
            tag = "";

            if (isDeployment) {

                // deployment keeps the original whitespace, only empty text is skipped
                content = trimTrailingNewline(((TextNode) node).getWholeText());

                if (content == null || content.length() == 0) {
                    continue;
                }

            } else {

                content = trimTrailingNewline(((TextNode) node).text());

                if (StringUtils.isBlank(content)) {
                    continue;
                }
            }
        }

        org.structr.web.entity.dom.DOMNode newNode = null;

        // create node
        if (StringUtils.isBlank(tag)) {

            if (page != null) {

                // create comment or content node
                if (!StringUtils.isBlank(comment)) {

                    final PropertyKey<String> contentTypeKey = StructrApp.key(Content.class, "contentType");

                    newNode = (DOMNode) page.createComment(comment);
                    newNode.setProperty(contentTypeKey, "text/html");

                } else {

                    newNode = (Content) page.createTextNode(content);
                }
            }

        } else if ("structr:template".equals(tag)) {

            final String src = node.attr("src");
            if (src != null) {

                DOMNode template = null;

                if (DeployCommand.isUuid(src)) {

                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class)
                            .and(GraphObject.id, src).getFirst();

                    if (template == null) {

                        // report through the logger like every other diagnostic of this method
                        logger.warn(
                                "##################################### template with UUID {} not found, this is a known bug",
                                src);
                    }

                } else if (DeployCommand.endsWithUuid(src)) {

                    // the UUID is taken from the last 32 characters of the src attribute
                    final String uuid = src.substring(src.length() - 32);
                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class)
                            .and(GraphObject.id, uuid).getFirst();

                    if (template == null) {

                        logger.warn(
                                "##################################### template with UUID {} not found, this is a known bug",
                                uuid);
                    }

                } else {

                    // lookup by name: shared component first, then template, else create a new template
                    template = Importer.findSharedComponentByName(src);
                    if (template == null) {

                        template = Importer.findTemplateByName(src);

                        if (template == null) {

                            template = createNewTemplateNode(parent, node.childNodes());
                            isNewTemplateOrComponent = true;

                        }
                    }
                }

                if (template != null) {

                    newNode = template;

                    if (template.isSharedComponent()) {

                        // shared components are not used directly but through a linked clone
                        newNode = (DOMNode) template.cloneNode(false);

                        newNode.setSharedComponent(template);
                        newNode.setOwnerDocument(page);

                    } else if (page != null) {

                        newNode.setOwnerDocument(page);
                    }

                } else {

                    logger.warn("Unable to find template or shared component {}, template ignored!", src);
                }

            } else {

                logger.warn("Invalid template definition, missing src attribute!");
            }

        } else if ("structr:component".equals(tag)) {

            final String src = node.attr("src");
            if (src != null) {

                DOMNode component = null;
                if (DeployCommand.isUuid(src)) {

                    component = app.nodeQuery(DOMNode.class).and(GraphObject.id, src).getFirst();

                } else if (DeployCommand.endsWithUuid(src)) {

                    final String uuid = src.substring(src.length() - 32);
                    component = app.nodeQuery(DOMNode.class).and(GraphObject.id, uuid).getFirst();

                } else {

                    component = Importer.findSharedComponentByName(src);
                }

                if (component == null) {

                    component = createSharedComponent(node);
                }

                // children of a component are never imported below this node
                isNewTemplateOrComponent = true;

                if (component != null) {

                    newNode = (DOMNode) component.cloneNode(false);

                    // restore the original src attribute of the component, or drop the lookup value
                    final String _html_src = newNode.getProperty(new StringProperty("_html_src"));
                    if (!StringUtils.isEmpty(_html_src)) {
                        node.attr("src", _html_src);
                    } else {
                        node.removeAttr("src");
                    }

                    newNode.setSharedComponent(component);
                    newNode.setOwnerDocument(page);

                } else {

                    logger.warn("Unable to find shared component {} - ignored!", src);
                }

            } else {

                logger.warn("Invalid component definition, missing src attribute!");
            }

        } else {

            if (page != null) {

                newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag, true);
            }

        }

        if (newNode != null) {

            // save root element for later use
            if (rootElement == null && !(newNode instanceof org.structr.web.entity.dom.Comment)) {
                rootElement = newNode;
            }

            // set linkable
            if (linkable != null && newNode instanceof LinkSource) {
                ((LinkSource) newNode).setLinkable(linkable);
            }

            // container for bulk setProperties()
            final PropertyMap newNodeProperties = new PropertyMap();
            final Class newNodeType = newNode.getClass();

            newNodeProperties.put(AbstractNode.visibleToPublicUsers, publicVisible);
            newNodeProperties.put(AbstractNode.visibleToAuthenticatedUsers, authVisible);

            // "id" attribute: Put it into the "_html_id" field
            if (StringUtils.isNotBlank(id)) {

                newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_id"), id);
            }

            if (StringUtils.isNotBlank(classString.toString())) {

                newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_class"),
                        StringUtils.trim(classString.toString()));
            }

            for (Attribute nodeAttr : node.attributes()) {

                final String key = nodeAttr.getKey();

                if (!key.equals("text")) { // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute

                    final String value = nodeAttr.getValue();

                    if (key.startsWith("data-")) {

                        if (key.startsWith(DATA_META_PREFIX)) { // convert data-structr-meta-* attributes to local camel case properties on the node,

                            int l = DATA_META_PREFIX.length();

                            String upperCaseKey = WordUtils.capitalize(key.substring(l), new char[] { '-' })
                                    .replace("-", "");

                            if (upperCaseKey.isEmpty()) {

                                // nothing usable follows the prefix; the substring calls below would throw
                                logger.warn("Ignoring meta attribute {} without a property name.", key);
                                continue;
                            }

                            // keep the original case of the first character, camel-case the rest
                            String camelCaseKey = key.substring(l, l + 1).concat(upperCaseKey.substring(1));

                            if (value != null) {

                                // store value using actual input converter
                                final PropertyKey actualKey = StructrApp.getConfiguration()
                                        .getPropertyKeyForJSONName(newNodeType, camelCaseKey, false);
                                if (actualKey != null) {

                                    final PropertyConverter converter = actualKey
                                            .inputConverter(securityContext);
                                    if (converter != null) {

                                        final Object convertedValue = converter.convert(value);
                                        newNodeProperties.put(actualKey, convertedValue);

                                    } else {

                                        newNodeProperties.put(actualKey, value);
                                    }

                                } else {

                                    logger.warn("Unknown meta property key {}, ignoring.", camelCaseKey);
                                }
                            }

                        } else if (key.startsWith(DATA_STRUCTR_PREFIX)) { // don't convert data-structr-* attributes as they are internal

                            final PropertyKey propertyKey = StructrApp.getConfiguration()
                                    .getPropertyKeyForJSONName(newNodeType, key);
                            if (propertyKey != null) {

                                final PropertyConverter inputConverter = propertyKey
                                        .inputConverter(securityContext);
                                if (value != null && inputConverter != null) {

                                    newNodeProperties.put(propertyKey, inputConverter.convert(value));

                                } else {

                                    newNodeProperties.put(propertyKey, value);
                                }
                            }

                        } else {

                            // store data-* attributes in node
                            final PropertyKey propertyKey = new StringProperty(key);
                            if (value != null) {

                                newNodeProperties.put(propertyKey, value);
                            }
                        }

                    } else {

                        boolean notBlank = StringUtils.isNotBlank(value);
                        boolean isAnchor = notBlank && value.startsWith("#");
                        boolean isLocal = notBlank && !value.startsWith("http");
                        boolean isActive = notBlank && value.contains("${");
                        boolean isStructrLib = notBlank && value.startsWith("/structr/js/");

                        if (linkable != null && "link".equals(tag) && "href".equals(key) && isLocal && !isActive
                                && !isDeployment) {

                            // local <link href>: reference the downloaded file including its version
                            newNodeProperties.put(new StringProperty(PropertyView.Html + key),
                                    "${link.path}?${link.version}");

                        } else if (linkable != null && ("href".equals(key) || "src".equals(key)) && isLocal
                                && !isActive && !isAnchor && !isStructrLib && !isDeployment) {

                            // other local href/src: reference the downloaded file
                            newNodeProperties.put(new StringProperty(PropertyView.Html + key), "${link.path}");

                        } else {

                            if (key.startsWith("aria-")) {

                                // use custom key
                                newNodeProperties.put(
                                        new StringProperty(
                                                CustomHtmlAttributeProperty.CUSTOM_HTML_ATTRIBUTE_PREFIX + key),
                                        value);

                            } else {

                                newNodeProperties.put(new StringProperty(PropertyView.Html + key), value);
                            }
                        }
                    }
                }
            }

            // bulk set properties on new node
            newNode.setProperties(securityContext, newNodeProperties);

            if ("script".equals(tag)) {

                final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type");
                final String contentType = newNode.getProperty(typeKey);

                if (contentType == null) {

                    // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly
                    newNode.setProperty(typeKey, "text/javascript");

                } else if (contentType.equals("application/schema+json")) {

                    for (final Node scriptContentNode : node.childNodes()) {

                        final String source = scriptContentNode.toString();

                        // Import schema JSON
                        SchemaJsonImporter.importSchemaJson(source);
                    }

                } else if (contentType.equals("application/x-structr-script")
                        || contentType.equals("application/x-structr-javascript")) {

                    // both Structr script types are executed right away
                    for (final Node scriptContentNode : node.childNodes()) {

                        final String source = scriptContentNode.toString();

                        try {

                            Actions.execute(securityContext, null, source, null);

                        } catch (UnlicensedScriptException ex) {
                            ex.log(logger);
                        }
                    }

                    // executed scripts are not attached to the parent and their children are not imported
                    continue;
                }

            } else if ("style".equals(tag)) {

                final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type");
                final String contentType = newNode.getProperty(typeKey);

                if ("text/css".equals(contentType)) {

                    // parse content of style elements and add referenced files to list of resources to be downloaded
                    for (final Node styleContentNode : node.childNodes()) {

                        final String source = styleContentNode.toString();

                        try {
                            // Import referenced resources
                            processCss(source, originalUrl);

                        } catch (IOException ex) {
                            logger.warn("Couldn't process CSS source", ex);
                        }
                    }
                }

            }

            if (instructions != null) {

                if (instructions.contains("@structr:content") && !(newNode instanceof Content)) {

                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);

                } else {

                    // apply instructions to new DOM element
                    if (commentHandler != null) {

                        commentHandler.handleComment(page, newNode, instructions, true);
                    }
                }

                instructions = null;
            }

            // allow parent to be null to prevent direct child relationship
            if (parent != null) {

                // special handling for <head> elements
                if (newNode instanceof Head && parent instanceof Body) {

                    // a head found inside the body is inserted before the body instead
                    final org.w3c.dom.Node html = parent.getParentNode();
                    html.insertBefore(newNode, parent);

                } else {

                    parent.appendChild(newNode);
                }
            }

            // Step down and process child nodes except for newly created templates
            if (!isNewTemplateOrComponent) {

                createChildNodes(node, newNode, page, removeHashAttribute, depth + 1);

            }

        }
    }

    // reset instructions when leaving a level
    if (instructions != null) {

        createEmptyContentNode(page, parent, commentHandler, instructions);

        instructions = null;
    }

    return rootElement;
}

From source file:org.structr.web.Importer.java

/**
 * Recursively converts the child nodes of a parsed jsoup node into Structr DOM
 * nodes and appends them to {@code parent}.
 * <p>
 * For every child of {@code startNode} a comment, content or element node is
 * created on {@code page}, the HTML attributes are copied onto it as
 * properties, and the child's own children are imported below the new node.
 *
 * @param startNode           jsoup node whose children are imported
 * @param parent              Structr node the new nodes are appended to
 * @param page                page used to create the new nodes
 * @param removeHashAttribute whether to strip the data hash attribute from
 *                            imported elements
 * @throws FrameworkException if creating or modifying a node fails
 */
private void createChildNodes(final Node startNode, final DOMNode parent, final Page page,
        final boolean removeHashAttribute) throws FrameworkException {

    final List<Node> children = startNode.childNodes();
    for (Node node : children) {

        // Downloaded file of the current node only. Declared per iteration so that
        // a download of a previous sibling is never linked to this node.
        Linkable res = null;

        String tag = node.nodeName();

        // clean tag: strip every character that is not a letter, digit or '#'
        if (tag != null) {
            tag = tag.replaceAll("[^a-zA-Z0-9#]+", "");
        }

        String type = CaseHelper.toUpperCamelCase(tag);
        String comment = null;
        String content = null;
        String id = null;
        StringBuilder classString = new StringBuilder();

        if (ArrayUtils.contains(ignoreElementNames, type)) {

            continue;
        }

        if (node instanceof Element) {

            Element el = ((Element) node);
            Set<String> classes = el.classNames();

            for (String cls : classes) {

                classString.append(cls).append(" ");
            }

            id = el.id();

            String downloadAddressAttr = (ArrayUtils.contains(srcElements, tag) ? "src"
                    : ArrayUtils.contains(hrefElements, tag) ? "href" : null);

            if (downloadAddressAttr != null && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) {

                String downloadAddress = node.attr(downloadAddressAttr);
                res = downloadFile(downloadAddress, originalUrl);

            }

            if (removeHashAttribute) {

                // Remove data-structr-hash attribute
                node.removeAttr(DOMNode.dataHashProperty.jsonName());

            }

        }

        if (type.equals("#comment")) {

            // Comment nodes: keep the comment data and remember it for later use
            tag = "";
            comment = ((Comment) node).getData();

            // Don't add content node for whitespace
            if (StringUtils.isBlank(comment)) {

                continue;
            }

            // store for later use
            commentSource.append(comment).append("\n");

        } else if (type.equals("#data")) {

            // Data nodes: put the whole data into the "content" field without changes
            tag = "";
            content = ((DataNode) node).getWholeData();

            // Don't add content node for whitespace
            if (StringUtils.isBlank(content)) {

                continue;
            }

        } else if (type.equals("#text")) {

            // Text-only nodes: put the text into the "content" field
            tag = "";
            content = ((TextNode) node).text();

            // Add content node for whitespace within <p> elements only
            if (!("p".equals(startNode.nodeName().toLowerCase())) && StringUtils.isWhitespace(content)) {

                continue;
            }
        }

        org.structr.web.entity.dom.DOMNode newNode;

        // create node
        if (StringUtils.isBlank(tag)) {

            // create comment or content node
            if (!StringUtils.isBlank(comment)) {

                newNode = (DOMNode) page.createComment(comment);
                newNode.setProperty(org.structr.web.entity.dom.Comment.contentType, "text/html");

            } else {

                newNode = (Content) page.createTextNode(content);
            }

        } else {

            newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag);
        }

        if (newNode != null) {

            newNode.setProperty(AbstractNode.visibleToPublicUsers, publicVisible);
            newNode.setProperty(AbstractNode.visibleToAuthenticatedUsers, authVisible);

            if (res != null) {

                newNode.setProperty(LinkSource.linkable, res);

            }

            // "id" attribute: Put it into the "_html_id" field
            if (StringUtils.isNotBlank(id)) {

                newNode.setProperty(DOMElement._id, id);
            }

            if (StringUtils.isNotBlank(classString.toString())) {

                newNode.setProperty(DOMElement._class, StringUtils.trim(classString.toString()));
            }

            for (Attribute nodeAttr : node.attributes()) {

                final String key = nodeAttr.getKey();

                if (!key.equals("text")) { // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute

                    final String value = nodeAttr.getValue();

                    if (key.startsWith("data-")) {

                        if (key.startsWith(DATA_META_PREFIX)) { // convert data-structr-meta-* attributes to local camel case properties on the node,

                            int l = DATA_META_PREFIX.length();

                            String upperCaseKey = WordUtils.capitalize(key.substring(l), new char[] { '-' })
                                    .replace("-", "");

                            if (upperCaseKey.isEmpty()) {

                                // nothing usable follows the prefix; the substring calls below
                                // would throw, so the attribute is skipped
                                continue;
                            }

                            // keep the original case of the first character, camel-case the rest
                            String camelCaseKey = key.substring(l, l + 1).concat(upperCaseKey.substring(1));

                            if (value != null) {
                                // "true"/"false" are stored as booleans, everything else as string
                                if (value.equalsIgnoreCase("true")) {
                                    newNode.setProperty(new BooleanProperty(camelCaseKey), true);
                                } else if (value.equalsIgnoreCase("false")) {
                                    newNode.setProperty(new BooleanProperty(camelCaseKey), false);
                                } else {
                                    newNode.setProperty(new StringProperty(camelCaseKey), nodeAttr.getValue());
                                }
                            }

                        } else if (key.startsWith(DATA_STRUCTR_PREFIX)) { // don't convert data-structr-* attributes as they are internal

                            PropertyKey propertyKey = config.getPropertyKeyForJSONName(newNode.getClass(), key);

                            if (propertyKey != null) {

                                final PropertyConverter inputConverter = propertyKey
                                        .inputConverter(securityContext);
                                if (value != null && inputConverter != null) {

                                    newNode.setProperty(propertyKey, inputConverter.convert(value));
                                } else {

                                    newNode.setProperty(propertyKey, value);
                                }
                            }
                        }

                    } else {

                        boolean notBlank = StringUtils.isNotBlank(value);
                        boolean isAnchor = notBlank && value.startsWith("#");
                        boolean isLocal = notBlank && !value.startsWith("http");
                        boolean isActive = notBlank && value.contains("${");
                        boolean isStructrLib = notBlank && value.startsWith("/structr/js/");

                        if ("link".equals(tag) && "href".equals(key) && isLocal && !isActive) {

                            // local <link href>: reference the linked file including its version
                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)),
                                    "${link.path}?${link.version}");

                        } else if (("href".equals(key) || "src".equals(key)) && isLocal && !isActive
                                && !isAnchor && !isStructrLib) {

                            // other local href/src: reference the linked file
                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)),
                                    "${link.path}");

                        } else {

                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)), value);
                        }

                    }
                }

            }

            final StringProperty typeKey = new StringProperty(PropertyView.Html.concat("type"));

            if ("script".equals(tag) && newNode.getProperty(typeKey) == null) {

                // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly
                newNode.setProperty(typeKey, "text/javascript");
            }

            parent.appendChild(newNode);

            // Step down and process child nodes
            createChildNodes(node, newNode, page, removeHashAttribute);

        }
    }
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java

/**
 * Scrapes the metadata of a single title from IMDb (optionally enriched with TMDb data).
 * <p>
 * Flow, as implemented below:
 * <ol>
 * <li>If {@code options.getResult()} already carries metadata, that object is returned unchanged.</li>
 * <li>The IMDb id is taken from the search result and, if that one is not valid, from {@code options.getImdbId()}.
 * Without a valid id the freshly created metadata object is returned without any scraping.</li>
 * <li>The {@code /combined} and {@code /plotsummary} pages are requested through the executor; a TMDb worker is
 * submitted as well when foreign-language or collection scraping is requested.</li>
 * <li>The combined page is parsed for title, year, original title, poster, rating, vote count, top 250 rank, the
 * {@code info} blocks (release date, tagline, genres, runtime, country, language, certification, director), cast,
 * writers, producers and production companies. The plot is read from the plot summary page.</li>
 * <li>When requested, TMDb values overwrite/complete the scraped ones.</li>
 * <li>If no original title was found, the title is used as the original title.</li>
 * </ol>
 *
 * @param options
 *          the scrape options; the search result, IMDb id, language, country and the scrape flags are read
 * @return the cached metadata of the search result, or the metadata object populated by this call
 * @throws Exception
 *           declared by the signature; the blocking {@code Future.get()} calls below can throw
 *           {@code InterruptedException} or {@code ExecutionException}
 */
@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() " + options.toString());
    // check if there is a md in the result
    if (options.getResult() != null && options.getResult().getMetadata() != null) {
        LOGGER.debug("IMDB: getMetadata from cache: " + options.getResult());
        return options.getResult().getMetadata();
    }

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String imdbId = "";

    // imdbId from searchResult
    if (options.getResult() != null) {
        imdbId = options.getResult().getIMDBId();
    }

    // imdbid from scraper option (fallback when the search result did not provide a valid one)
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        imdbId = options.getImdbId();
    }

    // still no valid id: nothing can be scraped, return the metadata object as created above
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        return md;
    }

    LOGGER.debug("IMDB: getMetadata(imdbId): " + imdbId);
    md.setId(MediaMetadata.IMDBID, imdbId);

    // both completion services share the same executor; only their submit() is used below,
    // the results are collected via the returned futures
    ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<Document>(executor);
    ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<MediaMetadata>(
            executor);

    // worker for imdb request (/combined): the URL is always built from ImdbSiteDefinition.IMDB_COM,
    // regardless of the chosen site
    // StringBuilder sb = new StringBuilder(imdbSite.getSite());
    StringBuilder sb = new StringBuilder(ImdbSiteDefinition.IMDB_COM.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/combined");
    Callable<Document> worker = new ImdbWorker(sb.toString(), options.getLanguage().name(),
            options.getCountry().getAlpha2());
    Future<Document> futureCombined = compSvcImdb.submit(worker);

    // worker for imdb request (/plotsummary) (from chosen site)
    Future<Document> futurePlotsummary = null;
    sb = new StringBuilder(imdbSite.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/plotsummary");

    worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2());
    futurePlotsummary = compSvcImdb.submit(worker);

    // worker for tmdb request; only started when its data is needed (same condition is re-checked
    // before futureTmdb is read at the end of this method)
    Future<MediaMetadata> futureTmdb = null;
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry());
        futureTmdb = compSvcTmdb.submit(worker2);
    }

    // blocks until the combined page has been fetched
    // NOTE(review): doc is dereferenced below without a null check - confirm ImdbWorker never yields null
    Document doc;
    doc = futureCombined.get();

    /*
     * title and year have the following structure
     *
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // parse title and year
    Element title = doc.getElementById("tn15title");
    if (title != null) {
        Element element = null;
        // title: only the text directly inside the first <h1> (ownText excludes the nested spans)
        Elements elements = title.getElementsByTag("h1");
        if (elements.size() > 0) {
            element = elements.first();
            String movieTitle = cleanString(element.ownText());
            md.storeMetadata(MediaMetadata.TITLE, movieTitle);
        }

        // year
        elements = title.getElementsByTag("span");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();

            // search year: the pattern matches either "(" followed by four digits (captured in group 1)
            // or the literal "/)"; only matches with a captured group 1 are used, the first one wins
            Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");
            Matcher matcher = yearPattern.matcher(content);
            while (matcher.find()) {
                if (matcher.group(1) != null) {
                    String movieYear = matcher.group(1);
                    md.storeMetadata(MediaMetadata.YEAR, movieYear);
                    break;
                }
            }
        }

        // original title: text of the first element with class "title-extra", minus the "(original title)" marker
        elements = title.getElementsByAttributeValue("class", "title-extra");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();
            content = content.replaceAll("\\(original title\\)", "").trim();
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, content);
        }
    }

    // poster: rewrite the size tokens (SX<n>_ / SY<n>_) in the image URL to 400
    Element poster = doc.getElementById("primary-poster");
    if (poster != null) {
        String posterUrl = poster.attr("src");
        posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
        posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
        processMediaArt(md, MediaArtworkType.POSTER, "Poster", posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementById("tn15rating");
    if (ratingElement != null) {
        Elements elements = ratingElement.getElementsByClass("starbar-meta");
        if (elements.size() > 0) {
            Element div = elements.get(0);

            // rating comes in <b> tag (only evaluated when there is exactly one <b>)
            Elements b = div.getElementsByTag("b");
            if (b.size() == 1) {
                String ratingAsString = b.text();
                Pattern ratingPattern = Pattern.compile("([0-9]\\.[0-9])/10");
                Matcher matcher = ratingPattern.matcher(ratingAsString);
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        float rating = 0;
                        try {
                            rating = Float.valueOf(matcher.group(1));
                        } catch (Exception e) {
                            // ignored: rating stays 0 and is stored as such
                        }
                        md.storeMetadata(MediaMetadata.RATING, rating);
                        break;
                    }
                }
            }

            // count: strip thousands separators and the word "votes" before parsing
            Elements a = div.getElementsByAttributeValue("href", "ratings");
            if (a.size() == 1) {
                String countAsString = a.text().replaceAll("[.,]|votes", "").trim();
                int voteCount = 0;
                try {
                    voteCount = Integer.parseInt(countAsString);
                } catch (Exception e) {
                    // ignored: voteCount stays 0 and is stored as such
                }
                md.storeMetadata(MediaMetadata.VOTE_COUNT, voteCount);
            }
        }

        // top250
        elements = ratingElement.getElementsByClass("starbar-special");
        if (elements.size() > 0) {
            Elements a = elements.get(0).getElementsByTag("a");
            if (a.size() > 0) {
                Element anchor = a.get(0);
                Pattern topPattern = Pattern.compile("Top 250: #([0-9]{1,3})");
                Matcher matcher = topPattern.matcher(anchor.ownText());
                // no break here (unlike the year/rating loops): every match is stored, so the last one wins
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        int top250 = 0;
                        try {
                            top250 = Integer.parseInt(matcher.group(1));
                        } catch (Exception e) {
                            // ignored: top250 stays 0 and is stored as such
                        }
                        md.storeMetadata(MediaMetadata.TOP_250, top250);
                    }
                }
            }
        }
    }

    // parse all items coming by <div class="info">
    Elements elements = doc.getElementsByClass("info");
    for (Element element : elements) {
        // only parse divs
        if (!"div".equals(element.tag().getName())) {
            continue;
        }

        // elements with h5 are the titles of the values; the h5 text decides which value this block holds
        Elements h5 = element.getElementsByTag("h5");
        if (h5.size() > 0) {
            Element firstH5 = h5.first();
            String h5Title = firstH5.text();

            // release date
            /*
             * <div class="info"><h5>Release Date:</h5><div class="info-content">5 January 1996 (USA)<a class="tn15more inline"
             * href="/title/tt0114746/releaseinfo"
             * onclick="(new Image()).src='/rg/title-tease/releasedates/images/b.gif?link=/title/tt0114746/releaseinfo';"> See more</a>&nbsp;</div></div>
             */
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getReleaseDate() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element releaseDateElement = div.first();
                    // NOTE(review): replaceAll("", "") replaces the empty pattern with an empty string, i.e. it is a
                    // no-op as written; presumably a non-printable character (e.g. a non-breaking space) was lost
                    // from the pattern - verify against the upstream source
                    String releaseDate = cleanString(releaseDateElement.ownText().replaceAll("", ""));
                    // group 1 = everything before the trailing "(...)" part, e.g. the date in front of "(USA)"
                    Pattern pattern = Pattern.compile("(.*)\\(.*\\)");
                    Matcher matcher = pattern.matcher(releaseDate);
                    if (matcher.find()) {
                        try {
                            // parsed with the default locale of the JVM and re-formatted as dd-MM-yyyy
                            SimpleDateFormat sdf = new SimpleDateFormat("d MMM yyyy");
                            Date parsedDate = sdf.parse(matcher.group(1));
                            sdf = new SimpleDateFormat("dd-MM-yyyy");
                            md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(parsedDate));
                        } catch (Exception e) {
                            // ignored: an unparsable date leaves RELEASE_DATE unset
                        }
                    }
                }
            }

            /*
             * <div class="info"><h5>Tagline:</h5><div class="info-content"> (7) To Defend Us... <a class="tn15more inline"
             * href="/title/tt0472033/taglines" onClick= "(new Image()).src='/rg/title-tease/taglines/images/b.gif?link=/title/tt0472033/taglines';" >See
             * more</a>&nbsp;&raquo; </div></div>
             */
            // tagline (skipped when foreign-language scraping is requested; the TMDb tagline is used in that case,
            // see the end of this method)
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getTagline() + ".*")
                    && !options.isScrapeImdbForeignLanguage()) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    // NOTE(review): same no-op replaceAll("", "") as for the release date - verify
                    String tagline = cleanString(taglineElement.ownText().replaceAll("", ""));
                    md.storeMetadata(MediaMetadata.TAGLINE, tagline);
                }
            }

            /*
             * <div class="info-content"><a href="/Sections/Genres/Animation/">Animation</a> | <a href="/Sections/Genres/Action/">Action</a> | <a
             * href="/Sections/Genres/Adventure/">Adventure</a> | <a href="/Sections/Genres/Fantasy/">Fantasy</a> | <a
             * href="/Sections/Genres/Mystery/">Mystery</a> | <a href="/Sections/Genres/Sci-Fi/">Sci-Fi</a> | <a
             * href="/Sections/Genres/Thriller/">Thriller</a> <a class="tn15more inline" href="/title/tt0472033/keywords" onClick=
             * "(new Image()).src='/rg/title-tease/keywords/images/b.gif?link=/title/tt0472033/keywords';" > See more</a>&nbsp;&raquo; </div>
             */
            // genres: the header is matched against the label of the chosen site (imdbSite.getGenre()),
            // although the page parsed here was requested from the IMDB_COM site
            // NOTE(review): the other headers use ImdbSiteDefinition.IMDB_COM labels - confirm this is intended
            if (h5Title.matches("(?i)" + imdbSite.getGenre() + "(.*)")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Elements a = div.first().getElementsByTag("a");
                    for (Element anchor : a) {
                        // only genre links; this skips e.g. the "See more" link
                        if (anchor.attr("href").matches("/Sections/Genres/.*")) {
                            md.addGenre(getTmmGenre(anchor.ownText()));
                        }
                    }
                }
            }
            // }

            /*
             * <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) | 178 min (extended cut)</div></div>
             */
            // runtime: only the first entry (text before the first "|") is evaluated
            // if (h5Title.matches("(?i)" + imdbSite.getRuntime() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getRuntime() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    String first = taglineElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (Exception e) {
                        // try to filter out the first number we find (2-3 digits); if there is none, runtime stays 0
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.storeMetadata(MediaMetadata.RUNTIME, runtime);
                }
            }

            /*
             * <div class="info"><h5>Country:</h5><div class="info-content"><a href="/country/fr">France</a> | <a href="/country/es">Spain</a> | <a
             * href="/country/it">Italy</a> | <a href="/country/hu">Hungary</a></div></div>
             */
            // country: collects the upper-cased codes from the /country/<code> links as a ", " separated list;
            // the value is stored even when no link matched (empty string)
            if (h5Title.matches("(?i)Country.*")) {
                Elements a = element.getElementsByTag("a");
                String countries = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/country/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String country = matcher.group(1);
                        if (StringUtils.isNotEmpty(countries)) {
                            countries += ", ";
                        }
                        countries += country.toUpperCase();
                    }
                }
                md.storeMetadata(MediaMetadata.COUNTRY, countries);
            }

            /*
             * <div class="info"><h5>Language:</h5><div class="info-content"><a href="/language/en">English</a> | <a href="/language/de">German</a> | <a
             * href="/language/fr">French</a> | <a href="/language/it">Italian</a></div>
             */
            // Spoken languages: same scheme as for the countries, but the codes are kept as found in the link
            if (h5Title.matches("(?i)Language.*")) {
                Elements a = element.getElementsByTag("a");
                String spokenLanguages = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/language/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String langu = matcher.group(1);
                        if (StringUtils.isNotEmpty(spokenLanguages)) {
                            spokenLanguages += ", ";
                        }
                        spokenLanguages += langu;
                    }
                }
                md.storeMetadata(MediaMetadata.SPOKEN_LANGUAGES, spokenLanguages);
            }

            /*
             * <div class="info"><h5>Certification:</h5><div class="info-content"><a href="/search/title?certificates=us:pg">USA:PG</a> <i>(certificate
             * #47489)</i> | <a href="/search/title?certificates=ca:pg">Canada:PG</a> <i>(Ontario)</i> | <a
             * href="/search/title?certificates=au:pg">Australia:PG</a> | <a href="/search/title?certificates=in:u">India:U</a> | <a
             * href="/search/title?certificates=ie:pg">Ireland:PG</a> ...</div></div>
             */
            // certification
            // if (h5Title.matches("(?i)" + imdbSite.getCertification() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getCertification() + ".*")) {
                Elements a = element.getElementsByTag("a");
                for (Element anchor : a) {
                    // certification for the right country (link must start with the alpha2 code of the requested country)
                    if (anchor.attr("href").matches(
                            "(?i)/search/title\\?certificates=" + options.getCountry().getAlpha2() + ".*")) {
                        // group 1 = the part of the link text after the last ":" (".*" in front is greedy)
                        Pattern certificationPattern = Pattern.compile(".*:(.*)");
                        Matcher matcher = certificationPattern.matcher(anchor.ownText());
                        Certification certification = null;
                        while (matcher.find()) {
                            if (matcher.group(1) != null) {
                                certification = Certification.getCertification(options.getCountry(),
                                        matcher.group(1));
                            }
                        }

                        // stop at the first link which yields a certification
                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }
            }
        }

        /*
         * <div id="director-info" class="info"> <h5>Director:</h5> <div class="info-content"><a href="/name/nm0000416/" onclick=
         * "(new Image()).src='/rg/directorlist/position-1/images/b.gif?link=name/nm0000416/';" >Terry Gilliam</a><br/> </div> </div>
         */
        // director: identified by the id of the info block (not by its h5 title); every person link becomes a director
        if ("director-info".equals(element.id())) {
            Elements a = element.getElementsByTag("a");
            for (Element anchor : a) {
                if (anchor.attr("href").matches("/name/nm.*")) {
                    MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR);
                    cm.setName(anchor.ownText());
                    md.addCastMember(cm);
                }
            }
        }
    }

    /*
     * <table class="cast"> <tr class="odd"><td class="hs"><a href="http://pro.imdb.com/widget/resume_redirect/" onClick=
     * "(new Image()).src='/rg/resume/prosystem/images/b.gif?link=http://pro.imdb.com/widget/resume_redirect/';" ><img src=
     * "http://i.media-imdb.com/images/SF9113d6f5b7cb1533c35313ccd181a6b1/tn15/no_photo.png" width="25" height="31" border="0"></td><td class="nm"><a
     * href="/name/nm0577828/" onclick= "(new Image()).src='/rg/castlist/position-1/images/b.gif?link=/name/nm0577828/';" >Joseph Melito</a></td><td
     * class="ddd"> ... </td><td class="char"><a href="/character/ch0003139/">Young Cole</a></td></tr> <tr class="even"><td class="hs"><a
     * href="/name/nm0000246/" onClick= "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0000246/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BMjA0MjMzMTE5OF5BMl5BanBnXkFtZTcwMzQ2ODE3Mw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0000246/" onclick=
     * "(new Image()).src='/rg/castlist/position-2/images/b.gif?link=/name/nm0000246/';" >Bruce Willis</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003139/">James Cole</a></td></tr> <tr class="odd"><td class="hs"><a href="/name/nm0781218/" onClick=
     * "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0781218/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BODI1MTA2MjkxM15BMl5BanBnXkFtZTcwMTcwMDg2Nw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0781218/" onclick=
     * "(new Image()).src='/rg/castlist/position-3/images/b.gif?link=/name/nm0781218/';" >Jon Seda</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003143/">Jose</a></td></tr>...</table>
     */
    // cast: one cast member per row of the first element with class "cast"
    elements = doc.getElementsByClass("cast");
    if (elements.size() > 0) {
        Elements tr = elements.get(0).getElementsByTag("tr");
        for (Element row : tr) {
            Elements td = row.getElementsByTag("td");
            MediaCastMember cm = new MediaCastMember();
            for (Element column : td) {
                // actor thumb
                if (column.hasClass("hs")) {
                    Elements img = column.getElementsByTag("img");
                    if (img.size() > 0) {
                        String thumbUrl = img.get(0).attr("src");
                        if (thumbUrl.contains("no_photo.png")) {
                            // placeholder image: store no URL
                            cm.setImageUrl("");
                        } else {
                            // set the width token to 400 and drop the height token
                            thumbUrl = thumbUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                            thumbUrl = thumbUrl.replaceAll("SY[0-9]{2,4}_", "");
                            cm.setImageUrl(thumbUrl);
                        }
                    }
                }
                // actor name
                if (column.hasClass("nm")) {
                    cm.setName(cleanString(column.text()));
                }
                // character
                if (column.hasClass("char")) {
                    cm.setCharacter(cleanString(column.text()));
                }
            }
            // rows without both a name and a character are dropped
            if (StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    // writers and producers are looked up in all tables below the element with id "tn15content";
    // a table is selected by its text containing the corresponding IMDB_COM label
    Element content = doc.getElementById("tn15content");
    if (content != null) {
        elements = content.getElementsByTag("table");
        for (Element table : elements) {
            // writers: every person link of the table
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getWriter())) {
                Elements anchors = table.getElementsByTag("a");
                for (Element anchor : anchors) {
                    if (anchor.attr("href").matches("/name/nm.*")) {
                        MediaCastMember cm = new MediaCastMember(CastType.WRITER);
                        cm.setName(anchor.ownText());
                        md.addCastMember(cm);
                    }
                }
            }

            // producers: first column = name, third column (if present) = part
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                Elements rows = table.getElementsByTag("tr");
                for (Element row : rows) {
                    // skip the row carrying the label itself
                    if (row.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                        continue;
                    }
                    Elements columns = row.children();
                    if (columns.size() == 0) {
                        continue;
                    }
                    MediaCastMember cm = new MediaCastMember(CastType.PRODUCER);
                    String name = cleanString(columns.get(0).text());
                    if (StringUtils.isBlank(name)) {
                        continue;
                    }
                    cm.setName(name);
                    if (columns.size() >= 3) {
                        cm.setPart(cleanString(columns.get(2).text()));
                    }
                    md.addCastMember(cm);
                }
            }
        }
    }

    // Production companies: link texts of the element following the matching "blackcatheader", joined by ", "
    elements = doc.getElementsByClass("blackcatheader");
    for (Element blackcatheader : elements) {
        if (blackcatheader.ownText().equals(ImdbSiteDefinition.IMDB_COM.getProductionCompanies())) {
            // NOTE(review): nextElementSibling() is not null-checked; a header without a following sibling
            // element would cause a NullPointerException here - confirm against the jsoup version in use
            Elements a = blackcatheader.nextElementSibling().getElementsByTag("a");
            StringBuilder productionCompanies = new StringBuilder();
            for (Element anchor : a) {
                if (StringUtils.isNotEmpty(productionCompanies)) {
                    productionCompanies.append(", ");
                }
                productionCompanies.append(anchor.ownText());
            }
            md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, productionCompanies.toString());
            break;
        }
    }

    /*
     * plot from /plotsummary
     */
    // from here on doc refers to the plot summary page (blocks until it has been fetched);
    // the preceding null assignment is immediately overwritten
    doc = null;
    doc = futurePlotsummary.get();

    // imdb.com has another site structure
    if (imdbSite == ImdbSiteDefinition.IMDB_COM) {
        Elements zebraList = doc.getElementsByClass("zebraList");
        if (zebraList != null && !zebraList.isEmpty()) {
            Elements odd = zebraList.get(0).getElementsByClass("odd");
            if (odd.isEmpty()) {
                odd = zebraList.get(0).getElementsByClass("even"); // sometimes imdb has even
            }
            if (odd.size() > 0) {
                // plot = own text of the first <p> of the first list entry
                Elements p = odd.get(0).getElementsByTag("p");
                if (p.size() > 0) {
                    String plot = cleanString(p.get(0).ownText());
                    md.storeMetadata(MediaMetadata.PLOT, plot);
                }
            }
        }
    } else {
        Element wiki = doc.getElementById("swiki.2.1");
        if (wiki != null) {
            String plot = cleanString(wiki.ownText());
            md.storeMetadata(MediaMetadata.PLOT, plot);
        }
    }

    // title also from chosen site if the chosen site is not IMDB_COM (overwrites the title stored above)
    if (imdbSite != ImdbSiteDefinition.IMDB_COM) {
        title = doc.getElementById("tn15title");
        if (title != null) {
            Element element = null;
            // title
            elements = title.getElementsByClass("main");
            if (elements.size() > 0) {
                element = elements.first();
                String movieTitle = cleanString(element.ownText());
                md.storeMetadata(MediaMetadata.TITLE, movieTitle);
            }
        }
    }
    // }

    // get data from tmdb? (same condition as used when submitting the TMDb worker, so futureTmdb is set here)
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        MediaMetadata tmdbMd = futureTmdb.get();
        // foreign language: TMDb values replace the scraped ones, but only if TMDb delivered a plot
        if (options.isScrapeImdbForeignLanguage() && tmdbMd != null
                && StringUtils.isNotBlank(tmdbMd.getStringValue(MediaMetadata.PLOT))) {
            // tmdbid
            md.setId(MediaMetadata.TMDBID, tmdbMd.getId(MediaMetadata.TMDBID));
            // title
            md.storeMetadata(MediaMetadata.TITLE, tmdbMd.getStringValue(MediaMetadata.TITLE));
            // original title
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, tmdbMd.getStringValue(MediaMetadata.ORIGINAL_TITLE));
            // tagline
            md.storeMetadata(MediaMetadata.TAGLINE, tmdbMd.getStringValue(MediaMetadata.TAGLINE));
            // plot
            md.storeMetadata(MediaMetadata.PLOT, tmdbMd.getStringValue(MediaMetadata.PLOT));
            // collection info
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
        }
        // collection info only
        if (options.isScrapeCollectionInfo() && tmdbMd != null) {
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
        }
    }

    // if we have still no original title, take the title
    if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
        md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
    }

    return md;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMovieParser.java

/**
 * Extracts the original title from the "also known as" section of a release info page and stores it in
 * {@code md}.
 * <p>
 * Two page layouts are supported:
 * <ul>
 * <li>old layout: every {@code <table>} whose id is {@code akas} (case-insensitive); a row qualifies when its
 * first cell contains "original title", the title is the text of the second cell</li>
 * <li>new layout (only evaluated when the old layout produced no original title): rows with class
 * {@code aka-item}, using the cells with class {@code aka-item__name} and {@code aka-item__title}</li>
 * </ul>
 * Rows which do not have the expected cells (for example header rows without {@code <td>} elements) are skipped
 * instead of aborting the whole parse with an exception.
 *
 * @param doc
 *          the parsed release info page
 * @param options
 *          the scrape options (not read by this method)
 * @param md
 *          the metadata object to store the original title in
 * @return the passed {@code md} instance
 */
private MediaMetadata parseReleaseinfoPageAKAs(Document doc, MediaScrapeOptions options, MediaMetadata md) {
    // <table id="akas" class="subpage_data spEven2Col">
    // <tr class="even">
    // <td>(original title)</td>
    // <td>Intouchables</td>
    // </tr>
    // all tables have to be inspected, since the id "akas" may be used more than once on the page
    for (Element table : doc.getElementsByTag("table")) {
        if (!table.id().equalsIgnoreCase("akas")) {
            continue;
        }

        for (Element row : table.getElementsByTag("tr")) {
            Elements cells = row.getElementsByTag("td");
            // Elements.get(index) throws for a missing index, so rows with fewer than two cells must be skipped
            if (cells.size() < 2) {
                continue;
            }

            if (cells.get(0).text().toLowerCase(Locale.ROOT).contains("original title")) {
                md.setOriginalTitle(cells.get(1).text());
                // only leaves the rows of this table; further "akas" tables are still inspected
                break;
            }
        }
    }

    // alternative; new way with table classes
    // <tr class="ipl-zebra-list__item aka-item">
    // <td class="aka-item__name">Germany</td>
    // <td class="aka-item__title">Avatar - Aufbruch nach Pandora</td>
    // </tr>
    if (md.getOriginalTitle().isEmpty()) {
        for (Element row : doc.getElementsByClass("aka-item")) {
            // first() yields null when the row has no such cell, hence both cells are checked
            Element country = row.getElementsByClass("aka-item__name").first();
            Element title = row.getElementsByClass("aka-item__title").first();
            if (country != null && title != null
                    && country.text().toLowerCase(Locale.ROOT).contains("original title")) {
                md.setOriginalTitle(title.text());
                break;
            }
        }
    }

    return md;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java

/**
 * Reads the plot from a plot summary page and stores it in {@code md}.
 * <p>
 * For {@link ImdbSiteDefinition#IMDB_COM} the first entry of the summaries list is used (with its author block
 * removed from the document); for every other site the own text of the element with id {@code swiki.2.1} is used.
 * Nothing is stored when the expected elements are missing.
 *
 * @param doc
 *          the parsed plot summary page; the author block of the first summary is removed from it
 * @param options
 *          the scrape options (not read by this method)
 * @param md
 *          the metadata object to store the plot in
 * @return the passed {@code md} instance
 */
protected MediaMetadata parsePlotsummaryPage(Document doc, MediaScrapeOptions options, MediaMetadata md) {
    // every site but imdb.com: the plot sits in a single wiki element
    if (getImdbSite() != ImdbSiteDefinition.IMDB_COM) {
        Element wikiElement = doc.getElementById("swiki.2.1");
        if (wikiElement != null) {
            md.setPlot(cleanString(wikiElement.ownText()));
        }
        return md;
    }

    // imdb.com: the synopsis section (id "plot-synopsis-content") is deliberately not used,
    // because the synopsis contains spoilers - just take the first summary:
    // <li class="ipl-zebra-list__item" id="summary-ps21700000">
    // <p>text text text text </p>
    // <div class="author-container">
    // <em>&mdash;<a href="/search/title?plot_author=author">Author Name</a></em>
    // </div>
    // </li>
    Element summaries = doc.getElementById("plot-summaries-content");
    if (summaries == null) {
        return md;
    }

    Elements summaryItems = summaries.getElementsByClass("ipl-zebra-list__item");
    if (summaryItems.isEmpty()) {
        return md;
    }
    Element firstSummary = summaryItems.get(0);

    // drop the author block, so it does not end up in the plot text
    Elements authorBlocks = firstSummary.getElementsByClass("author-container");
    if (!authorBlocks.isEmpty()) {
        authorBlocks.get(0).remove();
    }

    // the placeholder entry shown for titles without a summary must not be taken as plot
    if ("no-summary-content".equals(firstSummary.id())) {
        return md;
    }

    md.setPlot(cleanString(firstSummary.text()));
    return md;
}