Example usage for org.jsoup.nodes Element id

List of usage examples for org.jsoup.nodes Element id

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.id().

Prototype

public String id() 

Source Link

Documentation

Get the id attribute of this element.

Usage

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * Builds a one-line debug description of an element from its
 * {@code gravityScore}, {@code gravityNodes}, id and {@code class} values.
 *
 * @param e the element to describe
 * @return the formatted description (note: the last quoted value is not closed)
 */
private String debugNode(Element e) {
    return "GravityScore: '" + e.attr("gravityScore")
            + "' paraNodeCount: '" + e.attr("gravityNodes")
            + "' nodeId: '" + e.id()
            + "' className: '" + e.attr("class");
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * Looks for the cluster of paragraphs that most likely forms the article body.
 * <p>
 * Every candidate node that has more than two stop words and is not link-heavy
 * contributes a score (stop-word count plus a position-based boost) to its parent,
 * and half of that score to its grandparent. Nodes near the top get a positive boost;
 * when there are more than 15 candidates, the trailing quarter gets a negative boost
 * because comments usually sit at the bottom of the page. The parent/grandparent with
 * the highest score wins.
 *
 * @param doc the parsed document to inspect
 * @return the best scoring parent or grandparent element, or {@code null} when no
 *         candidate node was found
 */
private Element calculateBestNodeBasedOnClustering(Document doc) {
    Element topNode = null;

    // grab all the candidate elements on the page to inspect how likely they are to be article text
    ArrayList<Element> nodesToCheck = getNodesToCheck(doc);

    double startingBoost = 1.0;
    int i = 0;

    // holds all the parents (and grandparents) of the nodes we're checking
    Set<Element> parentNodes = new HashSet<Element>();

    ArrayList<Element> nodesWithText = new ArrayList<Element>();

    for (Element node : nodesToCheck) {
        String nodeText = node.text();
        WordStats wordStats = StopWords.getStopWordCount(nodeText);
        boolean highLinkDensity = isHighLinkDensity(node);

        if (wordStats.getStopWordCount() > 2 && !highLinkDensity) {
            nodesWithText.add(node);
        }
    }

    int numberOfNodes = nodesWithText.size();
    // NOTE(review): never incremented in this method, so it currently contributes 0 below
    int negativeScoring = 0;
    // the last 25% of nodes get negative scores in case they're comments
    double bottomNodesForNegativeScore = (float) numberOfNodes * 0.25;

    if (logger.isDebugEnabled()) {
        logger.debug("About to inspect num of nodes with text: " + numberOfNodes);
    }

    for (Element node : nodesWithText) {

        // boost paragraphs that appear higher up in the DOM and give lower, even negative,
        // scores to those that appear lower, which could be comment-like content
        float boostScore = 0;

        if (isOkToBoost(node)) {
            // each boosted node gets a smaller boost than the previous one: 50, 25, 16.6, ...
            boostScore = (float) ((1.0 / startingBoost) * 50);
            startingBoost++;
        }

        // check for negative node values
        if (numberOfNodes > 15) {
            if ((numberOfNodes - i) <= bottomNodesForNegativeScore) {
                float booster = (float) bottomNodesForNegativeScore - (float) (numberOfNodes - i);
                boostScore = -(float) Math.pow(booster, (float) 2);

                // we don't want to score too highly on the negative side:
                // past the threshold the penalty is replaced by a small positive boost
                float negscore = Math.abs(boostScore) + negativeScoring;
                if (negscore > 40) {
                    boostScore = 5;
                }
            }
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Location Boost Score: " + boostScore + " on interation: " + i + "' id='"
                    + node.parent().id() + "' class='" + node.parent().attr("class"));
        }

        String nodeText = node.text();
        WordStats wordStats = StopWords.getStopWordCount(nodeText);
        int upscore = (int) (wordStats.getStopWordCount() + boostScore);

        Element parent = node.parent();
        Element grandParent = parent.parent();

        // the parent receives the full score, the grandparent half of it
        updateScore(parent, upscore);
        updateScore(grandParent, upscore / 2);
        updateNodeCount(parent, 1);
        updateNodeCount(grandParent, 1);

        // Set.add already ignores duplicates, no contains() check needed
        parentNodes.add(parent);
        parentNodes.add(grandParent);

        i++;
    }

    // now let's find the parent node who scored the highest
    int topNodeScore = 0;
    for (Element e : parentNodes) {

        if (logger.isDebugEnabled()) {
            logger.debug("ParentNode: score='" + e.attr("gravityScore") + "' nodeCount='"
                    + e.attr("gravityNodes") + "' id='" + e.id() + "' class='" + e.attr("class") + "' ");
        }

        int score = getScore(e);
        if (score > topNodeScore) {
            topNode = e;
            topNodeScore = score;
        }

        // fall back to the first candidate when nothing has scored above zero yet
        if (topNode == null) {
            topNode = e;
        }
    }

    if (logger.isDebugEnabled()) {
        if (topNode == null) {
            logger.debug("ARTICLE NOT ABLE TO BE EXTRACTED!, WE HAZ FAILED YOU LORD VADAR");
        } else {
            // preview the first paragraph if there is one, otherwise the node's own text;
            // the text must be assigned, otherwise the preview below is always empty
            Element topPara = topNode.getElementsByTag("p").first();
            String targetText = (topPara == null) ? topNode.text() : topPara.text();

            String logText;
            if (targetText.length() >= 51) {
                logText = targetText.substring(0, 50);
            } else {
                logText = targetText;
            }
            logger.debug("TOPNODE TEXT: " + logText.trim());
            logger.debug("Our TOPNODE: score='" + topNode.attr("gravityScore") + "' nodeCount='"
                    + topNode.attr("gravityNodes") + "' id='" + topNode.id() + "' class='"
                    + topNode.attr("class") + "' ");
        }
    }

    return topNode;

}

From source file:cc.metapro.openct.custom.CustomPresenter.java

/**
 * Installs a click callback on the web view. A click on a password input fills in the
 * stored password; a click on a text input opens a dialog whose result decides whether
 * the field is simply focused or filled with the stored username. In each case the
 * element is addressed by id first and by its {@code name} attribute as a fallback.
 *
 * @param webView the web view to attach the click callback to
 * @param manager the fragment manager used to show the type selection dialog
 */
@Override
public void setWebView(final InteractiveWebView webView, final FragmentManager manager) {
    webView.setUserClickCallback(new InteractiveWebView.ClickCallback() {
        @Override
        public void onClick(@NonNull final Element element) {
            if (HTMLUtils.isPasswordInput(element)) {
                boolean filledById = webView.setById(element.id(), "value", mPassword);
                if (!filledById) {
                    webView.setByName(element.attr("name"), "value", mPassword);
                }
                return;
            }

            if (!HTMLUtils.isTextInput(element)) {
                return;
            }

            ClickDialog.TypeCallback onTypeChosen = new ClickDialog.TypeCallback() {
                @Override
                public void onResult(String type) {
                    if (type.equals(InteractiveWebView.COMMON_INPUT_FLAG)) {
                        boolean focusedById = webView.focusById(element.id());
                        if (!focusedById) {
                            webView.focusByName(element.attr("name"));
                        }
                    } else if (type.equals(InteractiveWebView.USERNAME_INPUT_FLAG)) {
                        boolean filledById = webView.setById(element.id(), "value", mUsername);
                        if (!filledById) {
                            webView.setByName(element.attr("name"), "value", mUsername);
                        }
                    }
                }
            };
            ClickDialog.newInstance(onTypeChosen).show(manager, "click_dialog");
        }
    });
}

From source file:org.asqatasun.ruleimplementation.AbstractMarkerPageRuleImplementation.java

/**
 * Sorts marker elements out of the global selection.
 * <p>
 * For each element of {@code selectionWithoutMarkerHandler}, the value of the "id"
 * attribute, the values of the "class" attribute and the value of the "role" attribute
 * are extracted. If one of these values belongs to the marker list, the element is
 * moved to {@code selectionWithMarkerHandler}. If one of them belongs to the inverse
 * marker list, the element is removed from the global selection.
 * <p>
 * Nothing is done when both marker lists are empty or when the global selection is empty.
 */
private void sortMarkerElements() {
    if ((CollectionUtils.isEmpty(markerList) && CollectionUtils.isEmpty(inverseMarkerList))
            || selectionWithoutMarkerHandler.isEmpty()) {
        return;
    }
    Iterator<Element> iter = selectionWithoutMarkerHandler.get().iterator();
    while (iter.hasNext()) {
        Element el = iter.next();
        String id = el.id();
        Collection<String> classNames = el.classNames();
        String role = el.attr(ROLE_ATTR);

        // an element without "id", "class" and "role" cannot match any marker
        if (!(StringUtils.isNotBlank(id) || CollectionUtils.isNotEmpty(classNames)
                || StringUtils.isNotBlank(role))) {
            continue;
        }

        boolean isMarker = checkAttributeBelongsToMarkerList(id, classNames, role, markerList);
        boolean isInverseMarker = checkAttributeBelongsToMarkerList(id, classNames, role, inverseMarkerList);

        if (isMarker) {
            selectionWithMarkerHandler.add(el);
        }
        // Remove at most once per element: calling Iterator.remove() twice for the same
        // next() throws IllegalStateException, which happened when an element matched
        // both the marker list and the inverse marker list.
        if (isMarker || isInverseMarker) {
            iter.remove();
        }
    }
}

From source file:org.asqatasun.rules.accessiweb22.Aw22Rule11013.java

/**
 * Checks whether the value of the "for" attribute of a label matches
 * (case-insensitively) the id of a form field selected from the children
 * of the given element.
 *
 * @param element the element whose children are searched for form fields
 * @param forAttributeValue the value of the label's "for" attribute
 * @return {@code true} if a selected form field has a matching id;
 *         {@code false} otherwise, including when the "for" value is blank
 */
private boolean isForAttributeOfLabelEqualsToIdAttributeOfFormField(Element element, String forAttributeValue) {
    boolean matched = false;
    if (!StringUtils.isBlank(forAttributeValue)) {
        for (Element formField : element.children().select(FORM_ELEMENT_CSS_LIKE_QUERY)) {
            matched = StringUtils.equalsIgnoreCase(forAttributeValue, formField.id());
            if (matched) {
                break;
            }
        }
    }
    return matched;
}

From source file:org.asqatasun.rules.doc.utils.exportdomtocsv.ExportDomToCsv.java

/**
 * Exports the themes, criteria and tests found in the document returned by
 * {@code JsoupFunc#getDocument()} into a single file.
 * <p>
 * Before using it please set the FOLDER variable with the path where you
 * want to create your csv file.
 * <p>
 * Each output line is the plain concatenation of theme index, theme, criterion code,
 * criterion label, test code, test label and level, followed by a newline.
 *
 * @param args unused
 * @throws IOException if the output file cannot be written
 */
public static void main(String[] args) throws IOException {
    File ref = FileUtils.getFile(FOLDER);
    JsoupFunc jsf = new JsoupFunc();
    Document doc = jsf.getDocument();
    Elements thematiques = doc.select("div.thematique");
    // compiled once instead of once per test element
    Pattern digitPattern = Pattern.compile("\\d+\\.\\d+\\.\\d+\\s?\\:?\\s?");
    StringBuilder sb = new StringBuilder();
    // NOTE(review): declared outside the loops, so a test whose text does not match the
    // pattern reuses the label of the previous test - confirm this is intended
    String testLabel = "";
    for (int i = 2; i < thematiques.size(); i++) {
        Element thematique = thematiques.get(i);
        String themeIndex = String.valueOf(i - 1);
        String theme = thematique.child(0).text();
        Elements criteres = thematique.select("h3");
        for (int j = 1; j < criteres.size(); j++) {
            Element critereElement = criteres.get(j);
            // the level is the text between the first '[' and the first ']' of the h3 markup
            String critereH3String = critereElement.toString();
            String level = critereH3String.substring(critereH3String.indexOf("[") + 1,
                    critereH3String.indexOf("]"));
            Elements tests = critereElement.nextElementSibling().select("[id^=test-]");
            // criterion code: id characters from index 5 up to index 10, 9 or 8
            String critere = extractIdSuffix(critereElement.id(), 8, 10);
            String critereLabel = critereElement.text().split("] ")[1];
            for (Element el : tests) {
                Matcher matcher = digitPattern.matcher(el.text());
                if (matcher.find()) {
                    String testLabelReplace = el.html()
                            .replace("index.php", "http://www.accessiweb.org/index.php").replace("\n", "");
                    // NOTE(review): the offset comes from el.text() but is applied to the html-based string
                    testLabel = testLabelReplace.substring(matcher.end());
                }
                // test code: id characters from index 5 up to index 12, 11 or 10
                String testCode = extractIdSuffix(el.id(), 10, 12);
                sb.append(themeIndex).append(theme).append(critere).append(critereLabel)
                        .append(testCode).append(testLabel).append(level).append("\n");
            }
        }
    }
    FileUtils.writeStringToFile(ref, sb.toString());
}

/**
 * Returns the part of an id starting at index 5 and ending at {@code maxEnd}, or at the
 * end of the id when it is shorter. Replaces a chain of nested
 * {@code StringIndexOutOfBoundsException} catches with an explicit length check.
 *
 * @param id the element id
 * @param minEnd the smallest accepted end index
 * @param maxEnd the largest end index
 * @return {@code id.substring(5, min(id.length(), maxEnd))}
 * @throws StringIndexOutOfBoundsException if the id is shorter than {@code minEnd}
 */
private static String extractIdSuffix(String id, int minEnd, int maxEnd) {
    if (id.length() < minEnd) {
        throw new StringIndexOutOfBoundsException(
                "id '" + id + "' is shorter than " + minEnd + " characters");
    }
    return id.substring(5, Math.min(id.length(), maxEnd));
}

From source file:org.asqatasun.rules.doc.utils.rga33.extractor.Rgaa3Extractor.java

/**
 * Fetches the RGAA3 and AccessiWeb 2.2 reference pages, extracts criterion levels and
 * rule information from the RGAA3 page, collects the non-blank AccessiWeb 2.2 tests
 * into {@code AW22} and finally compares both referentials.
 *
 * @param args the command line arguments
 * @throws java.io.IOException if a reference page cannot be fetched, or from the criterion extraction step
 */
public static void main(String[] args) throws IOException {
    // Get content from websites
    final Document rgaa3Doc = Jsoup.parse(new URL(RGAA3_REF_URL), 10000);
    final Document aw22Doc = Jsoup.parse(new URL(AW22_URL), 10000);

    extractLevelFromCriterionAndWrite(rgaa3Doc);
    extractRuleInfo(rgaa3Doc);

    // Extract rules from accessiweb 2.2, skipping tests without text
    for (Element testElement : aw22Doc.select(TEST_SELECTOR)) {
        String testText = testElement.text();
        if (StringUtils.isBlank(testText)) {
            continue;
        }
        AW22.put(extractTestFromId(testElement.id()), testText);
    }

    // Currently disabled steps:
    //        generateMysql();
    compareReferentials();
    //        getRuleI18nKeys();
    //        checkClassesExist();
    //        updateTestcasesWithRuleTitle();
    //        generateMkdoc();
    //        createTestcaseFiles();
}

From source file:org.asqatasun.rules.doc.utils.rga33.extractor.Rgaa3Extractor.java

/**
 * Walks over every criterion element that has a non-blank id, builds one
 * {@code key=content} line per criterion and records the criterion level
 * (the text between '[' and ']') in {@code levelFromCrit}. The collected lines
 * are written to {@code CRITERION_I18N_FILE_PATH} when {@code writeCritInFile} is set.
 *
 * @param doc the document the criteria are selected from
 * @throws IOException if the file cannot be written
 */
private static void extractLevelFromCriterionAndWrite(Document doc) throws IOException {
    StringBuilder i18nLines = new StringBuilder();
    for (Element criterion : doc.select(CRITERION_SELECTOR)) {
        if (StringUtils.isBlank(criterion.id())) {
            continue;
        }

        // key: the id with "crit" replaced by "Rgaa30"
        i18nLines.append(criterion.id().replace("crit", "Rgaa30")).append("=");

        // value: the html after the first "] " occurrence (keeping the space), run through extractRuleContent
        String html = criterion.html();
        String afterLevel = html.substring(html.indexOf("] ") + 1);
        i18nLines.append(extractRuleContent(afterLevel)).append("\n");

        String text = criterion.text();
        int levelStart = text.indexOf("[") + 1;
        int levelEnd = text.indexOf("]");
        levelFromCrit.put(criterion.id().replaceAll("crit-", ""), text.substring(levelStart, levelEnd));
    }

    if (writeCritInFile) {
        File target = new File(CRITERION_I18N_FILE_PATH);
        FileUtils.write(target, i18nLines.toString());
    }
}

From source file:org.asqatasun.rules.doc.utils.rga33.extractor.Rgaa3Extractor.java

/**
 * Builds a {@code Rule} for every test element that has a non-blank id and stores
 * it in {@code RGAA3}, keyed by its dashed rule number.
 * <p>
 * Two corrections are applied to the extracted numbers: every occurrence of "1-1-2"
 * after the first one is renamed to "1-1-4", and "11-1-4-5" is renamed to "11-14-5".
 *
 * @param doc the document the tests are selected from
 */
private static void extractRuleInfo(Document doc) {
    boolean alreadySaw112 = false;
    for (Element testElement : doc.select(TEST_SELECTOR)) {
        if (StringUtils.isBlank(testElement.id())) {
            continue;
        }

        Rule rule = new Rule(RGAA3_REF_URL, REF_NAME);
        rule.ruleId = testElement.id();
        rule.ruleDash = extractTestFromId(rule.ruleId);

        if (rule.ruleDash.equals("1-1-2")) {
            if (alreadySaw112) {
                // repeated "1-1-2" id: treat it as test 1-1-4
                rule.ruleDash = "1-1-4";
                rule.ruleId = "test-1-1-4";
            }
            alreadySaw112 = true;
        } else if (rule.ruleDash.equals("11-1-4-5")) {
            rule.ruleDash = "11-14-5";
        }

        // prefix every href with the main url
        String rawHtml = testElement.html().replaceAll("href=\"", "href=\"" + RGAA3_MAIN_URL);
        rule.setRuleRawHtml(rawHtml);
        rule.ruleHtmlWithoutLink = extractRuleContent(rule.ruleRawHtml);
        rule.ruleText = testElement.text();
        rule.level = levelFromCrit.get(rule.getCriterion());
        RGAA3.put(rule.ruleDash, rule);
    }
}