Example usage for org.jsoup.nodes Element tagName

List of usage examples for org.jsoup.nodes Element tagName

Introduction

This page collects usage examples for the org.jsoup.nodes.Element.tagName() method.

Prototype

public String tagName() 

Source Link

Document

Get the name of the tag for this element.

Usage

From source file:com.aquest.emailmarketing.web.controllers.BroadcastController.java

/**
 * Adds the tracking./*from w  w w  .j  a  va  2s  . c o m*/
 *
 * @param model the model
 * @param urls the urls
 * @param principal the principal
 * @param id the id
 * @param trackingFlg the tracking flg
 * @param openGAflg the open g aflg
 * @param openPixelFlg the open pixel flg
 * @param trackingType the tracking type
 * @return the string
 */
@RequestMapping(value = "/generateUrls", method = RequestMethod.POST)
public String addTracking(Model model, Urls urls, Principal principal, @RequestParam(value = "id") int id,
        @RequestParam(value = "trackingFlg", required = false) boolean trackingFlg,
        @RequestParam(value = "openGAflg", required = false) boolean openGAflg,
        @RequestParam(value = "openPixelFlg", required = false) boolean openPixelFlg,
        @RequestParam(value = "trackingType", required = false) String trackingType) {
    TrackingConfig trackingConfig = new TrackingConfig();
    Broadcast broadcast = broadcastService.getBroadcastById(id);
    String workingHtml = broadcast.getHtmlbody();
    if (trackingFlg == true) {
        if (openGAflg == true) {
            workingHtml = emailTracking.addGaOpenEmailTracking(workingHtml, urls);
            System.out.println("GA Open: " + workingHtml);
        }
        if (openPixelFlg == true) {
            workingHtml = emailTracking.addPixelOpenEmailTracking(workingHtml);
            System.out.println("Pixel Open: " + workingHtml);
        }
        if (trackingType.equals("ga")) {
            workingHtml = emailTracking.addGaTrackingToUrl(workingHtml, urls);
            System.out.println("GA Click added: " + workingHtml);
        } else if (trackingType.equals("intTrack")) {
            workingHtml = emailTracking.addIntTrackingToUrl(workingHtml, urls);
            System.out.println("Internal Tracking: " + workingHtml);
        } else {
            workingHtml = emailTracking.addBothTrackingToUrl(workingHtml, urls);
        }

    }

    broadcast.setHtmlbody_tracking(workingHtml);
    System.out.println(broadcast.getHtmlbody_tracking());
    String confirm = broadcastService.SaveOrUpdate(broadcast);
    System.out.println(confirm);
    System.out.println(trackingFlg);
    System.out.println(openGAflg);
    System.out.println(openPixelFlg);
    System.out.println(trackingType);
    if (confirm == broadcast.getBroadcast_id()) {
        trackingConfig.setBroadcast_id(broadcast.getBroadcast_id());
        // taking care of tracking flg
        int tracking_flg = 0;
        if (trackingFlg == true) {
            tracking_flg = 1;
        }
        trackingConfig.setTracking_flg(tracking_flg);
        // taking care of openGAflg
        int open_ga_flg = 0;
        if (openGAflg == true) {
            open_ga_flg = 1;
        }
        trackingConfig.setOpen_ga_flg(open_ga_flg);
        // taking care of openPixelFlg
        int open_pixel_flg = 0;
        if (openPixelFlg == true) {
            open_pixel_flg = 1;
        }
        trackingConfig.setOpen_pixel_flg(open_pixel_flg);
        // set tracking type
        trackingConfig.setTracking_type(trackingType);
        // seting utm's
        trackingConfig.setUtm_campaign(urls.getUtmCampaign());
        trackingConfig.setUtm_content(urls.getUtmContent());
        trackingConfig.setUtm_medium(urls.getUtmMedium());
        trackingConfig.setUtm_source(urls.getUtmSource());
        trackingConfigService.SaveOrUpdate(trackingConfig);
    }
    // find images in html to be able to embed images in email as in-line attachments
    EmbeddedImage embeddedImage = new EmbeddedImage();
    //HashSet to avoid duplicates
    Set<String> imgList = new HashSet<String>();
    String html = broadcast.getHtmlbody();
    Document doc = Jsoup.parse(html);
    Elements media = doc.select("[src]");
    for (Element src : media) {
        if (src.tagName().equals("img")) {
            imgList.add(src.attr("abs:src"));
        }
    }
    model.addAttribute("imgList", imgList);
    model.addAttribute("embeddedImage", embeddedImage);
    model.addAttribute("broadcast", broadcast);
    return "embeddedimage";
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Splits a monitor page into one shell document per day, parses each with
 * {@code parseMonitorDay} and adds the resulting day to {@code v}.
 *
 * <p>Three layouts are handled, checked in this order:
 * <ol>
 * <li>more than one {@code .mon_head} element: one day per head;</li>
 * <li>otherwise more than one {@code .mon_title} element: one day per title;</li>
 * <li>otherwise the whole document is parsed as a single day.</li>
 * </ol>
 *
 * <p>Several nodes are appended to the per-day document without {@code clone()}, so this
 * method modifies {@code doc}.
 *
 * @param v    the schedule that receives the parsed days
 * @param doc  the page to split
 * @param data passed through unchanged to {@code parseMonitorDay}
 * @throws JSONException              presumably propagated from {@code parseMonitorDay} - confirm
 * @throws CredentialInvalidException presumably propagated from {@code parseMonitorDay} - confirm
 */
void parseMultipleMonitorDays(SubstitutionSchedule v, Document doc, JSONObject data)
        throws JSONException, CredentialInvalidException {
    if (doc.select(".mon_head").size() > 1) {
        // Layout 1: one day per .mon_head element.
        for (int j = 0; j < doc.select(".mon_head").size(); j++) {
            Document doc2 = Document.createShell(doc.baseUri());
            doc2.body().appendChild(doc.select(".mon_head").get(j).clone());
            Element next = doc.select(".mon_head").get(j).nextElementSibling();
            if (next != null && next.tagName().equals("center")) {
                // The day's content is wrapped in the <center> that follows the head.
                doc2.body().appendChild(next.select(".mon_title").first().clone());
                if (next.select("table:has(tr.list)").size() > 0) {
                    // appended without clone(): the table is taken out of doc
                    doc2.body().appendChild(next.select("table:has(tr.list)").first());
                }
                if (next.select("table.info").size() > 0) {
                    // appended without clone(): the table is taken out of doc
                    doc2.body().appendChild(next.select("table.info").first());
                }
            } else if (doc.select(".mon_title").size() - 1 >= j) {
                // No <center> wrapper: pair the j-th title with the j-th list table of the page.
                doc2.body().appendChild(doc.select(".mon_title").get(j).clone());
                doc2.body().appendChild(doc.select("table:has(tr.list)").get(j).clone());
            } else {
                // Nothing that belongs to this head could be found; skip it.
                continue;
            }
            SubstitutionScheduleDay day = parseMonitorDay(doc2, data);
            v.addDay(day);
        }
    } else if (doc.select(".mon_title").size() > 1) {
        // Layout 2: one day per .mon_title element.
        for (int j = 0; j < doc.select(".mon_title").size(); j++) {
            Document doc2 = Document.createShell(doc.baseUri());
            doc2.body().appendChild(doc.select(".mon_title").get(j).clone());
            Element next = doc.select(".mon_title").get(j).nextElementSibling();
            // Collect every following sibling up to the next <center> (or the end).
            // NOTE(review): appendChild(next) is called without clone(); the loop relies on
            // jsoup re-parenting the node, so that re-reading nextElementSibling() of the
            // same title yields the following element. Without that it would not terminate.
            while (next != null && !next.tagName().equals("center")) {
                doc2.body().appendChild(next);
                next = doc.select(".mon_title").get(j).nextElementSibling();
            }
            SubstitutionScheduleDay day = parseMonitorDay(doc2, data);
            v.addDay(day);
        }
    } else {
        // Layout 3: a single day; parse the page as is.
        SubstitutionScheduleDay day = parseMonitorDay(doc, data);
        v.addDay(day);
    }
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Builds the search field that corresponds to an HTML form control.
 *
 * @param name  display name for the new field
 * @param hint  hint text; only applied to text fields
 * @param input the form control element
 * @return a {@code TextSearchField} for {@code <input type="text">}, a
 *         {@code DropdownSearchField} filled with the options of a {@code <select>},
 *         or {@code null} for any other element
 */
private SearchField createSearchField(String name, String hint, Element input) {
    final String tag = input.tagName();

    if ("input".equals(tag) && "text".equals(input.attr("type"))) {
        TextSearchField textField = new TextSearchField();
        textField.setDisplayName(name);
        textField.setHint(hint);
        textField.setId(input.attr("name"));
        return textField;
    }

    if ("select".equals(tag)) {
        DropdownSearchField dropdown = new DropdownSearchField();
        dropdown.setDisplayName(name);
        dropdown.setId(input.attr("name"));
        // one dropdown entry per <option>: value attribute -> visible text
        for (Element choice : input.select("option")) {
            dropdown.addDropdownValue(choice.attr("value"), choice.text());
        }
        return dropdown;
    }

    // unsupported control
    return null;
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * remove any divs that looks like non-content, clusters of links, or paras with no gusto
 *
 * @param node/*from   www. j  a v  a  2s .  c om*/
 * @return
 */
private Element cleanupNode(Element node) {
    if (logger.isDebugEnabled()) {
        logger.debug("Starting cleanup Node");
    }

    node = addSiblings(node);

    Elements nodes = node.children();
    for (Element e : nodes) {
        if (e.tagName().equals("p")) {
            continue;
        }
        if (logger.isDebugEnabled()) {
            logger.debug("CLEANUP  NODE: " + e.id() + " class: " + e.attr("class"));
        }
        boolean highLinkDensity = isHighLinkDensity(e);
        if (highLinkDensity) {
            if (logger.isDebugEnabled()) {
                logger.debug("REMOVING  NODE FOR LINK DENSITY: " + e.id() + " class: " + e.attr("class"));
            }
            e.remove();
            continue;
        }
        // now check for word density
        // grab all the paragraphs in the children and remove ones that are too small to matter
        Elements subParagraphs = e.getElementsByTag("p");

        for (Element p : subParagraphs) {
            if (p.text().length() < 25) {
                p.remove();
            }
        }

        // now that we've removed shorty paragraphs let's make sure to exclude any first paragraphs that don't have paras as
        // their next siblings to avoid getting img bylines
        // first let's remove any element that now doesn't have any p tags at all
        Elements subParagraphs2 = e.getElementsByTag("p");
        if (subParagraphs2.size() == 0 && !e.tagName().equals("td")) {
            if (logger.isDebugEnabled()) {
                logger.debug("Removing node because it doesn't have any paragraphs");
            }
            e.remove();
            continue;
        }

        //if this node has a decent enough gravityScore we should keep it as well, might be content
        int topNodeScore = getScore(node);
        int currentNodeScore = getScore(e);
        float thresholdScore = (float) (topNodeScore * .08);
        if (logger.isDebugEnabled()) {
            logger.debug("topNodeScore: " + topNodeScore + " currentNodeScore: " + currentNodeScore
                    + " threshold: " + thresholdScore);
        }
        if (currentNodeScore < thresholdScore) {
            if (!e.tagName().equals("td")) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Removing node due to low threshold score");
                }
                e.remove();
            } else {
                if (logger.isDebugEnabled()) {
                    logger.debug("Not removing TD node");
                }
            }

            continue;
        }

    }

    return node;

}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * alot of times the first paragraph might be the caption under an image so we'll want to make sure if we're going to
 * boost a parent node that it should be connected to other paragraphs, at least for the first n paragraphs
 * so we'll want to make sure that the next sibling is a paragraph and has at least some substatial weight to it
 *
 *
 * @param node/*from   w  w  w . j av a  2 s .c  o m*/
 * @return
 */
private boolean isOkToBoost(Element node) {

    int stepsAway = 0;

    Element sibling = node.nextElementSibling();
    while (sibling != null) {

        if (sibling.tagName().equals("p")) {
            if (stepsAway >= 3) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Next paragraph is too far away, not boosting");
                }
                return false;
            }

            String paraText = sibling.text();
            WordStats wordStats = StopWords.getStopWordCount(paraText);
            if (wordStats.getStopWordCount() > 5) {
                if (logger.isDebugEnabled()) {
                    logger.debug("We're gonna boost this node, seems contenty");
                }
                return true;
            }

        }

        // increase how far away the next paragraph is from this node
        stepsAway++;

        sibling = sibling.nextElementSibling();
    }

    return false;
}

From source file:de.geeksfactory.opacclient.apis.Open.java

/**
 * Better version of JSoup's implementation of this function ({@link
 * org.jsoup.nodes.FormElement#formData()}).
 *
 * <p>Controls that are not form-submittable or have an empty {@code name} are skipped.
 * A {@code <select>} contributes every selected option, or its first option when none is
 * selected. Checkboxes and radio buttons contribute only when checked (value {@code "on"} if
 * they have no value). Submit and image buttons contribute only when {@code submitName} is
 * given and contained in their name. Every other control contributes its value.
 *
 * @param form       The form to submit
 * @param submitName The name attribute of the button which is clicked to submit the form, or
 *                   null
 * @return A MultipartEntityBuilder containing the data of the form
 */
protected MultipartEntityBuilder formData(FormElement form, String submitName) {
    MultipartEntityBuilder builder = MultipartEntityBuilder.create();
    builder.setLaxMode();

    for (Element control : form.elements()) {
        // contents are form listable, superset of submittable
        if (!control.tag().isFormSubmittable()) {
            continue;
        }
        final String name = control.attr("name");
        if (name.isEmpty()) {
            continue;
        }
        final String type = control.attr("type");

        if ("select".equals(control.tagName())) {
            Elements selected = control.select("option[selected]");
            if (!selected.isEmpty()) {
                for (Element option : selected) {
                    builder.addTextBody(name, option.val());
                }
            } else {
                // nothing explicitly selected: fall back to the first option, if any
                Element firstOption = control.select("option").first();
                if (firstOption != null) {
                    builder.addTextBody(name, firstOption.val());
                }
            }
            continue;
        }

        boolean isToggle = "checkbox".equalsIgnoreCase(type) || "radio".equalsIgnoreCase(type);
        if (isToggle) {
            // only add checkbox or radio if they have the checked attribute
            if (control.hasAttr("checked")) {
                String value = control.val();
                builder.addTextBody(name, value.isEmpty() ? "on" : value);
            }
            continue;
        }

        boolean isButton = "submit".equalsIgnoreCase(type) || "image".equalsIgnoreCase(type);
        if (isButton) {
            // only the button that was "clicked" is part of the submission
            if (submitName != null && name.contains(submitName)) {
                builder.addTextBody(name, control.val());
            }
            continue;
        }

        builder.addTextBody(name, control.val());
    }
    return builder;
}

From source file:com.iorga.iraj.servlet.AgglomeratorServlet.java

/**
 * Recursively walks the web-app resources below {@code scriptSrc} and, for every resource
 * whose path ends with {@code pathSuffix}, inserts a new tag after {@code agglomerateElement}.
 *
 * <p>The new tag has the same tag name and attributes as {@code agglomerateElement}, except
 * for the attributes named {@code ATTRIBUTE_NAME}, {@code URL_ATTRIBUTE_ATTRIBUTE_NAME} and
 * {@code urlAttribute}; {@code urlAttribute} is set to the resource path with
 * {@code pathPrefix} removed from its start. Paths ending in {@code "/"} are treated as
 * directories and searched recursively. In development mode the directory is also registered
 * with {@code watchService} for create and delete events.
 *
 * @param config             servlet config giving access to the servlet context
 * @param agglomerateElement the element to copy and to insert the generated tags after
 * @param scriptSrc          the resource directory to search
 * @param pathPrefix         prefix stripped from each resource path to build the URL
 * @param pathSuffix         suffix a resource path must have to be included
 * @param urlAttribute       name of the attribute that receives the resource URL
 * @param lastModified       the newest modification time seen so far
 * @return the maximum of {@code lastModified} and the modification times of all resources
 *         that were added
 * @throws MalformedURLException if a resource path is not a valid resource URL
 * @throws IOException           if a resource cannot be opened or the watch cannot be registered
 * @throws URISyntaxException    declared by the signature; not thrown by the visible code
 */
private long searchAndAppendAfter(final ServletConfig config, final Element agglomerateElement,
        final String scriptSrc, final String pathPrefix, final String pathSuffix, final String urlAttribute,
        long lastModified) throws MalformedURLException, IOException, URISyntaxException {
    if (mode == Mode.DEVELOPMENT) {
        // add a watch for that directory
        final Path path = Paths.get(config.getServletContext().getRealPath(scriptSrc));
        path.register(watchService, StandardWatchEventKinds.ENTRY_CREATE, StandardWatchEventKinds.ENTRY_DELETE);
    }
    final Set<String> childrenPaths = config.getServletContext().getResourcePaths(scriptSrc);
    if (childrenPaths == null) {
        // getResourcePaths returns null (not an empty set) when nothing lives below scriptSrc
        return lastModified;
    }
    for (final String path : childrenPaths) {
        if (path.endsWith(pathSuffix)) {
            // add that JS
            final StringBuilder targetScript = new StringBuilder("<");
            targetScript.append(agglomerateElement.tagName());
            // copy all the origin attributes
            for (final Attribute attribute : agglomerateElement.attributes()) {
                final String key = attribute.getKey();
                if (!ATTRIBUTE_NAME.equalsIgnoreCase(key) && !urlAttribute.equalsIgnoreCase(key)
                        && !URL_ATTRIBUTE_ATTRIBUTE_NAME.equalsIgnoreCase(key)) {
                    targetScript.append(" ").append(attribute.html());
                }
            }
            // specify the src path
            final String childUrl = StringUtils.removeStart(path, pathPrefix);
            targetScript.append(" ").append(new Attribute(urlAttribute, childUrl).html()).append(" />");
            agglomerateElement.after(targetScript.toString());
            lastModified = Math.max(
                    config.getServletContext().getResource(childUrl).openConnection().getLastModified(),
                    lastModified);
        } else if (path.endsWith("/")) {
            // it's a directory, recurse search & append
            lastModified = Math.max(searchAndAppendAfter(config, agglomerateElement, path, pathPrefix,
                    pathSuffix, urlAttribute, lastModified), lastModified);
        }
    }
    return lastModified;
}

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * adds any siblings that may have a decent score to this node
 *
 * @param node/*from   www . j  a  va2 s.c o  m*/
 * @return
 */
private Element addSiblings(Element node) {
    if (logger.isDebugEnabled()) {
        logger.debug("Starting to add siblings");
    }
    int baselineScoreForSiblingParagraphs = getBaselineScoreForSiblings(node);

    Element currentSibling = node.previousElementSibling();
    while (currentSibling != null) {
        if (logger.isDebugEnabled()) {
            logger.debug("SIBLINGCHECK: " + debugNode(currentSibling));
        }

        if (currentSibling.tagName().equals("p")) {

            node.child(0).before(currentSibling.outerHtml());
            currentSibling = currentSibling.previousElementSibling();
            continue;
        }

        // check for a paraph embedded in a containing element
        int insertedSiblings = 0;
        Elements potentialParagraphs = currentSibling.getElementsByTag("p");
        if (potentialParagraphs.first() == null) {
            currentSibling = currentSibling.previousElementSibling();
            continue;
        }
        for (Element firstParagraph : potentialParagraphs) {
            WordStats wordStats = StopWords.getStopWordCount(firstParagraph.text());

            int paragraphScore = wordStats.getStopWordCount();

            if ((float) (baselineScoreForSiblingParagraphs * .30) < paragraphScore) {
                if (logger.isDebugEnabled()) {
                    logger.debug("This node looks like a good sibling, adding it");
                }
                node.child(insertedSiblings).before("<p>" + firstParagraph.text() + "<p>");
                insertedSiblings++;
            }

        }

        currentSibling = currentSibling.previousElementSibling();
    }
    return node;

}

From source file:de.geeksfactory.opacclient.apis.BiBer1992.java

/**
 * Downloads the search form of the catalogue and derives the available search fields.
 *
 * <p>Three groups are extracted from the form: one text field per option of the
 * {@code REG1} select, one "Medientyp" dropdown built from the {@code MT}/{@code MS} inputs
 * (if any), and one branch dropdown built from the options of the {@code ZW} select (if any).
 *
 * @return the search fields found, in the order described above
 * @throws IOException if the request fails; a {@code NotReachableException} is thrown when the
 *                     server answers with status 500
 */
@Override
public List<SearchField> getSearchFields() throws IOException {
    List<SearchField> result = new ArrayList<>();

    // the "opax" variant serves the form under a different file name
    String formPage = opacDir.contains("opax") ? "/de/qsel.html.S" : "/de/qsel_main.S";
    HttpGet request = new HttpGet(opacUrl + "/" + opacDir + formPage);

    HttpResponse response = http_client.execute(request);
    if (response.getStatusLine().getStatusCode() == 500) {
        throw new NotReachableException(response.getStatusLine().getReasonPhrase());
    }
    String html = convertStreamToString(response.getEntity().getContent());
    HttpUtils.consume(response.getEntity());

    Document doc = Jsoup.parse(html);

    // text fields: one per option of the REG1 select
    for (Element option : doc.select("form select[name=REG1] option")) {
        TextSearchField textField = new TextSearchField();
        textField.setId(option.attr("value"));
        textField.setDisplayName(option.text());
        textField.setHint("");
        result.add(textField);
    }

    // media types
    Elements mediaInputs = doc.select("form input[name~=(MT|MS)]");
    if (!mediaInputs.isEmpty()) {
        DropdownSearchField mediaDropdown = new DropdownSearchField();
        mediaDropdown.setId(mediaInputs.get(0).attr("name"));
        mediaDropdown.setDisplayName("Medientyp");
        for (Element input : mediaInputs) {
            if (input.val().isEmpty()) {
                continue;
            }
            String label = input.text();
            if (label.isEmpty()) {
                // no text of its own, check sibling-image layouts:
                // Essen: <input name="MT"><img title="mediatype">
                // Schaffenb: <input name="MT"><img alt="mediatype">
                Element sibling = input.nextElementSibling();
                if (sibling != null && sibling.tagName().equals("img")) {
                    label = sibling.attr("title");
                    if (label.isEmpty()) {
                        label = sibling.attr("alt");
                    }
                }
            }
            if (label.isEmpty()) {
                // still nothing, check the table layout (example: Friedrichshafen)
                // <td><input name="MT"></td> <td><img title="mediatype"></td>
                Element nextCell = input.parent().nextElementSibling();
                if (nextCell != null) {
                    Elements titledImages = nextCell.select("img[title]");
                    if (!titledImages.isEmpty()) {
                        label = titledImages.first().attr("title");
                    }
                }
            }
            if (label.isEmpty()) {
                // still nothing, check the image-inside-label layout (example: Wiedenest)
                // <input type="radio" name="MT" id="MTYP1" value="MTYP1">
                // <label for="MTYP1"><img src="http://www.wiedenest.de/bib/image/books.png"
                // alt="Bücher" title="Bücher"></label>
                Element sibling = input.nextElementSibling();
                if (sibling != null) {
                    Elements titledImages = sibling.select("img[title]");
                    if (!titledImages.isEmpty()) {
                        label = titledImages.first().attr("title");
                    }
                }
            }
            if (label.isEmpty()) {
                // last resort: missing end tag, as in Offenburg
                label = parse_option_regex(input);
            }
            mediaDropdown.addDropdownValue(input.val(), label);
        }
        result.add(mediaDropdown);
    }

    // branches
    Elements branchOptions = doc.select("form select[name=ZW] option");
    if (!branchOptions.isEmpty()) {
        Element branchSelect = branchOptions.get(0).parent();
        DropdownSearchField branchDropdown = new DropdownSearchField();
        branchDropdown.setId(branchSelect.attr("name"));
        // the caption is the text of the element preceding the select's container
        String caption = branchSelect.parent().previousElementSibling().text();
        branchDropdown.setDisplayName(caption.replace("\u00a0", "").replace("?", "").trim());
        for (Element option : branchOptions) {
            branchDropdown.addDropdownValue(option.val(), option.text());
        }
        result.add(branchDropdown);
    }

    return result;
}