Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

This page collects example usages of the select method of org.jsoup.nodes.Document.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.aquest.emailmarketing.web.controllers.BroadcastTemplateController.java

/**
 * Adds the tracking./*from ww  w  . ja v  a2 s  .  co m*/
 *
 * @param model the model
 * @param urls the urls
 * @param principal the principal
 * @param id the id
 * @param trackingFlg the tracking flg
 * @param openGAflg the open g aflg
 * @param openPixelFlg the open pixel flg
 * @param trackingType the tracking type
 * @return the string
 */
@RequestMapping(value = "/bcastTempGenerateUrls", method = RequestMethod.POST)
public String addTracking(Model model, Urls urls, Principal principal, @RequestParam(value = "id") int id,
        @RequestParam(value = "trackingFlg", required = false) boolean trackingFlg,
        @RequestParam(value = "openGAflg", required = false) boolean openGAflg,
        @RequestParam(value = "openPixelFlg", required = false) boolean openPixelFlg,
        @RequestParam(value = "trackingType", required = false) String trackingType) {
    TrackingConfig trackingConfig = new TrackingConfig();
    BroadcastTemplate broadcastTemplate = broadcastTemplateService.getBroadcastTemplateById(id);
    String workingHtml = broadcastTemplate.getHtmlbody();
    if (trackingFlg == true) {
        if (openGAflg == true) {
            workingHtml = emailTracking.addGaOpenEmailTracking(workingHtml, urls);
            System.out.println("GA Open: " + workingHtml);
        }
        if (openPixelFlg == true) {
            workingHtml = emailTracking.addPixelOpenEmailTracking(workingHtml);
            System.out.println("Pixel Open: " + workingHtml);
        }
        if (trackingType.equals("ga")) {
            workingHtml = emailTracking.addGaTrackingToUrl(workingHtml, urls);
            System.out.println("GA Click added: " + workingHtml);
        } else if (trackingType.equals("intTrack")) {
            workingHtml = emailTracking.addIntTrackingToUrl(workingHtml, urls);
            System.out.println("Internal Tracking: " + workingHtml);
        } else {
            workingHtml = emailTracking.addBothTrackingToUrl(workingHtml, urls);
        }

    }

    broadcastTemplate.setHtmlbody_tracking(workingHtml);
    System.out.println(broadcastTemplate.getHtmlbody_tracking());
    String confirm = broadcastTemplateService.SaveOrUpdate(broadcastTemplate);
    System.out.println(confirm);
    System.out.println(trackingFlg);
    System.out.println(openGAflg);
    System.out.println(openPixelFlg);
    System.out.println(trackingType);
    if (confirm == broadcastTemplate.getB_template_name()) {
        trackingConfig.setBcast_template_id(broadcastTemplate.getId());
        // taking care of tracking flg
        int tracking_flg = 0;
        if (trackingFlg == true) {
            tracking_flg = 1;
        }
        trackingConfig.setTracking_flg(tracking_flg);
        // taking care of openGAflg
        int open_ga_flg = 0;
        if (openGAflg == true) {
            open_ga_flg = 1;
        }
        trackingConfig.setOpen_ga_flg(open_ga_flg);
        // taking care of openPixelFlg
        int open_pixel_flg = 0;
        if (openPixelFlg == true) {
            open_pixel_flg = 1;
        }
        trackingConfig.setOpen_pixel_flg(open_pixel_flg);
        // set tracking type
        trackingConfig.setTracking_type(trackingType);
        // seting utm's
        trackingConfig.setUtm_campaign(urls.getUtmCampaign());
        trackingConfig.setUtm_content(urls.getUtmContent());
        trackingConfig.setUtm_medium(urls.getUtmMedium());
        trackingConfig.setUtm_source(urls.getUtmSource());
        trackingConfigService.SaveOrUpdate(trackingConfig);
    }
    // find images in html to be able to embed images in email as in-line attachments
    EmbeddedImage embeddedImage = new EmbeddedImage();
    List<String> imgList = new ArrayList<String>();
    String html = broadcastTemplate.getHtmlbody();
    Document doc = Jsoup.parse(html);
    Elements media = doc.select("[src]");
    for (Element src : media) {
        if (src.tagName().equals("img")) {
            imgList.add(src.attr("abs:src"));
        }
    }
    model.addAttribute("imgList", imgList);
    model.addAttribute("embeddedImage", embeddedImage);
    model.addAttribute("broadcastTemplate", broadcastTemplate);
    return "bcasttempembeddedimage";
}

From source file:org.kitesdk.spring.hbase.example.service.WebPageSnapshotService.java

/**
 * Collects the {@code href} value of every anchor in the document that
 * carries an {@code href} attribute.
 *
 * @param doc The document to parse
 * @return The trimmed {@code href} values, in the order the selector returned them.
 */
private List<String> getOutlinksFromDocument(Document doc) {
    Elements anchors = doc.select("a[href]");
    List<String> targets = new ArrayList<String>();
    for (int i = 0; i < anchors.size(); i++) {
        String href = anchors.get(i).attr("href");
        targets.add(href.trim());
    }
    return targets;
}

From source file:org.confab.PhpBB3Parser.java

/**
 * Parses each post for a particular topic.
 *
 * <p>Posts are looked up inside {@code div#posts}, one {@code table} per post whose id
 * matches {@code post\d+}. A post table that lacks a message or an author element is
 * skipped with a debug message instead of failing with a NullPointerException.
 *
 * @param  html         Html containing the posts to be parsed
 * @param  parent       Thread handed to every created Post
 * @return              List of Post objects
 */
public List<Post> parsePosts(Document html, ForumThread parent) {
    Utilities.debug("Starting parsePosts");
    List<Post> ret = new ArrayList<Post>();

    // Each post should have its own table
    Elements div_posts = html.select("div#posts");
    assert !div_posts.isEmpty();
    Elements posts_table = div_posts.select("table[id~=(post\\d+)]");
    assert !posts_table.isEmpty();

    for (Element el_post : posts_table) {
        // Get post id (id=post\d+)
        String post_id = el_post.attr("id").replace("post", "").trim();

        // Elements.first() returns null when nothing matched, so check explicitly
        // rather than relying on assertions that are disabled by default.
        Element el_message = el_post.select("div[id~=(post_message_\\d+)]").first();
        if (el_message == null) {
            Utilities.debug("Skipping post " + post_id + ": no message element");
            continue;
        }
        Element el_author = el_post.select(".bigusername").first();
        if (el_author == null) {
            Utilities.debug("Skipping post " + post_id + ": no author element");
            continue;
        }

        Post new_post = new Post(parent);
        new_post.id = post_id;

        // Get post message
        new_post.message = el_message.text();
        Utilities.debug("new_post.message: " + new_post.message);

        // Get post author; the author object must exist before its username is written
        // (presumably created by the Post constructor)
        assert new_post.author != null;
        new_post.author.username = el_author.text();
        Utilities.debug("new_post.author: " + new_post.author);

        ret.add(new_post);
    }

    Utilities.debug("Finished parsePosts");
    return ret;
}

From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java

/**
 * Creates a {@code Video} for the given page. URL, creation and update time, title,
 * identification code and an occurrence number of 1 are set here; the remaining
 * fields are filled by the {@code setVideo...} helpers from the same document.
 *
 * @param url the URL stored on the video
 * @param doc the parsed page; the title is read from the first {@code div#video_title a}
 * @param videoIdentificationCode the identification code stored on the video
 * @return the populated video
 */
private Video generateVideo(String url, Document doc, String videoIdentificationCode) {
    Video result = new Video();
    result.setUrl(url);

    // creation and update time share the same instant
    Date timestamp = new Date();
    result.setCreateTime(timestamp);
    result.setUpdateTime(timestamp);

    Element titleLink = doc.select("div#video_title a").first();
    result.setTitle(titleLink.text());

    result.setIdentificationCode(videoIdentificationCode);
    result.setOccurNumber(1);

    // remaining attributes are extracted from the page by dedicated helpers
    setVideoReleaseDate(doc, result);
    setVideoDuration(doc, result);
    setVideoPeople(doc, result);
    setVideoCount(doc, result);
    setVideoScore(doc, result);
    setVideoSingleFemaleFlag(doc, result);

    return result;
}

From source file:org.confab.PhpBB3Parser.java

/**
 * Parses each topic for a particular forum.
 *
 * <p>A topic is flagged as sticky when its title anchor is one of the anchors found
 * inside a table cell containing the text "Sticky:". Anchors are matched by their
 * {@code id} attribute, not by searching for the title text in the sticky markup.
 *
 * @param  forum        Document of html containing topics
 * @param  parent       Forum the threads belong to
 * @return              List of ForumThread objects; empty if the forum has no topics
 */
public List<ForumThread> parseForumThreads(Document forum, Forum parent) {
    Utilities.debug("parseForumThreads");

    List<ForumThread> ret = new ArrayList<ForumThread>();

    // Get topic table
    Elements thread_table_tds = forum.select("tbody[id*=threadbits_forum_] td");
    if (thread_table_tds.isEmpty()) {
        Utilities.debug("It seems " + parent.url + " has no topics.");
        return ret;
    }

    // Get the ids of any sticky title anchors once, up front. Matching on ids avoids
    // the false hits of a substring search (one title contained in another) and the
    // misses caused by HTML escaping of characters such as '&' in the markup.
    List<String> sticky_ids = new ArrayList<String>();
    for (Element el_sticky : thread_table_tds.select("td:contains(Sticky:)  a[id*=thread_title_]")) {
        sticky_ids.add(el_sticky.attr("id"));
    }

    // Get all topics
    Elements els_a = thread_table_tds.select("a[id*=thread_title_]");
    assert !els_a.isEmpty();

    // Loop topics and grab info about each
    for (Element el_a : els_a) {
        ForumThread new_topic = new ForumThread(parent);

        // Get topic
        new_topic.title = el_a.text();
        Utilities.debug("new_topic.title: " + new_topic.title);

        // Check if sticky
        if (sticky_ids.contains(el_a.attr("id"))) {
            new_topic.isSticky = true;
            Utilities.debug("new_topic.isSticky: " + new_topic.isSticky);
        }

        // Get URL
        new_topic.url = el_a.attr("href");
        Utilities.debug("new_topic.url:" + new_topic.url);

        ret.add(new_topic);
    }

    Utilities.debug("end printForumThreads");
    return ret;
}

From source file:org.kitesdk.spring.hbase.example.service.WebPageSnapshotService.java

/**
 * Parse the keywords out of the keywords meta tag(s) if any exist. Otherwise,
 * return an empty list. Blank entries (empty {@code content} attribute, doubled
 * or trailing commas) are not reported as keywords.
 *
 * @param doc The Document to parse
 * @return The list of trimmed, non-empty keywords.
 */
private List<String> getKeywordsFromDocument(Document doc) {
    List<String> keywords = new ArrayList<String>();
    Elements keywordsElements = doc.select("meta[name=keywords]");
    for (Element keywordsElement : keywordsElements) {
        for (String keyword : keywordsElement.attr("content").split(",")) {
            String trimmed = keyword.trim();
            // "".split(",") yields a single empty string, so filter blanks here to
            // keep the "empty list when there are no keywords" contract.
            if (!trimmed.isEmpty()) {
                keywords.add(trimmed);
            }
        }
    }
    return keywords;
}

From source file:me.vertretungsplan.parser.UntisInfoParser.java

/**
 * Reads the substitution data of a document into the schedule.
 *
 * <p>If the document contains bold day headings, one {@code SubstitutionScheduleDay} is
 * created per heading and handed to {@code parseDay} together with the element that
 * follows the heading's paragraph. If there are no headings but matching table rows
 * exist, the document is parsed as one table and the last-change information is
 * stored on the schedule itself.
 *
 * @param v the schedule being filled
 * @param lastChange last-change text, stored as-is and in parsed form
 * @param doc the document to read
 * @param klasse passed through to {@code parseDay}
 */
private void parseSubstitutionDays(SubstitutionSchedule v, String lastChange, Document doc, String klasse)
        throws JSONException, CredentialInvalidException {
    Elements dayHeadings = doc.select("#vertretung > p > b, #vertretung > b, p:has(a[href^=#]) > b");

    if (dayHeadings.isEmpty()) {
        // no per-day headings: fall back to a single substitution table, if there is one
        boolean hasTableRows = !doc.select("tr:has(td[align=center]):gt(0)").isEmpty();
        if (hasTableRows) {
            parseSubstitutionTable(v, null, doc);
            v.setLastChangeString(lastChange);
            v.setLastChange(ParserUtils.parseDateTime(lastChange));
        }
        return;
    }

    for (Element heading : dayHeadings) {
        SubstitutionScheduleDay scheduleDay = new SubstitutionScheduleDay();

        scheduleDay.setLastChangeString(lastChange);
        scheduleDay.setLastChange(ParserUtils.parseDateTime(lastChange));

        String dateText = heading.text();
        scheduleDay.setDateString(dateText);
        scheduleDay.setDate(ParserUtils.parseDate(dateText));

        // the day's content starts at a sibling whose position depends on whether the
        // heading sits directly inside a <p>
        Element container = heading.parent();
        Element dayContent = "p".equals(container.tagName())
                ? container.nextElementSibling().nextElementSibling()
                : container.select("p").first().nextElementSibling();
        parseDay(scheduleDay, dayContent, v, klasse);
    }
}

From source file:me.vertretungsplan.parser.DaVinciParser.java

/**
 * Returns the names of all classes.
 *
 * <p>When the schedule data contains {@code PARAM_CLASSES_SOURCE}, the page at that URL
 * is fetched and the text of every {@code li.Class} element is returned; if there are
 * none, the links inside left-aligned table cells are used instead (daVinci 5 layout).
 * Without that parameter the classes come from {@code getClassesFromJson()}.
 *
 * @return the class names
 */
@Override
public List<String> getAllClasses() throws IOException, JSONException, CredentialInvalidException {
    if (!scheduleData.getData().has(PARAM_CLASSES_SOURCE)) {
        return getClassesFromJson();
    }

    // read the value with the same key constant that was just checked, so the
    // presence test and the lookup cannot drift apart
    String classesUrl = scheduleData.getData().getString(PARAM_CLASSES_SOURCE);
    Document doc = Jsoup.parse(httpGet(classesUrl, ENCODING));

    Elements elems = doc.select("li.Class");
    if (elems.isEmpty()) {
        // daVinci 5
        elems = doc.select("td[align=left] a");
    }

    List<String> classes = new ArrayList<>();
    for (Element elem : elems) {
        classes.add(elem.text());
    }
    return classes;
}

From source file:org.jresponder.message.MessageRefImpl.java

/**
 * Renders this message for one subscriber and subscription and writes the
 * result (subject, sender, recipient, from address and body) into the given
 * MIME message.
 *
 * <p>The template is rendered, parsed with Jsoup, and the elements with the ids
 * {@code htmlbody} and {@code textbody} supply the HTML and plain-text bodies. With
 * both present a {@code multipart/alternative} body is built; with only one, that
 * one is used on its own.
 *
 * @return {@code true} if a body was set; {@code false} if the rendered template has
 *         neither a text nor an HTML body, meaning the message is to be skipped
 *         (the headers have already been written at that point)
 */
@Override
public boolean populateMessage(MimeMessage aMimeMessage, SendConfig aSendConfig, Subscriber aSubscriber,
        Subscription aSubscription) {

    try {
        // values available to the templates
        Map<String, Object> context = new HashMap<String, Object>();
        context.put("subscriber", aSubscriber);
        context.put("subscription", aSubscription);
        context.put("config", aSendConfig);
        context.put("message", this);

        // render the complete template, then let Jsoup pick the bodies out of it
        String rendered = TextRenderUtil.getInstance().render(fileContents, context);
        Document parsed = Jsoup.parse(rendered);

        Elements htmlNodes = parsed.select("#htmlbody");
        String htmlBody = htmlNodes.isEmpty() ? "" : htmlNodes.html();

        Elements textNodes = parsed.select("#textbody");
        String textBody = textNodes.isEmpty() ? ""
                : TextUtil.getInstance().getWholeText(textNodes.first());

        // the helper wraps the caller's message - some headers are easier to set that way
        MimeMessageHelper helper = new MimeMessageHelper(aMimeMessage);

        // subject
        String subject = TextRenderUtil.getInstance()
                .render((String) propMap.get(MessageRefProp.JR_SUBJECT.toString()), context);
        helper.setSubject(subject);

        // TODO: implement DKIM, figure out subetha

        // sender
        String senderEmail = TextRenderUtil.getInstance().render(aSendConfig.getSenderEmailPattern(),
                context);
        aMimeMessage.setSender(new InternetAddress(senderEmail));

        // to
        helper.setTo(aSubscriber.getEmail());

        // from (address, then personal name)
        String fromEmail = TextRenderUtil.getInstance()
                .render((String) propMap.get(MessageRefProp.JR_FROM_EMAIL.toString()), context);
        String fromName = TextRenderUtil.getInstance()
                .render((String) propMap.get(MessageRefProp.JR_FROM_NAME.toString()), context);
        helper.setFrom(fromEmail, fromName);

        // decide on the body layout
        boolean hasText = textBody.trim().length() > 0;
        boolean hasHtml = htmlBody.trim().length() > 0;

        if (hasText && hasHtml) {
            // both flavours: multipart/alternative, plain text first, then HTML
            MimeMultipart alternatives = new MimeMultipart("alternative");
            aMimeMessage.setContent(alternatives);

            BodyPart textPart = new MimeBodyPart();
            textPart.setText(textBody);
            alternatives.addBodyPart(textPart);

            BodyPart htmlPart = new MimeBodyPart();
            htmlPart.setContent(htmlBody, "text/html");
            alternatives.addBodyPart(htmlPart);
            return true;
        }

        if (hasHtml) {
            helper.setText(htmlBody, true);
            return true;
        }

        if (hasText) {
            helper.setText(textBody, false);
            return true;
        }

        // neither text nor HTML: the message is being skipped
        return false;

    } catch (MessagingException e) {
        throw new RuntimeException(e);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }

}

From source file:com.wheelermarine.android.publicAccesses.Updater.java

/**
 * Replaces the stored public-access data with a freshly downloaded copy.
 *
 * <p>Steps, all inside one database transaction: delete the existing rows, fetch the
 * page at {@code urls[0]}, find the FTP download link on it, download the ZIP archive
 * over anonymous FTP, read the dBase entry into the database and the shape entry into
 * a record-number-to-location map, then copy each location's latitude and longitude
 * onto the matching record. The transaction is only marked successful when every step
 * completed.
 *
 * @param urls only {@code urls[0]} is used: the web page that links to the data
 * @return the value of {@code db.getPublicAccessesCount()} on success, or -1 if any
 *         exception occurred (the exception is stored in {@code error} and logged)
 */
@Override
protected Integer doInBackground(URL... urls) {

    try {
        final DatabaseHelper db = new DatabaseHelper(context);

        SQLiteDatabase database = db.getWritableDatabase();
        if (database == null)
            throw new IllegalStateException("Unable to open database!");

        database.beginTransaction();
        try {
            // Clear out the old data.
            database.delete(DatabaseHelper.PublicAccessEntry.TABLE_NAME, null, null);

            // Connect to the web server and locate the FTP download link.
            Log.v(TAG, "Finding update: " + urls[0]);
            activity.runOnUiThread(new Runnable() {
                @Override
                public void run() {
                    progress.setMessage("Locating update...");
                    progress.setIndeterminate(true);
                }
            });
            // timeout is multiplied by 1000 here and below before being passed on
            Document doc = Jsoup.connect(urls[0].toString()).timeout(timeout * 1000).userAgent(userAgent).get();
            URL dataURL = null;
            // If several links match, the last one on the page wins.
            for (Element element : doc.select("a")) {
                if (element.hasAttr("href") && element.attr("href").startsWith("ftp://ftp.dnr.state.mn.us")) {
                    dataURL = new URL(element.attr("href"));
                }
            }

            // Make sure the download URL was found.
            if (dataURL == null)
                throw new FileNotFoundException("Unable to locate data URL.");

            // Connect to the FTP server and download the update.
            Log.v(TAG, "Downloading update: " + dataURL);
            activity.runOnUiThread(new Runnable() {
                @Override
                public void run() {
                    progress.setMessage("Downloading update...");
                    progress.setIndeterminate(true);
                }
            });
            FTPClient ftp = new FTPClient();
            try {
                ftp.setConnectTimeout(timeout * 1000);
                ftp.setDefaultTimeout(timeout * 1000);
                ftp.connect(dataURL.getHost());
                ftp.enterLocalPassiveMode();

                // After connection attempt, you should check the reply code
                // to verify success.
                if (!FTPReply.isPositiveCompletion(ftp.getReplyCode())) {
                    ftp.disconnect();
                    throw new IOException("FTP server refused connection: " + ftp.getReplyString());
                }

                // Login using the standard anonymous credentials.
                if (!ftp.login("anonymous", "anonymous")) {
                    ftp.disconnect();
                    throw new IOException("FTP Error: " + ftp.getReplyString());
                }

                // Stays null unless the archive contains a shape entry.
                Map<Integer, Location> locations = null;

                // Download the ZIP archive.
                Log.v(TAG, "Downloading: " + dataURL.getFile());
                ftp.setFileType(FTP.BINARY_FILE_TYPE);
                // NOTE(review): the Commons Net documentation for retrieveFileStream asks for
                // completePendingCommand() once the stream is closed; that call is not made
                // here - the connection is disconnected further down. Confirm this is intended.
                InputStream in = ftp.retrieveFileStream(dataURL.getFile());
                if (in == null)
                    throw new FileNotFoundException(dataURL.getFile() + " was not found!");
                try {
                    ZipInputStream zin = new ZipInputStream(in);
                    try {
                        // Walk all ZIP entries: the entry ending in entryName is read into
                        // the database, the one ending in shapeEntryName into the locations map.
                        ZipEntry entry;
                        while ((entry = zin.getNextEntry()) != null) {
                            if (entry.getName().endsWith(entryName)) {
                                readDBaseFile(zin, database);
                            } else if (entry.getName().endsWith(shapeEntryName)) {
                                locations = readShapeFile(zin);
                            }
                        }
                    } finally {
                        try {
                            zin.close();
                        } catch (Exception e) {
                            // Ignore this error.
                        }
                    }
                } finally {
                    in.close();
                }

                if (locations != null) {
                    final int recordCount = locations.size();
                    activity.runOnUiThread(new Runnable() {
                        @Override
                        public void run() {
                            progress.setIndeterminate(false);
                            progress.setMessage("Updating locations...");
                            progress.setMax(recordCount);
                        }
                    });

                    // From here on this local counter shadows the outer 'progress' that the
                    // runnables above call setMessage()/setMax() on.
                    int progress = 0;
                    for (int recordNumber : locations.keySet()) {
                        // NOTE(review): no null check on 'access' - presumably every record
                        // number in the shape file has a matching row; verify.
                        PublicAccess access = db.getPublicAccessByRecordNumber(recordNumber);
                        Location loc = locations.get(recordNumber);
                        access.setLatitude(loc.getLatitude());
                        access.setLongitude(loc.getLongitude());
                        db.updatePublicAccess(access);
                        publishProgress(++progress);
                    }
                }
            } finally {
                if (ftp.isConnected())
                    ftp.disconnect();
            }
            // Reached only when no exception was thrown above.
            database.setTransactionSuccessful();
            return db.getPublicAccessesCount();
        } finally {
            database.endTransaction();
        }
    } catch (Exception e) {
        // Any failure is remembered in 'error' and reported to the caller as -1.
        error = e;
        Log.e(TAG, "Error loading data: " + e.getLocalizedMessage(), e);
        return -1;
    }
}