Example usage for org.jsoup.nodes Element text

List of usage examples for org.jsoup.nodes.Element.text()

Introduction

On this page you can find usage examples for the text() method of org.jsoup.nodes.Element.

Prototype

public String text() 

Source Link

Document

Gets the combined text of this element and all its children.

Usage

From source file:Leitura.Ecobertura.java

/**
 * Reports whether any element carrying an {@code align} attribute has text
 * other than {@code "0%"} or {@code "N/A"}.
 *
 * @return {@code true} as soon as one such element is found, {@code false} otherwise
 */
public boolean cobertura() {
    for (Element cell : document.getElementsByAttribute("align")) {
        String value = cell.text();
        boolean isPlaceholder = value.equals("0%") || value.equals("N/A");
        if (isPlaceholder) {
            continue;
        }
        return true;
    }
    return false;
}

From source file:com.mycompany.searchengineaggregator.SearchEngineAggregator.java

/**
 * Fetches the result page of the given search engine for {@code query} and
 * collects its outbound links.
 *
 * <p>Every {@code a[href]} element on the page is inspected. Links whose text is
 * exactly {@code "Cached"} are skipped. For Google, only links starting with the
 * {@code /url?q=} redirect prefix are kept and that leading prefix is removed;
 * for Yahoo and Bing, only links starting with {@code http} are kept.
 *
 * @param query        search terms; appended to the search URL as-is
 *                     (NOTE(review): not URL-encoded here - confirm callers encode it)
 * @param searchEngine engine to query
 * @return the collected results, numbered from 1 in page order; {@code null} when
 *         the engine is not handled or the page could not be fetched
 * @throws JSONException declared by the signature; presumably raised while building
 *                       a result object - verify against toSearchResultJSONObject
 */
public ArrayList<JSONObject> getSearchResults(String query, SearchEngine searchEngine) throws JSONException {
    final String googleRedirectPrefix = "/url?q=";

    ArrayList<JSONObject> result = new ArrayList<>();
    StringBuilder url = new StringBuilder();
    String userAgent = null;

    // Engine-specific connection details.
    switch (searchEngine) {
    case Google:
        url.append("https://www.google.com/search?q=");
        url.append(query);
        userAgent = GoogleUserAgent;
        break;
    case Yahoo:
        url.append("https://search.yahoo.com/search?q=");
        url.append(query);
        userAgent = YahooUserAgent;
        break;
    case Bing:
        url.append("http://www.bing.com/search?q=");
        url.append(query);
        userAgent = BingUserAgent;
        break;
    default:
        // Unhandled engine: userAgent stays null and null is returned below.
        break;
    }

    if (userAgent == null) {
        return null;
    }

    Document doc = null;
    try {
        // NOTE(review): the extra "query=Java" parameter and the "auth" cookie look like
        // leftovers from sample code; kept unchanged so the outgoing request stays the same.
        doc = Jsoup.connect(url.toString()).data("query", "Java").userAgent(userAgent).cookie("auth", "token")
                .timeout(3000).get();
    } catch (IOException ex) {
        Logger.getLogger(SearchEngineAggregator.class.getName()).log(Level.SEVERE, null, ex);
    }

    if (doc == null) {
        return null;
    }

    int rank = 1;
    for (Element link : doc.select("a[href]")) {
        String href = link.attr("href");
        String text = link.text();

        if (text.equals("Cached")) {
            continue;
        }

        // Engine-specific filtering; "continue" skips to the next link.
        switch (searchEngine) {
        case Google:
            if (!href.startsWith(googleRedirectPrefix)) {
                continue;
            }
            // Strip only the leading redirect prefix. String.replace would also remove
            // any later occurrence of the same text inside the target URL.
            href = href.substring(googleRedirectPrefix.length());
            break;
        case Yahoo:
        case Bing:
            if (!href.startsWith("http")) {
                continue;
            }
            break;
        default:
            continue;
        }

        result.add(toSearchResultJSONObject(searchEngine.toString(), rank, text, href));
        rank++;
    }

    return result;
}

From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java

/**
 * Reads the column headlines of the first {@code table.mon_list} found under {@code root}.
 *
 * @param root element to search in
 * @return the text of every {@code th} matched by {@code tr:first-child th} inside that
 *         table, in document order; an empty list when no such table exists
 */
private List<String> parseCategories(Element root) {
    List<String> categories = new ArrayList<String>();

    // first() yields null when the selector matches nothing.
    Element table = root.select("table.mon_list").first();
    if (table == null) {
        return categories;
    }

    // category headlines
    for (Element headline : table.select("tr:first-child th")) {
        categories.add(headline.text());
    }

    return categories;
}

From source file:neembuu.release1.externalImpl.linkhandler.SaveVideoYoutubeLinkHandlerProvider.java

/**
 * Builds a link handler for the reference link of {@code tlh} by requesting the
 * save-video.com download page for it and turning each listed download link into a file entry.
 *
 * <p>Any exception raised inside the extraction is caught and its stack trace printed;
 * the builder is returned with whatever entries were added up to that point.
 *
 * @param tlh        supplies the reference link string to resolve
 * @param retryCount not read anywhere in this method
 * @return the builder, with the group name set to the video name and one file per
 *         usable download link
 * @throws Exception declared by the signature; only the two statements before the
 *                   {@code try} block can propagate one
 */
private BasicLinkHandler.Builder saveVideoExtraction(TrialLinkHandler tlh, int retryCount) throws Exception {
    String url = tlh.getReferenceLinkString();
    BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create();

    try {
        DefaultHttpClient httpClient = NHttpClient.getNewInstance();
        String requestUrl = "http://www.save-video.com/download.php?url=" + URLEncoder.encode(url, "UTF-8");

        final String responseString = NHttpClientUtils.getData(requestUrl, httpClient);

        //Set the group name as the name of the video
        String nameOfVideo = getVideoName(url);

        // Placeholder; overwritten with the link text for every link that is processed.
        String fileName = "text";

        linkHandlerBuilder.setGroupName(nameOfVideo);

        // Never reassigned below (the code that would set it is commented out),
        // so it stays at -1 for the whole method.
        long c_duration = -1;

        Document doc = Jsoup.parse(responseString);

        Elements elements = doc.select(".sv-download-links ul li a");

        for (Element element : elements) {
            String singleUrl = element.attr("href");

            // Links whose href starts with "DownloadFile.php" are not processed.
            if (!singleUrl.startsWith("DownloadFile.php")) {
                fileName = element.text();
                singleUrl = Utils.normalize(singleUrl);
                LOGGER.log(Level.INFO, "Normalized URL: {0}", singleUrl);
                long length = NHttpClientUtils.calculateLength(singleUrl, httpClient);

                //LOGGER.log(Level.INFO,"Length: " + length);

                if (length <= 0) {
                    continue;
                    /*skip this url*/ }

                BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder.createFile();

                // The duration lookup is entirely commented out, so this try block is empty.
                try { // finding video/audio length
                    //                        String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&");
                    //                        long duration = (int)(Double.parseDouble(dur)*1000);
                    //                        if(c_duration < 0 ){ c_duration = duration; }
                    //                        fileBuilder.putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, duration);
                    //                        LOGGER.log(Level.INFO,"dur="+dur);
                } catch (NumberFormatException a) {
                    // ignore
                }

                // The link text is stored unmodified as the variant description.
                try { // finding the quality short name
                    //                        String type = fileName.substring(fileName.indexOf("(")+1);
                    String type = fileName;
                    fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION,
                            type);
                    LOGGER.log(Level.INFO, "type={0}", type);
                } catch (Exception a) {
                    a.printStackTrace();
                }

                // Final file name is "<video name> <link text>".
                fileName = nameOfVideo + " " + fileName;

                fileBuilder.setName(fileName).setUrl(singleUrl).setSize(length).next();
            }
        }

        // Back-fills a missing duration from c_duration. Because c_duration is always -1
        // here, the "c_duration > 0" condition is never true and nothing is written.
        for (OnlineFile of : linkHandlerBuilder.getFiles()) {
            long dur = of.getPropertyProvider()
                    .getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS);
            if (dur < 0 && c_duration > 0 && of.getPropertyProvider() instanceof BasicPropertyProvider) {
                ((BasicPropertyProvider) of.getPropertyProvider()).putLongPropertyValue(
                        PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, c_duration);
            }
        }

    } catch (Exception ex) {
        // Failures are only printed; the builder is still returned below.
        ex.printStackTrace();
    }

    return linkHandlerBuilder;
}

From source file:com.mycompany.grabberrasskazov.threads.ThreadForPageSave.java

/**
 * Downloads the story page at {@code pageUrl}, extracts its name, writer, categories
 * and text, and saves it through {@code mainBean} unless a story with the same id
 * already exists.
 *
 * <p>The story id is {@code pageUrl} with every occurrence of {@code GlobalVars.mainSite}
 * removed. An {@link IOException} while fetching the page is printed and otherwise ignored.
 * The {@code get(0)} calls throw {@link IndexOutOfBoundsException} when the page does not
 * contain the expected markup.
 *
 * @param pageUrl address of the story page
 */
public void indexStory(String pageUrl) {
    try {
        String oldId = pageUrl.replace(GlobalVars.mainSite, "");
        if (!mainBean.storyExists(oldId)) {
            Stories r = new Stories();

            Document doc = Jsoup.connect(pageUrl)
                    .userAgent("Opera/9.80 (X11; Linux x86_64) " + "Presto/2.12.388 Version/12.16").get();

            // Story name: second cell of the row two levels above the label element.
            // NOTE(review): the "?" in this selector (and in the writer selector below)
            // looks like label text lost to an encoding problem - confirm against the
            // original source before relying on it.
            Elements nameBlockElements = doc.select("b:containsOwn(?)");
            Element nameBlock = nameBlockElements.get(0);
            nameBlock = nameBlock.parent().parent();
            nameBlockElements = nameBlock.select("td:eq(1)");
            nameBlock = nameBlockElements.get(0);
            String storyName = nameBlock.text();
            r.setStoryName(storyName);

            // Start of processing writer
            Elements writerBlockElements = doc.select("b:containsOwn(?:)");
            Element writerBlock = writerBlockElements.get(0);
            writerBlock = writerBlock.parent().parent();
            writerBlockElements = writerBlock.select("td:eq(1)");
            writerBlock = writerBlockElements.get(0);

            String writersUrl = writerBlock.select("a:eq(0)").attr("href");
            String writersName = writerBlock.select("a:eq(0)").text();
            String writersContacts = writerBlock.select("a:eq(1)").attr("href");

            StoryWriters storyWriter = new StoryWriters();
            storyWriter.setOldId(writersUrl);
            storyWriter.setWriterEmail(writersContacts);
            storyWriter.setWriterName(writersName);
            storyWriter = mainBean.saveWriter(storyWriter);

            Set<StoriesToWritersRelations> storiesToWritersRelationses = new HashSet<StoriesToWritersRelations>();
            StoriesToWritersRelations storiesToWritersRelations = new StoriesToWritersRelations();
            storiesToWritersRelations.setStories(r);
            storiesToWritersRelations.setStoryWriters(storyWriter);
            // The relation must be put into the set; otherwise the story is stored with
            // an empty writer set (the category section below adds its relations the same way).
            storiesToWritersRelationses.add(storiesToWritersRelations);
            r.setStoriesToWritersRelationses(storiesToWritersRelationses);
            // End of processing writer

            // One relation per category link found on the page.
            Set<StoriesToCategoriessRelations> catsRelationses = new HashSet<>();
            Elements katsInfo = doc.select("a[href*=ras.shtml?kat]");
            for (Element kat : katsInfo) {
                String katId = kat.attr("href");
                StoryCategories cat = mainBean.getCat(katId);

                StoriesToCategoriessRelations catsRelations = new StoriesToCategoriessRelations();
                catsRelations.setStoryCategories(cat);
                catsRelations.setStories(r);

                catsRelationses.add(catsRelations);
            }
            r.setStoriesToCategoriessRelationses(catsRelationses);

            // Story body: inner HTML of the first justified paragraph, with double
            // quotes replaced by single quotes.
            Elements textBlocks = doc.select("p[align=justify]");
            Element textBlock = textBlocks.get(0);
            String textStr = textBlock.html();
            r.setStoryText(textStr.replace("\"", "'"));

            r.setOldId(oldId);

            mainBean.saveStory(r);
        }

    } catch (IOException ex) {
        ex.printStackTrace();
    }

}

From source file:info.smartkit.hairy_batman.query.SogouSearchQuery.java

/**
 * Fetches the Sogou search page for the subscribe id of {@code wxFoo} and, for each
 * matching element whose text contains the configured keyword, stores the text that
 * follows the keyword via {@code wxFoo.setOpenId(...)} and adds {@code wxFoo} to
 * {@code GlobalVariables.wxFooListWithUserId}.
 *
 * <p>Nothing is stored while {@code wxFoo.getUserId()} is non-null. An
 * {@link IOException} during the fetch is logged and otherwise ignored.
 */
public void parseWxUserId() {
    try {
        // need http protocol
        Document doc = Jsoup.connect(GlobalConsts.SOGOU_SEARCH_URL_BASE + wxFoo.getSubscribeId()).get();

        Elements openIdSpans = doc.select(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_HTML_ELEMENTS);
        for (Element openIdSpan : openIdSpans) {
            if (!openIdSpan.hasText()) {
                continue;
            }
            String spanText = openIdSpan.text();
            if (!spanText.contains(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_KEYWORDS)) {
                continue;
            }
            LOG.info("openId span text : " + spanText);

            // NOTE(review): the guard and the log line read getUserId() while the value is
            // written with setOpenId() - carried over from the original FIXME; confirm which
            // property is intended.
            if (this.wxFoo.getUserId() != null) {
                continue;
            }

            // split() drops trailing empty strings, so when nothing follows the keyword
            // there is no element at index 1.
            String[] parts = spanText.split(GlobalConsts.SOGOU_SEARCH_WX_USER_ID_KEYWORDS);
            if (parts.length < 2) {
                LOG.info("no value after keyword in span text : " + spanText);
                continue;
            }

            this.wxFoo.setOpenId(parts[1]);
            LOG.info("saved wxUserId value: " + this.wxFoo.getUserId());
            GlobalVariables.wxFooListWithUserId.add(this.wxFoo);
        }

    } catch (IOException e) {
        LOG.error(e.toString());
    }
}

From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java

/**
 * Replaces the content of every {@code code} element in the document with
 * syntax-highlighted HTML.
 *
 * <p>The renderer is chosen by the element's class name, defaulting to {@code "java"}
 * when the class name is blank; elements for which no renderer exists are left
 * untouched. After highlighting, {@code br} elements and comments are removed from the
 * element, its HTML is trimmed, and a {@code pre} parent gets the class {@code code}.
 *
 * @param slidesDocument document whose {@code code} elements are rewritten in place
 * @param config         supplies the charset used to encode and decode the code text
 */
private void renderSyntaxHighlightingHtml(final Document slidesDocument, final Configuration config) {
    for (Element code : slidesDocument.select("code")) {
        Charset encoding = config.getInputEncoding();
        ByteArrayInputStream input = new ByteArrayInputStream(code.text().getBytes(encoding));
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        String className = code.className();
        if (StringUtils.isBlank(className)) {
            className = "java";
        }
        Renderer renderer = XhtmlRendererFactory.getRenderer(className);
        if (renderer != null) {
            try {
                renderer.highlight("slidery", input, out, encoding.name(), true);
                code.html(new String(out.toByteArray(), encoding));
                code.select("br").remove();
                removeComments(code);
                code.html(code.html().trim());
                Element parent = code.parent();
                // Compare by value: == on strings only tests reference identity.
                if ("pre".equals(parent.tagName())) {
                    parent.addClass("code");
                }
            } catch (IOException ignored) {
                // Input and output are in-memory byte array streams. If highlighting
                // still fails, this element keeps whatever content it had at that point
                // and the loop moves on to the next one.
                // TODO: report the failure once a logger is available here.
            }
        }
    }
}

From source file:ac.ucy.cs.spdx.license.License.java

/**
 * Creates a license by fetching the page at {@code url} and reading its name,
 * identifier and text from the elements whose {@code property} attribute is
 * {@code rdfs:label}, {@code spdx:licenseId} and {@code spdx:licenseText}.
 *
 * <p>The license text is wrapped at 80 columns. The new instance is added to
 * {@code licenses} and passed to {@code saveLicense}. A page missing any of the
 * three elements makes the corresponding {@code get(0)} throw
 * {@link IndexOutOfBoundsException}.
 *
 * @param url      address of the license page
 * @param category optional; when at least one value is given, the first one is set
 *                 as this license's category and the rest are ignored
 * @throws IOException if the page cannot be fetched
 */
public License(String url, Category... category) throws IOException {
    if (category.length != 0) {
        this.setCategory(category[0]);
    }

    Document doc = Jsoup.connect(url).get();

    Element fullName = doc.getElementsByAttributeValue("property", "rdfs:label").get(0);
    this.setLicenseName(fullName.text());

    Element identifier = doc.getElementsByAttributeValue("property", "spdx:licenseId").get(0);
    this.setIdentifier(identifier.text());

    Element licenseText = doc.getElementsByAttributeValue("property", "spdx:licenseText").get(0);
    this.setLicenseText(WordUtils.wrap(licenseText.text(), 80));

    licenses.add(this);
    saveLicense(this);
}

From source file:me.vertretungsplan.parser.ESchoolParser.java

/**
 * Converts the rows of a substitution table into {@link Substitution} objects and adds
 * them to {@code day}.
 *
 * <p>Each {@code th[colspan=10]} cell starts a lesson section. The lesson is the number
 * in front of {@code ". Stunde"} when the cell text contains that pattern, otherwise
 * the whole cell text. The row directly after the section header is skipped; the rows
 * after it are read until a row containing a {@code th} (or the end of the table) is
 * reached.
 *
 * @param table table element to read
 * @param day   receives one substitution per data row
 */
private void parseTable(Element table, SubstitutionScheduleDay day) {
    // Compiled once per call; it was previously recompiled for every section header.
    Pattern lessonPattern = Pattern.compile("(\\d+)\\. Stunde");

    for (Element th : table.select("th[colspan=10]")) {
        String headerText = th.text();
        Matcher matcher = lessonPattern.matcher(headerText);
        String lesson = matcher.find() ? matcher.group(1) : headerText;

        // skip over table headers; a section header that is the last row has no sibling
        Element columnHeaderRow = th.parent().nextElementSibling();
        Element row = columnHeaderRow == null ? null : columnHeaderRow.nextElementSibling();

        while (row != null && row.select("th").size() == 0) {
            Substitution subst = new Substitution();
            subst.setLesson(lesson);

            Elements columns = row.select("td");

            // Class names are separated by ", " or "+".
            String[] classes = columns.get(0).text().split(", |\\+");
            subst.setClasses(new HashSet<>(Arrays.asList(classes)));

            subst.setPreviousTeacher(getPreviousValue(columns.get(1)));
            subst.setTeacher(getNewValue(columns.get(1)));
            subst.setPreviousSubject(getPreviousValue(columns.get(2)));
            subst.setSubject(getNewValue(columns.get(2)));
            subst.setPreviousRoom(getPreviousValue(columns.get(3)));
            subst.setRoom(getNewValue(columns.get(3)));

            // An empty fifth column means a plain substitution; otherwise its text is the
            // description and the type is derived from it, falling back to "Vertretung".
            String desc = columns.get(4).text();
            if (desc.isEmpty()) {
                subst.setType("Vertretung");
                subst.setColor(colorProvider.getColor("Vertretung"));
            } else {
                subst.setDesc(desc);
                String recognizedType = recognizeType(desc);
                if (recognizedType == null) {
                    recognizedType = "Vertretung";
                }
                subst.setType(recognizedType);
                subst.setColor(colorProvider.getColor(recognizedType));
            }

            day.addSubstitution(subst);

            row = row.nextElementSibling();
        }
    }
}

From source file:org.jresponder.util.TextUtil.java

/**
 * @param cell element that contains whitespace formatting
 * @return/*from  ww w  .j  a  v  a2s.  co  m*/
 */
public String getWholeText(Element cell) {
    String text = null;
    List<Node> childNodes = cell.childNodes();
    if (childNodes.size() > 0) {
        Node childNode = childNodes.get(0);
        if (childNode instanceof TextNode) {
            text = ((TextNode) childNode).getWholeText();
        }
    }
    if (text == null) {
        text = cell.text();
    }
    return text;
}